diff options
Diffstat (limited to 'fs')
151 files changed, 3419 insertions, 2993 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 041c52692284..68bf2af6c389 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -178,7 +178,7 @@ v9fs_file_read(struct file *filp, char __user *udata, size_t count, int ret; struct p9_fid *fid; - P9_DPRINTK(P9_DEBUG_VFS, "count %d offset %lld\n", count, *offset); + P9_DPRINTK(P9_DEBUG_VFS, "count %zu offset %lld\n", count, *offset); fid = filp->private_data; if (count > (fid->clnt->msize - P9_IOHDRSZ)) diff --git a/fs/Kconfig b/fs/Kconfig index e46297f020c1..522469a7eca3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -22,9 +22,10 @@ source "fs/jbd2/Kconfig" config FS_MBCACHE # Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y - default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m + default y if EXT2_FS=y && EXT2_FS_XATTR + default y if EXT3_FS=y && EXT3_FS_XATTR + default y if EXT4_FS=y && EXT4_FS_XATTR + default m if EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR config REISERFS_FS tristate "Reiserfs support" diff --git a/fs/Makefile b/fs/Makefile index 2168c902d5ca..d9f8afe6f0c4 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -81,8 +81,6 @@ obj-$(CONFIG_HUGETLBFS) += hugetlbfs/ obj-$(CONFIG_CODA_FS) += coda/ obj-$(CONFIG_MINIX_FS) += minix/ obj-$(CONFIG_FAT_FS) += fat/ -obj-$(CONFIG_MSDOS_FS) += msdos/ -obj-$(CONFIG_VFAT_FS) += vfat/ obj-$(CONFIG_BFS_FS) += bfs/ obj-$(CONFIG_ISO9660_FS) += isofs/ obj-$(CONFIG_HFSPLUS_FS) += hfsplus/ # Before hfs to find wrapped HFS+ diff --git a/fs/afs/dir.c b/fs/afs/dir.c index dfda03d4397d..99cf390641f7 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -45,6 +45,7 @@ const struct file_operations afs_dir_file_operations = { .release = afs_release, .readdir = afs_readdir, .lock = afs_lock, + .llseek = generic_file_llseek, }; const struct inode_operations afs_dir_inode_operations = { diff --git a/fs/attr.c b/fs/attr.c index 26c71ba1eed4..7a83819f6ba2 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -159,17 +159,17 @@ int notify_change(struct dentry * dentry, struct iattr * attr) if (!(attr->ia_valid & ~(ATTR_KILL_SUID | ATTR_KILL_SGID))) return 0; + error = security_inode_setattr(dentry, attr); + if (error) + return error; + if (ia_valid & ATTR_SIZE) down_write(&dentry->d_inode->i_alloc_sem); if (inode->i_op && inode->i_op->setattr) { - error = security_inode_setattr(dentry, attr); - if (!error) - error = inode->i_op->setattr(dentry, attr); + error = inode->i_op->setattr(dentry, attr); } else { error = inode_change_ok(inode, attr); - if (!error) - error = security_inode_setattr(dentry, attr); if (!error) { if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 625abf5422e2..33bf8cbfd051 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -128,9 +128,10 @@ static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) */ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) { - int err = -EINVAL; + int err; - if (check_dev_ioctl_version(cmd, param)) { + err = check_dev_ioctl_version(cmd, param); + if (err) { AUTOFS_WARN("invalid device control module version " "supplied for cmd(0x%08x)", cmd); goto out; diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cde2f8e8935a..4b6fb3f628c0 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c @@ -56,12 +56,23 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry) mntget(mnt); dget(dentry); - if (!autofs4_follow_mount(&mnt, &dentry)) + if (!follow_down(&mnt, &dentry)) goto done; - /* This is an autofs submount, we can't expire it */ - if (is_autofs4_dentry(dentry)) - goto done; + if (is_autofs4_dentry(dentry)) { + struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); + + /* This is an autofs submount, we can't expire it */ + if (sbi->type == AUTOFS_TYPE_INDIRECT) + goto done; + + /* + * Otherwise it's an offset mount and we need to check + * if we can umount its mount, if there is one. + */ + if (!d_mountpoint(dentry)) + goto done; + } /* Update the expiry counter if fs is busy */ if (!may_umount_tree(mnt)) { diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index ed8feb052df9..daae463068e4 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -80,6 +80,7 @@ const struct file_operations bfs_dir_operations = { .read = generic_read_dir, .readdir = bfs_readdir, .fsync = file_fsync, + .llseek = generic_file_llseek, }; extern void dump_imap(const char *, struct super_block *); diff --git a/fs/block_dev.c b/fs/block_dev.c index 218408eed1bb..88a776fa0ef6 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -840,13 +840,12 @@ EXPORT_SYMBOL_GPL(bd_release_from_disk); * to be used for internal purposes. If you ever need it - reconsider * your API. */ -struct block_device *open_by_devnum(dev_t dev, unsigned mode) +struct block_device *open_by_devnum(dev_t dev, fmode_t mode) { struct block_device *bdev = bdget(dev); int err = -ENOMEM; - int flags = mode & FMODE_WRITE ? O_RDWR : O_RDONLY; if (bdev) - err = blkdev_get(bdev, mode, flags); + err = blkdev_get(bdev, mode); return err ? ERR_PTR(err) : bdev; } @@ -975,9 +974,7 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, - int for_part); -static int __blkdev_put(struct block_device *bdev, int for_part); +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* * bd_mutex locking: @@ -986,7 +983,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part); * mutex_lock_nested(whole->bd_mutex, 1) */ -static int do_open(struct block_device *bdev, struct file *file, int for_part) +static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) { struct gendisk *disk; struct hd_struct *part = NULL; @@ -994,9 +991,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) int partno; int perm = 0; - if (file->f_mode & FMODE_READ) + if (mode & FMODE_READ) perm |= MAY_READ; - if (file->f_mode & FMODE_WRITE) + if (mode & FMODE_WRITE) perm |= MAY_WRITE; /* * hooks: /n/, see "layering violations". @@ -1008,7 +1005,6 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) } ret = -ENXIO; - file->f_mapping = bdev->bd_inode->i_mapping; lock_kernel(); @@ -1027,7 +1023,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!partno) { struct backing_dev_info *bdi; if (disk->fops->open) { - ret = disk->fops->open(bdev->bd_inode, file); + ret = disk->fops->open(bdev, mode); if (ret) goto out_clear; } @@ -1047,7 +1043,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) if (!whole) goto out_clear; BUG_ON(for_part); - ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); + ret = __blkdev_get(whole, mode, 1); if (ret) goto out_clear; bdev->bd_contains = whole; @@ -1068,7 +1064,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) disk = NULL; if (bdev->bd_contains == bdev) { if (bdev->bd_disk->fops->open) { - ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); + ret = bdev->bd_disk->fops->open(bdev, mode); if (ret) goto out_unlock_bdev; } @@ -1088,7 +1084,7 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) bdev->bd_part = NULL; bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; if (bdev != bdev->bd_contains) - __blkdev_put(bdev->bd_contains, 1); + __blkdev_put(bdev->bd_contains, mode, 1); bdev->bd_contains = NULL; out_unlock_bdev: mutex_unlock(&bdev->bd_mutex); @@ -1104,28 +1100,9 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) return ret; } -static int __blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags, - int for_part) +int blkdev_get(struct block_device *bdev, fmode_t mode) { - /* - * This crockload is due to bad choice of ->open() type. - * It will go away. - * For now, block device ->open() routine must _not_ - * examine anything in 'inode' argument except ->i_rdev. - */ - struct file fake_file = {}; - struct dentry fake_dentry = {}; - fake_file.f_mode = mode; - fake_file.f_flags = flags; - fake_file.f_path.dentry = &fake_dentry; - fake_dentry.d_inode = bdev->bd_inode; - - return do_open(bdev, &fake_file, for_part); -} - -int blkdev_get(struct block_device *bdev, mode_t mode, unsigned flags) -{ - return __blkdev_get(bdev, mode, flags, 0); + return __blkdev_get(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_get); @@ -1142,28 +1119,36 @@ static int blkdev_open(struct inode * inode, struct file * filp) */ filp->f_flags |= O_LARGEFILE; + if (filp->f_flags & O_NDELAY) + filp->f_mode |= FMODE_NDELAY; + if (filp->f_flags & O_EXCL) + filp->f_mode |= FMODE_EXCL; + if ((filp->f_flags & O_ACCMODE) == 3) + filp->f_mode |= FMODE_WRITE_IOCTL; + bdev = bd_acquire(inode); if (bdev == NULL) return -ENOMEM; - res = do_open(bdev, filp, 0); + filp->f_mapping = bdev->bd_inode->i_mapping; + + res = blkdev_get(bdev, filp->f_mode); if (res) return res; - if (!(filp->f_flags & O_EXCL) ) + if (!(filp->f_mode & FMODE_EXCL)) return 0; if (!(res = bd_claim(bdev, filp))) return 0; - blkdev_put(bdev); + blkdev_put(bdev, filp->f_mode); return res; } -static int __blkdev_put(struct block_device *bdev, int for_part) +static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) { int ret = 0; - struct inode *bd_inode = bdev->bd_inode; struct gendisk *disk = bdev->bd_disk; struct block_device *victim = NULL; @@ -1178,7 +1163,7 @@ static int __blkdev_put(struct block_device *bdev, int for_part) } if (bdev->bd_contains == bdev) { if (disk->fops->release) - ret = disk->fops->release(bd_inode, NULL); + ret = disk->fops->release(disk, mode); } if (!bdev->bd_openers) { struct module *owner = disk->fops->owner; @@ -1197,13 +1182,13 @@ static int __blkdev_put(struct block_device *bdev, int for_part) mutex_unlock(&bdev->bd_mutex); bdput(bdev); if (victim) - __blkdev_put(victim, 1); + __blkdev_put(victim, mode, 1); return ret; } -int blkdev_put(struct block_device *bdev) +int blkdev_put(struct block_device *bdev, fmode_t mode) { - return __blkdev_put(bdev, 0); + return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); @@ -1212,12 +1197,16 @@ static int blkdev_close(struct inode * inode, struct file * filp) struct block_device *bdev = I_BDEV(filp->f_mapping->host); if (bdev->bd_holder == filp) bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, filp->f_mode); } static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { - return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); + struct block_device *bdev = I_BDEV(file->f_mapping->host); + fmode_t mode = file->f_mode; + if (file->f_flags & O_NDELAY) + mode |= FMODE_NDELAY_NOW; + return blkdev_ioctl(bdev, mode, cmd, arg); } static const struct address_space_operations def_blk_aops = { @@ -1253,7 +1242,7 @@ int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) int res; mm_segment_t old_fs = get_fs(); set_fs(KERNEL_DS); - res = blkdev_ioctl(bdev->bd_inode, NULL, cmd, arg); + res = blkdev_ioctl(bdev, 0, cmd, arg); set_fs(old_fs); return res; } @@ -1268,33 +1257,33 @@ EXPORT_SYMBOL(ioctl_by_bdev); * namespace if possible and return it. Return ERR_PTR(error) * otherwise. */ -struct block_device *lookup_bdev(const char *path) +struct block_device *lookup_bdev(const char *pathname) { struct block_device *bdev; struct inode *inode; - struct nameidata nd; + struct path path; int error; - if (!path || !*path) + if (!pathname || !*pathname) return ERR_PTR(-EINVAL); - error = path_lookup(path, LOOKUP_FOLLOW, &nd); + error = kern_path(pathname, LOOKUP_FOLLOW, &path); if (error) return ERR_PTR(error); - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; error = -ENOTBLK; if (!S_ISBLK(inode->i_mode)) goto fail; error = -EACCES; - if (nd.path.mnt->mnt_flags & MNT_NODEV) + if (path.mnt->mnt_flags & MNT_NODEV) goto fail; error = -ENOMEM; bdev = bd_acquire(inode); if (!bdev) goto fail; out: - path_put(&nd.path); + path_put(&path); return bdev; fail: bdev = ERR_PTR(error); @@ -1303,32 +1292,29 @@ fail: EXPORT_SYMBOL(lookup_bdev); /** - * open_bdev_excl - open a block device by name and set it up for use + * open_bdev_exclusive - open a block device by name and set it up for use * * @path: special file representing the block device - * @flags: %MS_RDONLY for opening read-only + * @mode: FMODE_... combination to pass be used * @holder: owner for exclusion * * Open the blockdevice described by the special file at @path, claim it * for the @holder. */ -struct block_device *open_bdev_excl(const char *path, int flags, void *holder) +struct block_device *open_bdev_exclusive(const char *path, fmode_t mode, void *holder) { struct block_device *bdev; - mode_t mode = FMODE_READ; int error = 0; bdev = lookup_bdev(path); if (IS_ERR(bdev)) return bdev; - if (!(flags & MS_RDONLY)) - mode |= FMODE_WRITE; - error = blkdev_get(bdev, mode, 0); + error = blkdev_get(bdev, mode); if (error) return ERR_PTR(error); error = -EACCES; - if (!(flags & MS_RDONLY) && bdev_read_only(bdev)) + if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) goto blkdev_put; error = bd_claim(bdev, holder); if (error) @@ -1337,26 +1323,27 @@ struct block_device *open_bdev_excl(const char *path, int flags, void *holder) return bdev; blkdev_put: - blkdev_put(bdev); + blkdev_put(bdev, mode); return ERR_PTR(error); } -EXPORT_SYMBOL(open_bdev_excl); +EXPORT_SYMBOL(open_bdev_exclusive); /** - * close_bdev_excl - release a blockdevice openen by open_bdev_excl() + * close_bdev_exclusive - close a blockdevice opened by open_bdev_exclusive() * * @bdev: blockdevice to close + * @mode: mode, must match that used to open. * - * This is the counterpart to open_bdev_excl(). + * This is the counterpart to open_bdev_exclusive(). */ -void close_bdev_excl(struct block_device *bdev) +void close_bdev_exclusive(struct block_device *bdev, fmode_t mode) { bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, mode); } -EXPORT_SYMBOL(close_bdev_excl); +EXPORT_SYMBOL(close_bdev_exclusive); int __invalidate_device(struct block_device *bdev) { diff --git a/fs/char_dev.c b/fs/char_dev.c index 262fa10e213d..700697a72618 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c @@ -386,15 +386,22 @@ static int chrdev_open(struct inode *inode, struct file *filp) cdev_put(new); if (ret) return ret; + + ret = -ENXIO; filp->f_op = fops_get(p->ops); - if (!filp->f_op) { - cdev_put(p); - return -ENXIO; - } - if (filp->f_op->open) + if (!filp->f_op) + goto out_cdev_put; + + if (filp->f_op->open) { ret = filp->f_op->open(inode,filp); - if (ret) - cdev_put(p); + if (ret) + goto out_cdev_put; + } + + return 0; + + out_cdev_put: + cdev_put(p); return ret; } diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index 06e521a945c3..8855331b2fba 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES @@ -1,3 +1,15 @@ +Version 1.55 +------------ +Various fixes to make delete of open files behavior more predictable +(when delete of an open file fails we mark the file as "delete-on-close" +in a way that more servers accept, but only if we can first rename the +file to a temporary name). Add experimental support for more safely +handling fcntl(F_SETLEASE). Convert cifs to using blocking tcp +sends, and also let tcp autotune the socket send and receive buffers. +This reduces the number of EAGAIN errors returned by TCP/IP in +high stress workloads (and the number of retries on socket writes +when sending large SMBWriteX requests). + Version 1.54 ------------ Fix premature write failure on congested networks (we would give up @@ -13,6 +25,7 @@ on dns_upcall (resolving DFS referralls). Fix plain text password authentication (requires setting SecurityFlags to 0x30030 to enable lanman and plain text though). Fix writes to be at correct offset when file is open with O_APPEND and file is on a directio (forcediretio) mount. +Fix bug in rewinding readdir directory searches. Add nodfs mount option. Version 1.53 ------------ diff --git a/fs/cifs/README b/fs/cifs/README index bd2343d4c6a6..a439dc1739b3 100644 --- a/fs/cifs/README +++ b/fs/cifs/README @@ -463,6 +463,9 @@ A partial list of the supported mount options follows: with cifs style mandatory byte range locks (and most cifs servers do not yet support requesting advisory byte range locks). + nodfs Disable DFS (global name space support) even if the + server claims to support it. This can help work around + a problem with parsing of DFS paths with Samba 3.0.24 server. remount remount the share (often used to change from ro to rw mounts or vice versa) cifsacl Report mode bits (e.g. on stat) based on the Windows ACL for @@ -488,6 +491,19 @@ A partial list of the supported mount options follows: Note that this differs from the sign mount option in that it causes encryption of data sent over this mounted share but other shares mounted to the same server are unaffected. + locallease This option is rarely needed. Fcntl F_SETLEASE is + used by some applications such as Samba and NFSv4 server to + check to see whether a file is cacheable. CIFS has no way + to explicitly request a lease, but can check whether a file + is cacheable (oplocked). Unfortunately, even if a file + is not oplocked, it could still be cacheable (ie cifs client + could grant fcntl leases if no other local processes are using + the file) for cases for example such as when the server does not + support oplocks and the user is sure that the only updates to + the file will be from this client. Specifying this mount option + will allow the cifs client to check for leases (only) locally + for files which are not oplocked instead of denying leases + in that case. (EXPERIMENTAL) sec Security mode. Allowed values are: none attempt to connection as a null user (no name) krb5 Use Kerberos version 5 authentication @@ -638,6 +654,9 @@ requires enabling CONFIG_CIFS_EXPERIMENTAL cifsacl support needed to retrieve approximated mode bits based on the contents on the CIFS ACL. + lease support: cifs will check the oplock state before calling into + the vfs to see if we can grant a lease on a file. + DNOTIFY fcntl: needed for support of directory change notification and perhaps later for file leases) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 25ecbd5b0404..ac5915d61dca 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -275,9 +275,12 @@ static int cifs_permission(struct inode *inode, int mask) cifs_sb = CIFS_SB(inode->i_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) - return 0; - else /* file mode might have been restricted at mount time + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + else + return 0; + } else /* file mode might have been restricted at mount time on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ @@ -309,6 +312,7 @@ cifs_alloc_inode(struct super_block *sb) file data or metadata */ cifs_inode->clientCanCacheRead = false; cifs_inode->clientCanCacheAll = false; + cifs_inode->delete_pending = false; cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */ /* Can not set i_flags here - they get immediately overwritten @@ -617,6 +621,37 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) return generic_file_llseek_unlocked(file, offset, origin); } +#ifdef CONFIG_CIFS_EXPERIMENTAL +static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) +{ + /* note that this is called by vfs setlease with the BKL held + although I doubt that BKL is needed here in cifs */ + struct inode *inode = file->f_path.dentry->d_inode; + + if (!(S_ISREG(inode->i_mode))) + return -EINVAL; + + /* check if file is oplocked */ + if (((arg == F_RDLCK) && + (CIFS_I(inode)->clientCanCacheRead)) || + ((arg == F_WRLCK) && + (CIFS_I(inode)->clientCanCacheAll))) + return generic_setlease(file, arg, lease); + else if (CIFS_SB(inode->i_sb)->tcon->local_lease && + !CIFS_I(inode)->clientCanCacheRead) + /* If the server claims to support oplock on this + file, then we still need to check oplock even + if the local_lease mount option is set, but there + are servers which do not support oplock for which + this mount option may be useful if the user + knows that the file won't be changed on the server + by anyone else */ + return generic_setlease(file, arg, lease); + else + return -EAGAIN; +} +#endif + struct file_system_type cifs_fs_type = { .owner = THIS_MODULE, .name = "cifs", @@ -695,6 +730,7 @@ const struct file_operations cifs_file_ops = { #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, + .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -715,6 +751,7 @@ const struct file_operations cifs_file_direct_ops = { .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, + .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; const struct file_operations cifs_file_nobrl_ops = { @@ -735,6 +772,7 @@ const struct file_operations cifs_file_nobrl_ops = { #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, + .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -754,6 +792,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { .llseek = cifs_llseek, #ifdef CONFIG_CIFS_EXPERIMENTAL .dir_notify = cifs_dir_notify, + .setlease = cifs_setlease, #endif /* CONFIG_CIFS_EXPERIMENTAL */ }; @@ -765,6 +804,7 @@ const struct file_operations cifs_dir_ops = { .dir_notify = cifs_dir_notify, #endif /* CONFIG_CIFS_EXPERIMENTAL */ .unlocked_ioctl = cifs_ioctl, + .llseek = generic_file_llseek, }; static void @@ -945,6 +985,12 @@ static int cifs_oplock_thread(void *dummyarg) the call */ /* mutex_lock(&inode->i_mutex);*/ if (S_ISREG(inode->i_mode)) { +#ifdef CONFIG_CIFS_EXPERIMENTAL + if (CIFS_I(inode)->clientCanCacheAll == 0) + break_lease(inode, FMODE_READ); + else if (CIFS_I(inode)->clientCanCacheRead == 0) + break_lease(inode, FMODE_WRITE); +#endif rc = filemap_fdatawrite(inode->i_mapping); if (CIFS_I(inode)->clientCanCacheRead == 0) { waitrc = filemap_fdatawait( diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index f7b4a5cd837b..074de0b5064d 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -101,5 +101,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* EXPERIMENTAL */ -#define CIFS_VERSION "1.54" +#define CIFS_VERSION "1.55" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0d22479d99b7..1cb1189f24e0 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -141,6 +141,8 @@ struct TCP_Server_Info { char versionMajor; char versionMinor; bool svlocal:1; /* local server or remote */ + bool noblocksnd; /* use blocking sendmsg */ + bool noautotune; /* do not autotune send buf sizes */ atomic_t socketUseCount; /* number of open cifs sessions on socket */ atomic_t inFlight; /* number of requests on the wire to server */ #ifdef CONFIG_CIFS_STATS2 @@ -285,6 +287,7 @@ struct cifsTconInfo { bool seal:1; /* transport encryption for this mounted share */ bool unix_ext:1; /* if false disable Linux extensions to CIFS protocol for this mount even if server would support */ + bool local_lease:1; /* check leases (only) on local system not remote */ /* BB add field for back pointer to sb struct(s)? */ }; @@ -353,6 +356,7 @@ struct cifsInodeInfo { bool clientCanCacheRead:1; /* read oplock */ bool clientCanCacheAll:1; /* read and writebehind oplock */ bool oplockPending:1; + bool delete_pending:1; /* DELETE_ON_CLOSE is set */ struct inode vfs_inode; }; diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 0cff7fe986e8..6f21ecb85ce5 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -36,7 +36,7 @@ extern void cifs_buf_release(void *); extern struct smb_hdr *cifs_small_buf_get(void); extern void cifs_small_buf_release(void *); extern int smb_send(struct socket *, struct smb_hdr *, - unsigned int /* length */ , struct sockaddr *); + unsigned int /* length */ , struct sockaddr *, bool); extern unsigned int _GetXid(void); extern void _FreeXid(unsigned int); #define GetXid() (int)_GetXid(); cFYI(1,("CIFS VFS: in %s as Xid: %d with uid: %d",__func__, xid,current->fsuid)); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 6f4ffe15d68d..d5eac48fc415 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -1309,6 +1309,7 @@ OldOpenRetry: cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); pfile_info->EndOfFile = pfile_info->AllocationSize; pfile_info->NumberOfLinks = cpu_to_le32(1); + pfile_info->DeletePending = 0; } } @@ -1410,6 +1411,7 @@ openRetry: pfile_info->AllocationSize = pSMBr->AllocationSize; pfile_info->EndOfFile = pSMBr->EndOfFile; pfile_info->NumberOfLinks = cpu_to_le32(1); + pfile_info->DeletePending = 0; } } @@ -1534,7 +1536,7 @@ CIFSSMBWrite(const int xid, struct cifsTconInfo *tcon, __u32 bytes_sent; __u16 byte_count; - /* cFYI(1,("write at %lld %d bytes",offset,count));*/ + /* cFYI(1, ("write at %lld %d bytes", offset, count));*/ if (tcon->ses == NULL) return -ECONNABORTED; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4c13bcdb92a5..e9f9248cb3fe 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -90,6 +90,10 @@ struct smb_vol { bool nocase:1; /* request case insensitive filenames */ bool nobrl:1; /* disable sending byte range locks to srv */ bool seal:1; /* request transport encryption on share */ + bool nodfs:1; /* Do not request DFS, even if available */ + bool local_lease:1; /* check leases only on local system, not remote */ + bool noblocksnd:1; + bool noautotune:1; unsigned int rsize; unsigned int wsize; unsigned int sockopt; @@ -100,9 +104,11 @@ struct smb_vol { static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, char *netb_name, - char *server_netb_name); + char *server_netb_name, + bool noblocksnd, + bool nosndbuf); /* ipv6 never set sndbuf size */ static int ipv6_connect(struct sockaddr_in6 *psin_server, - struct socket **csocket); + struct socket **csocket, bool noblocksnd); /* @@ -124,7 +130,7 @@ cifs_reconnect(struct TCP_Server_Info *server) struct mid_q_entry *mid_entry; spin_lock(&GlobalMid_Lock); - if (kthread_should_stop()) { + if (server->tcpStatus == CifsExiting) { /* the demux thread will exit normally next time through the loop */ spin_unlock(&GlobalMid_Lock); @@ -184,16 +190,18 @@ cifs_reconnect(struct TCP_Server_Info *server) spin_unlock(&GlobalMid_Lock); up(&server->tcpSem); - while ((!kthread_should_stop()) && (server->tcpStatus != CifsGood)) { + while ((server->tcpStatus != CifsExiting) && + (server->tcpStatus != CifsGood)) { try_to_freeze(); if (server->protocolType == IPV6) { rc = ipv6_connect(&server->addr.sockAddr6, - &server->ssocket); + &server->ssocket, server->noautotune); } else { rc = ipv4_connect(&server->addr.sockAddr, &server->ssocket, server->workstation_RFC1001_name, - server->server_RFC1001_name); + server->server_RFC1001_name, + server->noblocksnd, server->noautotune); } if (rc) { cFYI(1, ("reconnect error %d", rc)); @@ -201,7 +209,7 @@ cifs_reconnect(struct TCP_Server_Info *server) } else { atomic_inc(&tcpSesReconnectCount); spin_lock(&GlobalMid_Lock); - if (!kthread_should_stop()) + if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsGood; server->sequence_number = 0; spin_unlock(&GlobalMid_Lock); @@ -356,7 +364,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) GFP_KERNEL); set_freezable(); - while (!kthread_should_stop()) { + while (server->tcpStatus != CifsExiting) { if (try_to_freeze()) continue; if (bigbuf == NULL) { @@ -397,7 +405,7 @@ incomplete_rcv: kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length, 0 /* BB other flags? */); - if (kthread_should_stop()) { + if (server->tcpStatus == CifsExiting) { break; } else if (server->tcpStatus == CifsNeedReconnect) { cFYI(1, ("Reconnect after server stopped responding")); @@ -522,7 +530,7 @@ incomplete_rcv: total_read += length) { length = kernel_recvmsg(csocket, &smb_msg, &iov, 1, pdu_length - total_read, 0); - if (kthread_should_stop() || + if ((server->tcpStatus == CifsExiting) || (length == -EINTR)) { /* then will exit */ reconnect = 2; @@ -651,14 +659,6 @@ multi_t2_fnd: spin_unlock(&GlobalMid_Lock); wake_up_all(&server->response_q); - /* don't exit until kthread_stop is called */ - set_current_state(TASK_UNINTERRUPTIBLE); - while (!kthread_should_stop()) { - schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); - } - set_current_state(TASK_RUNNING); - /* check if we have blocked requests that need to free */ /* Note that cifs_max_pending is normally 50, but can be set at module install time to as little as two */ @@ -755,6 +755,7 @@ multi_t2_fnd: write_unlock(&GlobalSMBSeslock); kfree(server->hostname); + task_to_wake = xchg(&server->tsk, NULL); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); @@ -762,6 +763,16 @@ multi_t2_fnd: mempool_resize(cifs_req_poolp, length + cifs_min_rcv, GFP_KERNEL); + /* if server->tsk was NULL then wait for a signal before exiting */ + if (!task_to_wake) { + set_current_state(TASK_INTERRUPTIBLE); + while (!signal_pending(current)) { + schedule(); + set_current_state(TASK_INTERRUPTIBLE); + } + set_current_state(TASK_RUNNING); + } + return 0; } @@ -1186,6 +1197,10 @@ cifs_parse_mount_options(char *options, const char *devname, /* ignore */ } else if (strnicmp(data, "rw", 2) == 0) { vol->rw = true; + } else if (strnicmp(data, "noblocksend", 11) == 0) { + vol->noblocksnd = 1; + } else if (strnicmp(data, "noautotune", 10) == 0) { + vol->noautotune = 1; } else if ((strnicmp(data, "suid", 4) == 0) || (strnicmp(data, "nosuid", 6) == 0) || (strnicmp(data, "exec", 4) == 0) || @@ -1218,6 +1233,8 @@ cifs_parse_mount_options(char *options, const char *devname, vol->sfu_emul = 1; } else if (strnicmp(data, "nosfu", 5) == 0) { vol->sfu_emul = 0; + } else if (strnicmp(data, "nodfs", 5) == 0) { + vol->nodfs = 1; } else if (strnicmp(data, "posixpaths", 10) == 0) { vol->posix_paths = 1; } else if (strnicmp(data, "noposixpaths", 12) == 0) { @@ -1268,6 +1285,10 @@ cifs_parse_mount_options(char *options, const char *devname, vol->no_psx_acl = 0; } else if (strnicmp(data, "noacl", 5) == 0) { vol->no_psx_acl = 1; +#ifdef CONFIG_CIFS_EXPERIMENTAL + } else if (strnicmp(data, "locallease", 6) == 0) { + vol->local_lease = 1; +#endif } else if (strnicmp(data, "sign", 4) == 0) { vol->secFlg |= CIFSSEC_MUST_SIGN; } else if (strnicmp(data, "seal", 4) == 0) { @@ -1506,7 +1527,8 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) static int ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, - char *netbios_name, char *target_name) + char *netbios_name, char *target_name, + bool noblocksnd, bool noautotune) { int rc = 0; int connected = 0; @@ -1578,11 +1600,16 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, (*csocket)->sk->sk_sndbuf, (*csocket)->sk->sk_rcvbuf, (*csocket)->sk->sk_rcvtimeo)); (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + /* make the bufsizes depend on wsize/rsize and max requests */ - if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) - (*csocket)->sk->sk_sndbuf = 200 * 1024; - if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) - (*csocket)->sk->sk_rcvbuf = 140 * 1024; + if (noautotune) { + if ((*csocket)->sk->sk_sndbuf < (200 * 1024)) + (*csocket)->sk->sk_sndbuf = 200 * 1024; + if ((*csocket)->sk->sk_rcvbuf < (140 * 1024)) + (*csocket)->sk->sk_rcvbuf = 140 * 1024; + } /* send RFC1001 sessinit */ if (psin_server->sin_port == htons(RFC1001_PORT)) { @@ -1619,7 +1646,7 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, /* sizeof RFC1002_SESSION_REQUEST with no scope */ smb_buf->smb_buf_length = 0x81000044; rc = smb_send(*csocket, smb_buf, 0x44, - (struct sockaddr *)psin_server); + (struct sockaddr *)psin_server, noblocksnd); kfree(ses_init_buf); msleep(1); /* RFC1001 layer in at least one server requires very short break before negprot @@ -1639,7 +1666,8 @@ ipv4_connect(struct sockaddr_in *psin_server, struct socket **csocket, } static int -ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) +ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket, + bool noblocksnd) { int rc = 0; int connected = 0; @@ -1708,6 +1736,9 @@ ipv6_connect(struct sockaddr_in6 *psin_server, struct socket **csocket) the default. sock_setsockopt not used because it expects user space buffer */ (*csocket)->sk->sk_rcvtimeo = 7 * HZ; + if (!noblocksnd) + (*csocket)->sk->sk_sndtimeo = 3 * HZ; + return rc; } @@ -1845,6 +1876,16 @@ convert_delimiter(char *path, char delim) } } +static void +kill_cifsd(struct TCP_Server_Info *server) +{ + struct task_struct *task; + + task = xchg(&server->tsk, NULL); + if (task) + force_sig(SIGKILL, task); +} + int cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, char *mount_data, const char *devname) @@ -1961,11 +2002,14 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, cFYI(1, ("attempting ipv6 connect")); /* BB should we allow ipv6 on port 139? */ /* other OS never observed in Wild doing 139 with v6 */ - rc = ipv6_connect(&sin_server6, &csocket); + rc = ipv6_connect(&sin_server6, &csocket, + volume_info.noblocksnd); } else rc = ipv4_connect(&sin_server, &csocket, volume_info.source_rfc1001_name, - volume_info.target_rfc1001_name); + volume_info.target_rfc1001_name, + volume_info.noblocksnd, + volume_info.noautotune); if (rc < 0) { cERROR(1, ("Error connecting to IPv4 socket. " "Aborting operation")); @@ -1980,6 +2024,8 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, sock_release(csocket); goto out; } else { + srvTcp->noblocksnd = volume_info.noblocksnd; + srvTcp->noautotune = volume_info.noautotune; memcpy(&srvTcp->addr.sockAddr, &sin_server, sizeof(struct sockaddr_in)); atomic_set(&srvTcp->inFlight, 0); @@ -2166,6 +2212,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, for the retry flag is used */ tcon->retry = volume_info.retry; tcon->nocase = volume_info.nocase; + tcon->local_lease = volume_info.local_lease; if (tcon->seal != volume_info.seal) cERROR(1, ("transport encryption setting " "conflicts with existing tid")); @@ -2197,6 +2244,12 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, volume_info.UNC, tcon, cifs_sb->local_nls); cFYI(1, ("CIFS Tcon rc = %d", rc)); + if (volume_info.nodfs) { + tcon->Flags &= + ~SMB_SHARE_IS_IN_DFS; + cFYI(1, ("DFS disabled (%d)", + tcon->Flags)); + } } if (!rc) { atomic_inc(&pSesInfo->inUse); @@ -2225,14 +2278,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, spin_lock(&GlobalMid_Lock); srvTcp->tcpStatus = CifsExiting; spin_unlock(&GlobalMid_Lock); - if (srvTcp->tsk) { - /* If we could verify that kthread_stop would - always wake up processes blocked in - tcp in recv_mesg then we could remove the - send_sig call */ - force_sig(SIGKILL, srvTcp->tsk); - kthread_stop(srvTcp->tsk); - } + kill_cifsd(srvTcp); } /* If find_unc succeeded then rc == 0 so we can not end */ if (tcon) /* up accidently freeing someone elses tcon struct */ @@ -2245,19 +2291,15 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, temp_rc = CIFSSMBLogoff(xid, pSesInfo); /* if the socketUseCount is now zero */ if ((temp_rc == -ESHUTDOWN) && - (pSesInfo->server) && - (pSesInfo->server->tsk)) { - force_sig(SIGKILL, - pSesInfo->server->tsk); - kthread_stop(pSesInfo->server->tsk); - } + (pSesInfo->server)) + kill_cifsd(pSesInfo->server); } else { cFYI(1, ("No session or bad tcon")); - if ((pSesInfo->server) && - (pSesInfo->server->tsk)) { - force_sig(SIGKILL, - pSesInfo->server->tsk); - kthread_stop(pSesInfo->server->tsk); + if (pSesInfo->server) { + spin_lock(&GlobalMid_Lock); + srvTcp->tcpStatus = CifsExiting; + spin_unlock(&GlobalMid_Lock); + kill_cifsd(pSesInfo->server); } } sesInfoFree(pSesInfo); @@ -3544,7 +3586,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) int rc = 0; int xid; struct cifsSesInfo *ses = NULL; - struct task_struct *cifsd_task; char *tmp; xid = GetXid(); @@ -3560,7 +3601,6 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) tconInfoFree(cifs_sb->tcon); if ((ses) && (ses->server)) { /* save off task so we do not refer to ses later */ - cifsd_task = ses->server->tsk; cFYI(1, ("About to do SMBLogoff ")); rc = CIFSSMBLogoff(xid, ses); if (rc == -EBUSY) { @@ -3568,10 +3608,8 @@ cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) return 0; } else if (rc == -ESHUTDOWN) { cFYI(1, ("Waking up socket by sending signal")); - if (cifsd_task) { - force_sig(SIGKILL, cifsd_task); - kthread_stop(cifsd_task); - } + if (ses->server) + kill_cifsd(ses->server); rc = 0; } /* else - we have an smb session left on this socket do not kill cifsd */ @@ -3701,7 +3739,9 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, cERROR(1, ("Send error in SessSetup = %d", rc)); } else { cFYI(1, ("CIFS Session Established successfully")); + spin_lock(&GlobalMid_Lock); pSesInfo->status = CifsGood; + spin_unlock(&GlobalMid_Lock); } ss_err_exit: diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 62d8bd8f14c0..ead1a3bb0256 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -1824,7 +1824,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, pTcon = cifs_sb->tcon; pagevec_init(&lru_pvec, 0); - cFYI(DBG2, ("rpages: num pages %d", num_pages)); + cFYI(DBG2, ("rpages: num pages %d", num_pages)); for (i = 0; i < num_pages; ) { unsigned contig_pages; struct page *tmp_page; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a8c833345fc9..ff8c68de4a92 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -506,6 +506,7 @@ int cifs_get_inode_info(struct inode **pinode, inode = *pinode; cifsInfo = CIFS_I(inode); cifsInfo->cifsAttrs = attr; + cifsInfo->delete_pending = pfindData->DeletePending ? true : false; cFYI(1, ("Old time %ld", cifsInfo->time)); cifsInfo->time = jiffies; cFYI(1, ("New time %ld", cifsInfo->time)); @@ -772,63 +773,106 @@ out: * anything else. */ static int -cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) +cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid) { int oplock = 0; int rc; __u16 netfid; + struct inode *inode = dentry->d_inode; struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsTconInfo *tcon = cifs_sb->tcon; - __u32 dosattr; - FILE_BASIC_INFO *info_buf; + __u32 dosattr, origattr; + FILE_BASIC_INFO *info_buf = NULL; rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, - DELETE|FILE_WRITE_ATTRIBUTES, - CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, + DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, &netfid, &oplock, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); if (rc != 0) goto out; - /* set ATTR_HIDDEN and clear ATTR_READONLY */ - cifsInode = CIFS_I(inode); - dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; + origattr = cifsInode->cifsAttrs; + if (origattr == 0) + origattr |= ATTR_NORMAL; + + dosattr = origattr & ~ATTR_READONLY; if (dosattr == 0) dosattr |= ATTR_NORMAL; dosattr |= ATTR_HIDDEN; - info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); - if (info_buf == NULL) { - rc = -ENOMEM; - goto out_close; + /* set ATTR_HIDDEN and clear ATTR_READONLY, but only if needed */ + if (dosattr != origattr) { + info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); + if (info_buf == NULL) { + rc = -ENOMEM; + goto out_close; + } + info_buf->Attributes = cpu_to_le32(dosattr); + rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, + current->tgid); + /* although we would like to mark the file hidden + if that fails we will still try to rename it */ + if (rc != 0) + cifsInode->cifsAttrs = dosattr; + else + dosattr = origattr; /* since not able to change them */ } - info_buf->Attributes = cpu_to_le32(dosattr); - rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); - kfree(info_buf); - if (rc != 0) - goto out_close; - cifsInode->cifsAttrs = dosattr; - /* silly-rename the file */ - CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, + /* rename the file */ + rc = CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc != 0) { + rc = -ETXTBSY; + goto undo_setattr; + } - /* set DELETE_ON_CLOSE */ - rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); - - /* - * some samba versions return -ENOENT when we try to set the file - * disposition here. Likely a samba bug, but work around it for now - */ - if (rc == -ENOENT) - rc = 0; + /* try to set DELETE_ON_CLOSE */ + if (!cifsInode->delete_pending) { + rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, + current->tgid); + /* + * some samba versions return -ENOENT when we try to set the + * file disposition here. Likely a samba bug, but work around + * it for now. This means that some cifsXXX files may hang + * around after they shouldn't. + * + * BB: remove this hack after more servers have the fix + */ + if (rc == -ENOENT) + rc = 0; + else if (rc != 0) { + rc = -ETXTBSY; + goto undo_rename; + } + cifsInode->delete_pending = true; + } out_close: CIFSSMBClose(xid, tcon, netfid); out: + kfree(info_buf); return rc; + + /* + * reset everything back to the original state. Don't bother + * dealing with errors here since we can't do anything about + * them anyway. + */ +undo_rename: + CIFSSMBRenameOpenFile(xid, tcon, netfid, dentry->d_name.name, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); +undo_setattr: + if (dosattr != origattr) { + info_buf->Attributes = cpu_to_le32(origattr); + if (!CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, + current->tgid)) + cifsInode->cifsAttrs = origattr; + } + + goto out_close; } int cifs_unlink(struct inode *dir, struct dentry *dentry) @@ -878,7 +922,7 @@ psx_del_no_retry: } else if (rc == -ENOENT) { d_drop(dentry); } else if (rc == -ETXTBSY) { - rc = cifs_rename_pending_delete(full_path, inode, xid); + rc = cifs_rename_pending_delete(full_path, dentry, xid); if (rc == 0) drop_nlink(inode); } else if (rc == -EACCES && dosattr == 0) { @@ -1241,22 +1285,21 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, return rc; } -int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, - struct inode *target_inode, struct dentry *target_direntry) +int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, + struct inode *target_dir, struct dentry *target_dentry) { char *fromName = NULL; char *toName = NULL; struct cifs_sb_info *cifs_sb_source; struct cifs_sb_info *cifs_sb_target; - struct cifsTconInfo *pTcon; + struct cifsTconInfo *tcon; FILE_UNIX_BASIC_INFO *info_buf_source = NULL; FILE_UNIX_BASIC_INFO *info_buf_target; - int xid; - int rc; + int xid, rc, tmprc; - cifs_sb_target = CIFS_SB(target_inode->i_sb); - cifs_sb_source = CIFS_SB(source_inode->i_sb); - pTcon = cifs_sb_source->tcon; + cifs_sb_target = CIFS_SB(target_dir->i_sb); + cifs_sb_source = CIFS_SB(source_dir->i_sb); + tcon = cifs_sb_source->tcon; xid = GetXid(); @@ -1264,7 +1307,7 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, * BB: this might be allowed if same server, but different share. * Consider adding support for this */ - if (pTcon != cifs_sb_target->tcon) { + if (tcon != cifs_sb_target->tcon) { rc = -EXDEV; goto cifs_rename_exit; } @@ -1273,65 +1316,67 @@ int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, * we already have the rename sem so we do not need to * grab it again here to protect the path integrity */ - fromName = build_path_from_dentry(source_direntry); + fromName = build_path_from_dentry(source_dentry); if (fromName == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - toName = build_path_from_dentry(target_direntry); + toName = build_path_from_dentry(target_dentry); if (toName == NULL) { rc = -ENOMEM; goto cifs_rename_exit; } - rc = cifs_do_rename(xid, source_direntry, fromName, - target_direntry, toName); + rc = cifs_do_rename(xid, source_dentry, fromName, + target_dentry, toName); - if (rc == -EEXIST) { - if (pTcon->unix_ext) { - /* - * Are src and dst hardlinks of same inode? We can - * only tell with unix extensions enabled - */ - info_buf_source = - kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), - GFP_KERNEL); - if (info_buf_source == NULL) - goto unlink_target; - - info_buf_target = info_buf_source + 1; - rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, - info_buf_source, - cifs_sb_source->local_nls, - cifs_sb_source->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc != 0) - goto unlink_target; - - rc = CIFSSMBUnixQPathInfo(xid, pTcon, - toName, info_buf_target, - cifs_sb_target->local_nls, - /* remap based on source sb */ - cifs_sb_source->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (rc == -EEXIST && tcon->unix_ext) { + /* + * Are src and dst hardlinks of same inode? We can + * only tell with unix extensions enabled + */ + info_buf_source = + kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), + GFP_KERNEL); + if (info_buf_source == NULL) { + rc = -ENOMEM; + goto cifs_rename_exit; + } + + info_buf_target = info_buf_source + 1; + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, fromName, + info_buf_source, + cifs_sb_source->local_nls, + cifs_sb_source->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tmprc != 0) + goto unlink_target; + + tmprc = CIFSSMBUnixQPathInfo(xid, tcon, + toName, info_buf_target, + cifs_sb_target->local_nls, + /* remap based on source sb */ + cifs_sb_source->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); - if (rc == 0 && (info_buf_source->UniqueId == - info_buf_target->UniqueId)) - /* same file, POSIX says that this is a noop */ - goto cifs_rename_exit; - } /* else ... BB we could add the same check for Windows by + if (tmprc == 0 && (info_buf_source->UniqueId == + info_buf_target->UniqueId)) { + /* same file, POSIX says that this is a noop */ + rc = 0; + goto cifs_rename_exit; + } + } /* else ... BB we could add the same check for Windows by checking the UniqueId via FILE_INTERNAL_INFO */ + unlink_target: - /* - * we either can not tell the files are hardlinked (as with - * Windows servers) or files are not hardlinked. Delete the - * target manually before renaming to follow POSIX rather than - * Windows semantics - */ - cifs_unlink(target_inode, target_direntry); - rc = cifs_do_rename(xid, source_direntry, fromName, - target_direntry, toName); + if ((rc == -EACCES) || (rc == -EEXIST)) { + tmprc = cifs_unlink(target_dir, target_dentry); + if (tmprc) + goto cifs_rename_exit; + + rc = cifs_do_rename(xid, source_dentry, fromName, + target_dentry, toName); } cifs_rename_exit: diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 765adf12d54f..58d57299f2a0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -762,14 +762,15 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, rc)); return rc; } + cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); } while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && (rc == 0) && !cifsFile->srch_inf.endOfSearch) { cFYI(1, ("calling findnext2")); - cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, &cifsFile->srch_inf); + cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); if (rc) return -ENOENT; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index bf0e6d8e382a..ff8243a8fe3e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -161,7 +161,7 @@ void DeleteTconOplockQEntries(struct cifsTconInfo *tcon) int smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, - unsigned int smb_buf_length, struct sockaddr *sin) + unsigned int smb_buf_length, struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -178,7 +178,10 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -229,8 +232,8 @@ smb_send(struct socket *ssocket, struct smb_hdr *smb_buffer, } static int -smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, - struct sockaddr *sin) +smb_send2(struct TCP_Server_Info *server, struct kvec *iov, int n_vec, + struct sockaddr *sin, bool noblocksnd) { int rc = 0; int i = 0; @@ -240,6 +243,7 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, unsigned int total_len; int first_vec = 0; unsigned int smb_buf_length = smb_buffer->smb_buf_length; + struct socket *ssocket = server->ssocket; if (ssocket == NULL) return -ENOTSOCK; /* BB eventually add reconnect code here */ @@ -248,7 +252,10 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, smb_msg.msg_namelen = sizeof(struct sockaddr); smb_msg.msg_control = NULL; smb_msg.msg_controllen = 0; - smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; /* BB add more flags?*/ + if (noblocksnd) + smb_msg.msg_flags = MSG_DONTWAIT + MSG_NOSIGNAL; + else + smb_msg.msg_flags = MSG_NOSIGNAL; /* smb header is converted in header_assemble. bcc and rest of SMB word area, and byte area if necessary, is converted to littleendian in @@ -283,8 +290,11 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, if (rc < 0) break; - if (rc >= total_len) { - WARN_ON(rc > total_len); + if (rc == total_len) { + total_len = 0; + break; + } else if (rc > total_len) { + cERROR(1, ("sent %d requested %d", rc, total_len)); break; } if (rc == 0) { @@ -312,6 +322,16 @@ smb_send2(struct socket *ssocket, struct kvec *iov, int n_vec, i = 0; /* in case we get ENOSPC on the next send */ } + if ((total_len > 0) && (total_len != smb_buf_length + 4)) { + cFYI(1, ("partial send (%d remaining), terminating session", + total_len)); + /* If we have only sent part of an SMB then the next SMB + could be taken as the remainder of this one. We need + to kill the socket so the server throws away the partial + SMB */ + server->tcpStatus = CifsNeedReconnect; + } + if (rc < 0) { cERROR(1, ("Error %d sending data on socket to server", rc)); } else @@ -518,8 +538,9 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, #ifdef CONFIG_CIFS_STATS2 atomic_inc(&ses->server->inSend); #endif - rc = smb_send2(ses->server->ssocket, iov, n_vec, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + rc = smb_send2(ses->server, iov, n_vec, + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -711,7 +732,8 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; @@ -851,7 +873,8 @@ send_nt_cancel(struct cifsTconInfo *tcon, struct smb_hdr *in_buf, return rc; } rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); up(&ses->server->tcpSem); return rc; } @@ -941,7 +964,8 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, atomic_inc(&ses->server->inSend); #endif rc = smb_send(ses->server->ssocket, in_buf, in_buf->smb_buf_length, - (struct sockaddr *) &(ses->server->addr.sockAddr)); + (struct sockaddr *) &(ses->server->addr.sockAddr), + ses->server->noblocksnd); #ifdef CONFIG_CIFS_STATS2 atomic_dec(&ses->server->inSend); midQ->when_sent = jiffies; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index c5916228243c..75b1fa90b2cb 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -146,6 +146,9 @@ int coda_permission(struct inode *inode, int mask) if (!mask) return 0; + if ((mask & MAY_EXEC) && !execute_ok(inode)) + return -EACCES; + lock_kernel(); if (coda_cache_check(inode, mask)) diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index c51365422aa8..773f2ce9aa06 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -43,7 +43,7 @@ const struct file_operations coda_ioctl_operations = { /* the coda pioctl inode ops */ static int coda_ioctl_permission(struct inode *inode, int mask) { - return 0; + return (mask & MAY_EXEC) ? -EACCES : 0; } static int coda_pioctl(struct inode * inode, struct file * filp, diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index cfd29da714d1..0376ac66c44a 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -2,7 +2,7 @@ * An implementation of a loadable kernel mode driver providing * multiple kernel/user space bidirectional communications links. * - * Author: Alan Cox <alan@redhat.com> + * Author: Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/fs/compat.c b/fs/compat.c index 5f9ec449c799..e5f49f538502 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -869,7 +869,7 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, buf.dirent = dirent; error = vfs_readdir(file, compat_fillonedir, &buf); - if (error >= 0) + if (buf.result) error = buf.result; fput(file); @@ -956,9 +956,8 @@ asmlinkage long compat_sys_getdents(unsigned int fd, buf.error = 0; error = vfs_readdir(file, compat_filldir, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { if (put_user(file->f_pos, &lastdirent->d_off)) @@ -966,8 +965,6 @@ asmlinkage long compat_sys_getdents(unsigned int fd, else error = count - buf.count; } - -out_putf: fput(file); out: return error; @@ -1047,19 +1044,16 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, buf.error = 0; error = vfs_readdir(file, compat_filldir64, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { typeof(lastdirent->d_off) d_off = file->f_pos; - error = -EFAULT; if (__put_user_unaligned(d_off, &lastdirent->d_off)) - goto out_putf; - error = count - buf.count; + error = -EFAULT; + else + error = count - buf.count; } - -out_putf: fput(file); out: return error; @@ -1475,6 +1469,57 @@ out_ret: #define __COMPAT_NFDBITS (8 * sizeof(compat_ulong_t)) +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec ts; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&ts); + ts = timespec_sub(*end_time, ts); + if (ts.tv_sec < 0) + ts.tv_sec = ts.tv_nsec = 0; + + if (timeval) { + struct compat_timeval rtv; + + rtv.tv_sec = ts.tv_sec; + rtv.tv_usec = ts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + } else { + struct compat_timespec rts; + + rts.tv_sec = ts.tv_sec; + rts.tv_nsec = ts.tv_nsec; + + if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + } + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + /* * Ooo, nasty. We need here to frob 32-bit unsigned longs to * 64-bit unsigned longs. @@ -1556,7 +1601,8 @@ int compat_set_fd_set(unsigned long nr, compat_ulong_t __user *ufdset, ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int compat_core_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, s64 *timeout) + compat_ulong_t __user *outp, compat_ulong_t __user *exp, + struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -1603,7 +1649,7 @@ int compat_core_sys_select(int n, compat_ulong_t __user *inp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -1629,7 +1675,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, compat_ulong_t __user *outp, compat_ulong_t __user *exp, struct compat_timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct compat_timeval tv; int ret; @@ -1637,43 +1683,15 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, + tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), + (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, 1000000/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct compat_timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (compat_timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -1686,15 +1704,16 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; - s64 timeout = MAX_SCHEDULE_TIMEOUT; struct compat_timespec ts; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; } @@ -1709,51 +1728,8 @@ asmlinkage long compat_sys_pselect7(int n, compat_ulong_t __user *inp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - do { - if (tsp) { - if ((unsigned long)ts.tv_sec < MAX_SELECT_SECONDS) { - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * (unsigned long)HZ; - ts.tv_sec = 0; - ts.tv_nsec = 0; - } else { - ts.tv_sec -= MAX_SELECT_SECONDS; - timeout = MAX_SELECT_SECONDS * HZ; - } - } - - ret = compat_core_sys_select(n, inp, outp, exp, &timeout); - - } while (!ret && !timeout && tsp && (ts.tv_sec || ts.tv_nsec)); - - if (tsp) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - - rts.tv_sec = timeout / HZ; - rts.tv_nsec = (timeout % HZ) * (NSEC_PER_SEC/HZ); - if (rts.tv_nsec >= NSEC_PER_SEC) { - rts.tv_sec++; - rts.tv_nsec -= NSEC_PER_SEC; - } - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = compat_core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -1798,18 +1774,16 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, compat_sigset_t ss32; sigset_t ksigmask, sigsaved; struct compat_timespec ts; - s64 timeout = -1; + struct timespec end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* We assume that ts.tv_sec is always lower than - the number of seconds that can be expressed in - an s64. Otherwise the compiler bitches at us */ - timeout = DIV_ROUND_UP(ts.tv_nsec, 1000000000/HZ); - timeout += ts.tv_sec * HZ; + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -1823,7 +1797,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -1841,31 +1815,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct compat_timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. */ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (compat_timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index bf74973b0492..932a92b31483 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -108,18 +108,18 @@ out: } -static int get_target(const char *symname, struct nameidata *nd, +static int get_target(const char *symname, struct path *path, struct config_item **target) { int ret; - ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd); + ret = kern_path(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, path); if (!ret) { - if (nd->path.dentry->d_sb == configfs_sb) { - *target = configfs_get_config_item(nd->path.dentry); + if (path->dentry->d_sb == configfs_sb) { + *target = configfs_get_config_item(path->dentry); if (!*target) { ret = -ENOENT; - path_put(&nd->path); + path_put(path); } } else ret = -EPERM; @@ -132,7 +132,7 @@ static int get_target(const char *symname, struct nameidata *nd, int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) { int ret; - struct nameidata nd; + struct path path; struct configfs_dirent *sd; struct config_item *parent_item; struct config_item *target_item; @@ -159,7 +159,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna !type->ct_item_ops->allow_link) goto out_put; - ret = get_target(symname, &nd, &target_item); + ret = get_target(symname, &path, &target_item); if (ret) goto out_put; @@ -174,7 +174,7 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna } config_item_put(target_item); - path_put(&nd.path); + path_put(&path); out_put: config_item_put(parent_item); diff --git a/fs/dcache.c b/fs/dcache.c index e7a1a99b7464..a1d86c7f3e66 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -69,6 +69,7 @@ struct dentry_stat_t dentry_stat = { static void __d_free(struct dentry *dentry) { + WARN_ON(!list_empty(&dentry->d_alias)); if (dname_external(dentry)) kfree(dentry->d_name.name); kmem_cache_free(dentry_cache, dentry); @@ -174,9 +175,12 @@ static struct dentry *d_kill(struct dentry *dentry) dentry_stat.nr_dentry--; /* For d_free, below */ /*drops the locks, at that point nobody can reach this dentry */ dentry_iput(dentry); - parent = dentry->d_parent; + if (IS_ROOT(dentry)) + parent = NULL; + else + parent = dentry->d_parent; d_free(dentry); - return dentry == parent ? NULL : parent; + return parent; } /* @@ -666,11 +670,12 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) BUG(); } - parent = dentry->d_parent; - if (parent == dentry) + if (IS_ROOT(dentry)) parent = NULL; - else + else { + parent = dentry->d_parent; atomic_dec(&parent->d_count); + } list_del(&dentry->d_u.d_child); detached++; @@ -977,6 +982,15 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name) return d_alloc(parent, &q); } +/* the caller must hold dcache_lock */ +static void __d_instantiate(struct dentry *dentry, struct inode *inode) +{ + if (inode) + list_add(&dentry->d_alias, &inode->i_dentry); + dentry->d_inode = inode; + fsnotify_d_instantiate(dentry, inode); +} + /** * d_instantiate - fill in inode information for a dentry * @entry: dentry to complete @@ -996,10 +1010,7 @@ void d_instantiate(struct dentry *entry, struct inode * inode) { BUG_ON(!list_empty(&entry->d_alias)); spin_lock(&dcache_lock); - if (inode) - list_add(&entry->d_alias, &inode->i_dentry); - entry->d_inode = inode; - fsnotify_d_instantiate(entry, inode); + __d_instantiate(entry, inode); spin_unlock(&dcache_lock); security_d_instantiate(entry, inode); } @@ -1029,7 +1040,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, unsigned int hash = entry->d_name.hash; if (!inode) { - entry->d_inode = NULL; + __d_instantiate(entry, NULL); return NULL; } @@ -1048,9 +1059,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry, return alias; } - list_add(&entry->d_alias, &inode->i_dentry); - entry->d_inode = inode; - fsnotify_d_instantiate(entry, inode); + __d_instantiate(entry, inode); return NULL; } @@ -1111,69 +1120,71 @@ static inline struct hlist_head *d_hash(struct dentry *parent, } /** - * d_alloc_anon - allocate an anonymous dentry + * d_obtain_alias - find or allocate a dentry for a given inode * @inode: inode to allocate the dentry for * - * This is similar to d_alloc_root. It is used by filesystems when - * creating a dentry for a given inode, often in the process of - * mapping a filehandle to a dentry. The returned dentry may be - * anonymous, or may have a full name (if the inode was already - * in the cache). The file system may need to make further - * efforts to connect this dentry into the dcache properly. + * Obtain a dentry for an inode resulting from NFS filehandle conversion or + * similar open by handle operations. The returned dentry may be anonymous, + * or may have a full name (if the inode was already in the cache). * - * When called on a directory inode, we must ensure that - * the inode only ever has one dentry. If a dentry is - * found, that is returned instead of allocating a new one. + * When called on a directory inode, we must ensure that the inode only ever + * has one dentry. If a dentry is found, that is returned instead of + * allocating a new one. * * On successful return, the reference to the inode has been transferred - * to the dentry. If %NULL is returned (indicating kmalloc failure), - * the reference on the inode has not been released. + * to the dentry. In case of an error the reference on the inode is released. + * To make it easier to use in export operations a %NULL or IS_ERR inode may + * be passed in and will be the error will be propagate to the return value, + * with a %NULL @inode replaced by ERR_PTR(-ESTALE). */ - -struct dentry * d_alloc_anon(struct inode *inode) +struct dentry *d_obtain_alias(struct inode *inode) { static const struct qstr anonstring = { .name = "" }; struct dentry *tmp; struct dentry *res; - if ((res = d_find_alias(inode))) { - iput(inode); - return res; - } + if (!inode) + return ERR_PTR(-ESTALE); + if (IS_ERR(inode)) + return ERR_CAST(inode); - tmp = d_alloc(NULL, &anonstring); - if (!tmp) - return NULL; + res = d_find_alias(inode); + if (res) + goto out_iput; + tmp = d_alloc(NULL, &anonstring); + if (!tmp) { + res = ERR_PTR(-ENOMEM); + goto out_iput; + } tmp->d_parent = tmp; /* make sure dput doesn't croak */ - + spin_lock(&dcache_lock); res = __d_find_alias(inode, 0); - if (!res) { - /* attach a disconnected dentry */ - res = tmp; - tmp = NULL; - spin_lock(&res->d_lock); - res->d_sb = inode->i_sb; - res->d_parent = res; - res->d_inode = inode; - res->d_flags |= DCACHE_DISCONNECTED; - res->d_flags &= ~DCACHE_UNHASHED; - list_add(&res->d_alias, &inode->i_dentry); - hlist_add_head(&res->d_hash, &inode->i_sb->s_anon); - spin_unlock(&res->d_lock); - - inode = NULL; /* don't drop reference */ + if (res) { + spin_unlock(&dcache_lock); + dput(tmp); + goto out_iput; } + + /* attach a disconnected dentry */ + spin_lock(&tmp->d_lock); + tmp->d_sb = inode->i_sb; + tmp->d_inode = inode; + tmp->d_flags |= DCACHE_DISCONNECTED; + tmp->d_flags &= ~DCACHE_UNHASHED; + list_add(&tmp->d_alias, &inode->i_dentry); + hlist_add_head(&tmp->d_hash, &inode->i_sb->s_anon); + spin_unlock(&tmp->d_lock); + spin_unlock(&dcache_lock); + return tmp; - if (inode) - iput(inode); - if (tmp) - dput(tmp); + out_iput: + iput(inode); return res; } - +EXPORT_SYMBOL_GPL(d_obtain_alias); /** * d_splice_alias - splice a disconnected dentry into the tree if one exists @@ -1200,17 +1211,14 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) new = __d_find_alias(inode, 1); if (new) { BUG_ON(!(new->d_flags & DCACHE_DISCONNECTED)); - fsnotify_d_instantiate(new, inode); spin_unlock(&dcache_lock); security_d_instantiate(new, inode); d_rehash(dentry); d_move(new, dentry); iput(inode); } else { - /* d_instantiate takes dcache_lock, so we do it by hand */ - list_add(&dentry->d_alias, &inode->i_dentry); - dentry->d_inode = inode; - fsnotify_d_instantiate(dentry, inode); + /* already taking dcache_lock, so d_add() by hand */ + __d_instantiate(dentry, inode); spin_unlock(&dcache_lock); security_d_instantiate(dentry, inode); d_rehash(dentry); @@ -1293,8 +1301,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, * d_instantiate() by hand because it takes dcache_lock which * we already hold. */ - list_add(&found->d_alias, &inode->i_dentry); - found->d_inode = inode; + __d_instantiate(found, inode); spin_unlock(&dcache_lock); security_d_instantiate(found, inode); return found; @@ -1456,8 +1463,6 @@ out: * d_validate - verify dentry provided from insecure source * @dentry: The dentry alleged to be valid child of @dparent * @dparent: The parent dentry (known to be valid) - * @hash: Hash of the dentry - * @len: Length of the name * * An insecure source has sent us a dentry, here we verify it and dget() it. * This is used by ncpfs in its readdir implementation. @@ -1714,18 +1719,23 @@ void d_move(struct dentry * dentry, struct dentry * target) spin_unlock(&dcache_lock); } -/* - * Helper that returns 1 if p1 is a parent of p2, else 0 +/** + * d_ancestor - search for an ancestor + * @p1: ancestor dentry + * @p2: child dentry + * + * Returns the ancestor dentry of p2 which is a child of p1, if p1 is + * an ancestor of p2, else NULL. */ -static int d_isparent(struct dentry *p1, struct dentry *p2) +struct dentry *d_ancestor(struct dentry *p1, struct dentry *p2) { struct dentry *p; - for (p = p2; p->d_parent != p; p = p->d_parent) { + for (p = p2; !IS_ROOT(p); p = p->d_parent) { if (p->d_parent == p1) - return 1; + return p; } - return 0; + return NULL; } /* @@ -1749,7 +1759,7 @@ static struct dentry *__d_unalias(struct dentry *dentry, struct dentry *alias) /* Check for loops */ ret = ERR_PTR(-ELOOP); - if (d_isparent(alias, dentry)) + if (d_ancestor(alias, dentry)) goto out_err; /* See lock_rename() */ @@ -1822,7 +1832,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode) if (!inode) { actual = dentry; - dentry->d_inode = NULL; + __d_instantiate(dentry, NULL); goto found_lock; } @@ -2149,32 +2159,27 @@ out: * Caller must ensure that "new_dentry" is pinned before calling is_subdir() */ -int is_subdir(struct dentry * new_dentry, struct dentry * old_dentry) +int is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) { int result; - struct dentry * saved = new_dentry; unsigned long seq; - /* need rcu_readlock to protect against the d_parent trashing due to - * d_move + /* FIXME: This is old behavior, needed? Please check callers. */ + if (new_dentry == old_dentry) + return 1; + + /* + * Need rcu_readlock to protect against the d_parent trashing + * due to d_move */ rcu_read_lock(); - do { + do { /* for restarting inner loop in case of seq retry */ - new_dentry = saved; - result = 0; seq = read_seqbegin(&rename_lock); - for (;;) { - if (new_dentry != old_dentry) { - struct dentry * parent = new_dentry->d_parent; - if (parent == new_dentry) - break; - new_dentry = parent; - continue; - } + if (d_ancestor(old_dentry, new_dentry)) result = 1; - break; - } + else + result = 0; } while (read_seqretry(&rename_lock, seq)); rcu_read_unlock(); @@ -2344,7 +2349,6 @@ void __init vfs_caches_init(unsigned long mempages) } EXPORT_SYMBOL(d_alloc); -EXPORT_SYMBOL(d_alloc_anon); EXPORT_SYMBOL(d_alloc_root); EXPORT_SYMBOL(d_delete); EXPORT_SYMBOL(d_find_alias); diff --git a/fs/dquot.c b/fs/dquot.c index da30a27f2242..5e95261005b2 100644 --- a/fs/dquot.c +++ b/fs/dquot.c @@ -1805,19 +1805,19 @@ int vfs_quota_on_path(struct super_block *sb, int type, int format_id, } /* Actual function called from quotactl() */ -int vfs_quota_on(struct super_block *sb, int type, int format_id, char *path, +int vfs_quota_on(struct super_block *sb, int type, int format_id, char *name, int remount) { - struct nameidata nd; + struct path path; int error; if (remount) return vfs_quota_on_remount(sb, type); - error = path_lookup(path, LOOKUP_FOLLOW, &nd); + error = kern_path(name, LOOKUP_FOLLOW, &path); if (!error) { - error = vfs_quota_on_path(sb, type, format_id, &nd.path); - path_put(&nd.path); + error = vfs_quota_on_path(sb, type, format_id, &path); + path_put(&path); } return error; } diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c index 06db79d05c12..6046239465a1 100644 --- a/fs/ecryptfs/crypto.c +++ b/fs/ecryptfs/crypto.c @@ -1251,6 +1251,7 @@ struct kmem_cache *ecryptfs_header_cache_2; /** * ecryptfs_write_headers_virt * @page_virt: The virtual address to write the headers to + * @max: The size of memory allocated at page_virt * @size: Set to the number of bytes written by this function * @crypt_stat: The cryptographic context * @ecryptfs_dentry: The eCryptfs dentry @@ -1278,7 +1279,8 @@ struct kmem_cache *ecryptfs_header_cache_2; * * Returns zero on success */ -static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, +static int ecryptfs_write_headers_virt(char *page_virt, size_t max, + size_t *size, struct ecryptfs_crypt_stat *crypt_stat, struct dentry *ecryptfs_dentry) { @@ -1296,7 +1298,7 @@ static int ecryptfs_write_headers_virt(char *page_virt, size_t *size, offset += written; rc = ecryptfs_generate_key_packet_set((page_virt + offset), crypt_stat, ecryptfs_dentry, &written, - PAGE_CACHE_SIZE - offset); + max - offset); if (rc) ecryptfs_printk(KERN_WARNING, "Error generating key packet " "set; rc = [%d]\n", rc); @@ -1368,14 +1370,14 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out; } /* Released in this function */ - virt = kzalloc(crypt_stat->num_header_bytes_at_front, GFP_KERNEL); + virt = (char *)get_zeroed_page(GFP_KERNEL); if (!virt) { printk(KERN_ERR "%s: Out of memory\n", __func__); rc = -ENOMEM; goto out; } - rc = ecryptfs_write_headers_virt(virt, &size, crypt_stat, - ecryptfs_dentry); + rc = ecryptfs_write_headers_virt(virt, PAGE_CACHE_SIZE, &size, + crypt_stat, ecryptfs_dentry); if (unlikely(rc)) { printk(KERN_ERR "%s: Error whilst writing headers; rc = [%d]\n", __func__, rc); @@ -1393,8 +1395,7 @@ int ecryptfs_write_metadata(struct dentry *ecryptfs_dentry) goto out_free; } out_free: - memset(virt, 0, crypt_stat->num_header_bytes_at_front); - kfree(virt); + free_page((unsigned long)virt); out: return rc; } diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 046e027a4cb1..64d2ba980df4 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c @@ -471,31 +471,26 @@ out: */ static int ecryptfs_read_super(struct super_block *sb, const char *dev_name) { + struct path path; int rc; - struct nameidata nd; - struct dentry *lower_root; - struct vfsmount *lower_mnt; - memset(&nd, 0, sizeof(struct nameidata)); - rc = path_lookup(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &nd); + rc = kern_path(dev_name, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path); if (rc) { ecryptfs_printk(KERN_WARNING, "path_lookup() failed\n"); goto out; } - lower_root = nd.path.dentry; - lower_mnt = nd.path.mnt; - ecryptfs_set_superblock_lower(sb, lower_root->d_sb); - sb->s_maxbytes = lower_root->d_sb->s_maxbytes; - sb->s_blocksize = lower_root->d_sb->s_blocksize; - ecryptfs_set_dentry_lower(sb->s_root, lower_root); - ecryptfs_set_dentry_lower_mnt(sb->s_root, lower_mnt); - rc = ecryptfs_interpose(lower_root, sb->s_root, sb, 0); + ecryptfs_set_superblock_lower(sb, path.dentry->d_sb); + sb->s_maxbytes = path.dentry->d_sb->s_maxbytes; + sb->s_blocksize = path.dentry->d_sb->s_blocksize; + ecryptfs_set_dentry_lower(sb->s_root, path.dentry); + ecryptfs_set_dentry_lower_mnt(sb->s_root, path.mnt); + rc = ecryptfs_interpose(path.dentry, sb->s_root, sb, 0); if (rc) goto out_free; rc = 0; goto out; out_free: - path_put(&nd.path); + path_put(&path); out: return rc; } diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 291abb11e20e..c3fb5f9c4a44 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c @@ -112,35 +112,14 @@ struct dentry *efs_fh_to_parent(struct super_block *sb, struct fid *fid, struct dentry *efs_get_parent(struct dentry *child) { - struct dentry *parent; - struct inode *inode; + struct dentry *parent = ERR_PTR(-ENOENT); efs_ino_t ino; - long error; lock_kernel(); - - error = -ENOENT; ino = efs_find_entry(child->d_inode, "..", 2); - if (!ino) - goto fail; - - inode = efs_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); - goto fail; - } - - error = -ENOMEM; - parent = d_alloc_anon(inode); - if (!parent) - goto fail_iput; - + if (ino) + parent = d_obtain_alias(efs_iget(child->d_inode->i_sb, ino)); unlock_kernel(); - return parent; - fail_iput: - iput(inode); - fail: - unlock_kernel(); - return ERR_PTR(error); + return parent; } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 99368bda0261..aec5c13f6341 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -930,8 +930,15 @@ errxit: * inside the main ready-list here. */ for (nepi = ep->ovflist; (epi = nepi) != NULL; - nepi = epi->next, epi->next = EP_UNACTIVE_PTR) - list_add_tail(&epi->rdllink, &ep->rdllist); + nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { + /* + * If the above loop quit with errors, the epoll item might still + * be linked to "txlist", and the list_splice() done below will + * take care of those cases. + */ + if (!ep_is_linked(&epi->rdllink)) + list_add_tail(&epi->rdllink, &ep->rdllist); + } /* * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after * releasing the lock, events will be queued in the normal way inside diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index cc91227d3bb8..80246bad1b7f 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -94,9 +94,8 @@ find_disconnected_root(struct dentry *dentry) * It may already be, as the flag isn't always updated when connection happens. */ static int -reconnect_path(struct vfsmount *mnt, struct dentry *target_dir) +reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) { - char nbuf[NAME_MAX+1]; int noprogress = 0; int err = -ESTALE; @@ -281,13 +280,14 @@ static int get_name(struct vfsmount *mnt, struct dentry *dentry, int old_seq = buffer.sequence; error = vfs_readdir(file, filldir_one, &buffer); + if (buffer.found) { + error = 0; + break; + } if (error < 0) break; - error = 0; - if (buffer.found) - break; error = -ENOENT; if (old_seq == buffer.sequence) break; @@ -360,14 +360,13 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, { const struct export_operations *nop = mnt->mnt_sb->s_export_op; struct dentry *result, *alias; + char nbuf[NAME_MAX+1]; int err; /* * Try to get any dentry for the given file handle from the filesystem. */ result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); - if (!result) - result = ERR_PTR(-ESTALE); if (IS_ERR(result)) return result; @@ -381,7 +380,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * filesystem root. */ if (result->d_flags & DCACHE_DISCONNECTED) { - err = reconnect_path(mnt, result); + err = reconnect_path(mnt, result, nbuf); if (err) goto err_result; } @@ -397,7 +396,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * It's not a directory. Life is a little more complicated. */ struct dentry *target_dir, *nresult; - char nbuf[NAME_MAX+1]; /* * See if either the dentry we just got from the filesystem @@ -422,8 +420,6 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, fh_len, fileid_type); - if (!target_dir) - goto err_result; err = PTR_ERR(target_dir); if (IS_ERR(target_dir)) goto err_result; @@ -433,7 +429,7 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * connected to the filesystem root. The VFS really doesn't * like disconnected directories.. */ - err = reconnect_path(mnt, target_dir); + err = reconnect_path(mnt, target_dir, nbuf); if (err) { dput(target_dir); goto err_result; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 11a49ce84392..9a0fc400f91c 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -354,11 +354,11 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) * (as a parameter - res_dir). Page is returned mapped and unlocked. * Entry is guaranteed to be valid. */ -struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, - struct dentry *dentry, struct page ** res_page) +struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir, + struct qstr *child, struct page ** res_page) { - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + const char *name = child->name; + int namelen = child->len; unsigned reclen = EXT2_DIR_REC_LEN(namelen); unsigned long start, n; unsigned long npages = dir_pages(dir); @@ -431,13 +431,13 @@ struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) return de; } -ino_t ext2_inode_by_name(struct inode * dir, struct dentry *dentry) +ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) { ino_t res = 0; - struct ext2_dir_entry_2 * de; + struct ext2_dir_entry_2 *de; struct page *page; - de = ext2_find_entry (dir, dentry, &page); + de = ext2_find_entry (dir, child, &page); if (de) { res = le32_to_cpu(de->inode); ext2_put_page(page); diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index bae998c1e44e..3203042b36ef 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -105,9 +105,9 @@ extern void ext2_rsv_window_add(struct super_block *sb, struct ext2_reserve_wind /* dir.c */ extern int ext2_add_link (struct dentry *, struct inode *); -extern ino_t ext2_inode_by_name(struct inode *, struct dentry *); +extern ino_t ext2_inode_by_name(struct inode *, struct qstr *); extern int ext2_make_empty(struct inode *, struct inode *); -extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct dentry *, struct page **); +extern struct ext2_dir_entry_2 * ext2_find_entry (struct inode *,struct qstr *, struct page **); extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *); extern int ext2_empty_dir (struct inode *); extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 80c97fd8c571..2a747252ec12 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -60,7 +60,7 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str if (dentry->d_name.len > EXT2_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ino = ext2_inode_by_name(dir, dentry); + ino = ext2_inode_by_name(dir, &dentry->d_name); inode = NULL; if (ino) { inode = ext2_iget(dir->i_sb, ino); @@ -72,27 +72,11 @@ static struct dentry *ext2_lookup(struct inode * dir, struct dentry *dentry, str struct dentry *ext2_get_parent(struct dentry *child) { - unsigned long ino; - struct dentry *parent; - struct inode *inode; - struct dentry dotdot; - - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - - ino = ext2_inode_by_name(child->d_inode, &dotdot); + struct qstr dotdot = {.name = "..", .len = 2}; + unsigned long ino = ext2_inode_by_name(child->d_inode, &dotdot); if (!ino) return ERR_PTR(-ENOENT); - inode = ext2_iget(child->d_inode->i_sb, ino); - - if (IS_ERR(inode)) - return ERR_CAST(inode); - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(ext2_iget(child->d_inode->i_sb, ino)); } /* @@ -257,7 +241,7 @@ static int ext2_unlink(struct inode * dir, struct dentry *dentry) struct page * page; int err = -ENOENT; - de = ext2_find_entry (dir, dentry, &page); + de = ext2_find_entry (dir, &dentry->d_name, &page); if (!de) goto out; @@ -299,7 +283,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, struct ext2_dir_entry_2 * old_de; int err = -ENOENT; - old_de = ext2_find_entry (old_dir, old_dentry, &old_page); + old_de = ext2_find_entry (old_dir, &old_dentry->d_name, &old_page); if (!old_de) goto out; @@ -319,7 +303,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry, goto out_dir; err = -ENOENT; - new_de = ext2_find_entry (new_dir, new_dentry, &new_page); + new_de = ext2_find_entry (new_dir, &new_dentry->d_name, &new_page); if (!new_de) goto out_dir; inode_inc_link_count(old_inode); diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c index 4fb94c20041b..b72b85884223 100644 --- a/fs/ext2/xip.c +++ b/fs/ext2/xip.c @@ -11,6 +11,7 @@ #include <linux/buffer_head.h> #include <linux/ext2_fs_sb.h> #include <linux/ext2_fs.h> +#include <linux/blkdev.h> #include "ext2.h" #include "xip.h" diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 4c82531ea0a8..5853f4440af4 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c @@ -456,17 +456,8 @@ static int ext3_dx_readdir(struct file * filp, if (info->extra_fname) { if (call_filldir(filp, dirent, filldir, info->extra_fname)) goto finished; - info->extra_fname = NULL; - info->curr_node = rb_next(info->curr_node); - if (!info->curr_node) { - if (info->next_hash == ~0) { - filp->f_pos = EXT3_HTREE_EOF; - goto finished; - } - info->curr_hash = info->next_hash; - info->curr_minor_hash = 0; - } + goto next_node; } else if (!info->curr_node) info->curr_node = rb_first(&info->root); @@ -498,9 +489,14 @@ static int ext3_dx_readdir(struct file * filp, info->curr_minor_hash = fname->minor_hash; if (call_filldir(filp, dirent, filldir, fname)) break; - + next_node: info->curr_node = rb_next(info->curr_node); - if (!info->curr_node) { + if (info->curr_node) { + fname = rb_entry(info->curr_node, struct fname, + rb_hash); + info->curr_hash = fname->hash; + info->curr_minor_hash = fname->minor_hash; + } else { if (info->next_hash == ~0) { filp->f_pos = EXT3_HTREE_EOF; break; diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index 0d0c70151642..b7394d05ee8e 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -239,7 +239,7 @@ setrsvsz_out: case EXT3_IOC_GROUP_EXTEND: { ext3_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; - int err; + int err, err2; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -254,8 +254,10 @@ setrsvsz_out: } err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); journal_lock_updates(EXT3_SB(sb)->s_journal); - journal_flush(EXT3_SB(sb)->s_journal); + err2 = journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); + if (err == 0) + err = err2; group_extend_out: mnt_drop_write(filp->f_path.mnt); return err; @@ -263,7 +265,7 @@ group_extend_out: case EXT3_IOC_GROUP_ADD: { struct ext3_new_group_data input; struct super_block *sb = inode->i_sb; - int err; + int err, err2; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; @@ -280,8 +282,10 @@ group_extend_out: err = ext3_group_add(sb, &input); journal_lock_updates(EXT3_SB(sb)->s_journal); - journal_flush(EXT3_SB(sb)->s_journal); + err2 = journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); + if (err == 0) + err = err2; group_add_out: mnt_drop_write(filp->f_path.mnt); return err; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index de13e919cd81..3e5edc92aa0b 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -159,7 +159,7 @@ static void dx_set_count (struct dx_entry *entries, unsigned value); static void dx_set_limit (struct dx_entry *entries, unsigned value); static unsigned dx_root_limit (struct inode *dir, unsigned infosize); static unsigned dx_node_limit (struct inode *dir); -static struct dx_frame *dx_probe(struct dentry *dentry, +static struct dx_frame *dx_probe(struct qstr *entry, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame, @@ -176,8 +176,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash); -static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, - struct ext3_dir_entry_2 **res_dir, int *err); +static struct buffer_head * ext3_dx_find_entry(struct inode *dir, + struct qstr *entry, struct ext3_dir_entry_2 **res_dir, + int *err); static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); @@ -342,7 +343,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, * back to userspace. */ static struct dx_frame * -dx_probe(struct dentry *dentry, struct inode *dir, +dx_probe(struct qstr *entry, struct inode *dir, struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) { unsigned count, indirect; @@ -353,8 +354,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, u32 hash; frame->bh = NULL; - if (dentry) - dir = dentry->d_parent->d_inode; if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) goto fail; root = (struct dx_root *) bh->b_data; @@ -370,8 +369,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, } hinfo->hash_version = root->info.hash_version; hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; - if (dentry) - ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); + if (entry) + ext3fs_dirhash(entry->name, entry->len, hinfo); hash = hinfo->hash; if (root->info.unused_flags & 1) { @@ -803,15 +802,15 @@ static inline int ext3_match (int len, const char * const name, */ static inline int search_dirblock(struct buffer_head * bh, struct inode *dir, - struct dentry *dentry, + struct qstr *child, unsigned long offset, struct ext3_dir_entry_2 ** res_dir) { struct ext3_dir_entry_2 * de; char * dlimit; int de_len; - const char *name = dentry->d_name.name; - int namelen = dentry->d_name.len; + const char *name = child->name; + int namelen = child->len; de = (struct ext3_dir_entry_2 *) bh->b_data; dlimit = bh->b_data + dir->i_sb->s_blocksize; @@ -850,8 +849,9 @@ static inline int search_dirblock(struct buffer_head * bh, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) +static struct buffer_head *ext3_find_entry(struct inode *dir, + struct qstr *entry, + struct ext3_dir_entry_2 **res_dir) { struct super_block * sb; struct buffer_head * bh_use[NAMEI_RA_SIZE]; @@ -863,16 +863,15 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry, buffer */ int num = 0; int nblocks, i, err; - struct inode *dir = dentry->d_parent->d_inode; int namelen; *res_dir = NULL; sb = dir->i_sb; - namelen = dentry->d_name.len; + namelen = entry->len; if (namelen > EXT3_NAME_LEN) return NULL; if (is_dx(dir)) { - bh = ext3_dx_find_entry(dentry, res_dir, &err); + bh = ext3_dx_find_entry(dir, entry, res_dir, &err); /* * On success, or if the error was file not found, * return. Otherwise, fall back to doing a search the @@ -923,7 +922,7 @@ restart: brelse(bh); goto next; } - i = search_dirblock(bh, dir, dentry, + i = search_dirblock(bh, dir, entry, block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { EXT3_I(dir)->i_dir_start_lookup = block; @@ -957,8 +956,9 @@ cleanup_and_exit: return ret; } -static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, - struct ext3_dir_entry_2 **res_dir, int *err) +static struct buffer_head * ext3_dx_find_entry(struct inode *dir, + struct qstr *entry, struct ext3_dir_entry_2 **res_dir, + int *err) { struct super_block * sb; struct dx_hash_info hinfo; @@ -968,14 +968,13 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, struct buffer_head *bh; unsigned long block; int retval; - int namelen = dentry->d_name.len; - const u8 *name = dentry->d_name.name; - struct inode *dir = dentry->d_parent->d_inode; + int namelen = entry->len; + const u8 *name = entry->name; sb = dir->i_sb; /* NFS may look up ".." - look at dx_root directory block */ - if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ - if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + if (namelen > 2 || name[0] != '.'|| (namelen == 2 && name[1] != '.')) { + if (!(frame = dx_probe(entry, dir, &hinfo, frames, err))) return NULL; } else { frame = frames; @@ -1036,7 +1035,7 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str if (dentry->d_name.len > EXT3_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext3_find_entry(dentry, &de); + bh = ext3_find_entry(dir, &dentry->d_name, &de); inode = NULL; if (bh) { unsigned long ino = le32_to_cpu(de->inode); @@ -1057,18 +1056,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str struct dentry *ext3_get_parent(struct dentry *child) { unsigned long ino; - struct dentry *parent; - struct inode *inode; - struct dentry dotdot; + struct qstr dotdot = {.name = "..", .len = 2}; struct ext3_dir_entry_2 * de; struct buffer_head *bh; - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - dotdot.d_parent = child; /* confusing, isn't it! */ - - bh = ext3_find_entry(&dotdot, &de); - inode = NULL; + bh = ext3_find_entry(child->d_inode, &dotdot, &de); if (!bh) return ERR_PTR(-ENOENT); ino = le32_to_cpu(de->inode); @@ -1080,16 +1072,7 @@ struct dentry *ext3_get_parent(struct dentry *child) return ERR_PTR(-EIO); } - inode = ext3_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(ext3_iget(child->d_inode->i_sb, ino)); } #define S_SHIFT 12 @@ -1503,7 +1486,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, struct ext3_dir_entry_2 *de; int err; - frame = dx_probe(dentry, NULL, &hinfo, frames, &err); + frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); if (!frame) return err; entries = frame->entries; @@ -2056,7 +2039,7 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) return PTR_ERR(handle); retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); + bh = ext3_find_entry(dir, &dentry->d_name, &de); if (!bh) goto end_rmdir; @@ -2118,7 +2101,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) handle->h_sync = 1; retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); + bh = ext3_find_entry(dir, &dentry->d_name, &de); if (!bh) goto end_unlink; @@ -2276,7 +2259,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) handle->h_sync = 1; - old_bh = ext3_find_entry (old_dentry, &old_de); + old_bh = ext3_find_entry(old_dir, &old_dentry->d_name, &old_de); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -2289,7 +2272,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; new_inode = new_dentry->d_inode; - new_bh = ext3_find_entry (new_dentry, &new_de); + new_bh = ext3_find_entry(new_dir, &new_dentry->d_name, &new_de); if (new_bh) { if (!new_inode) { brelse (new_bh); @@ -2355,7 +2338,8 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, struct buffer_head *old_bh2; struct ext3_dir_entry_2 *old_de2; - old_bh2 = ext3_find_entry(old_dentry, &old_de2); + old_bh2 = ext3_find_entry(old_dir, &old_dentry->d_name, + &old_de2); if (old_bh2) { retval = ext3_delete_entry(handle, old_dir, old_de2, old_bh2); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 3a260af5544d..5dec6d1356c4 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -281,7 +281,8 @@ void ext3_abort (struct super_block * sb, const char * function, EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); + if (EXT3_SB(sb)->s_journal) + journal_abort(EXT3_SB(sb)->s_journal, -EIO); } void ext3_warning (struct super_block * sb, const char * function, @@ -347,7 +348,7 @@ fail: static int ext3_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int ext3_blkdev_remove(struct ext3_sb_info *sbi) @@ -390,10 +391,14 @@ static void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); struct ext3_super_block *es = sbi->s_es; - int i; + int i, err; ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); + err = journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + if (err < 0) + ext3_abort(sb, __func__, "Couldn't clean up the journal"); + if (!(sb->s_flags & MS_RDONLY)) { EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -2066,7 +2071,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT3: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } @@ -2296,7 +2301,9 @@ static void ext3_mark_recovery_complete(struct super_block * sb, journal_t *journal = EXT3_SB(sb)->s_journal; journal_lock_updates(journal); - journal_flush(journal); + if (journal_flush(journal) < 0) + goto out; + lock_super(sb); if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { @@ -2305,6 +2312,8 @@ static void ext3_mark_recovery_complete(struct super_block * sb, ext3_commit_super(sb, es, 1); } unlock_super(sb); + +out: journal_unlock_updates(journal); } @@ -2381,13 +2390,12 @@ static void ext3_write_super (struct super_block * sb) static int ext3_sync_fs(struct super_block *sb, int wait) { - tid_t target; - sb->s_dirt = 0; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { - if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); - } + if (wait) + ext3_force_commit(sb); + else + journal_start_commit(EXT3_SB(sb)->s_journal, NULL); + return 0; } @@ -2404,7 +2412,13 @@ static void ext3_write_super_lockfs(struct super_block *sb) /* Now we set up the journal barrier. */ journal_lock_updates(journal); - journal_flush(journal); + + /* + * We don't want to clear needs_recovery flag when we failed + * to flush the journal. + */ + if (journal_flush(journal) < 0) + return; /* Journal blocked and flushed, clear needs_recovery flag. */ EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); @@ -2783,30 +2797,30 @@ static int ext3_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int ext3_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount) + char *name, int remount) { int err; - struct nameidata nd; + struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - /* When remounting, no checks are needed and in fact, path is NULL */ + /* When remounting, no checks are needed and in fact, name is NULL */ if (remount) - return vfs_quota_on(sb, type, format_id, path, remount); + return vfs_quota_on(sb, type, format_id, name, remount); - err = path_lookup(path, LOOKUP_FOLLOW, &nd); + err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.path.mnt->mnt_sb != sb) { - path_put(&nd.path); + if (path.mnt->mnt_sb != sb) { + path_put(&path); return -EXDEV; } /* Journaling quota? */ if (EXT3_SB(sb)->s_qf_names[type]) { /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (path.dentry->d_parent != sb->s_root) printk(KERN_WARNING "EXT3-fs: Quota file not on filesystem root. " "Journaled quota will not work.\n"); @@ -2816,18 +2830,22 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, * When we journal data on quota file, we have to flush journal to see * all updates to the file when we bypass pagecache... */ - if (ext3_should_journal_data(nd.path.dentry->d_inode)) { + if (ext3_should_journal_data(path.dentry->d_inode)) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... */ journal_lock_updates(EXT3_SB(sb)->s_journal); - journal_flush(EXT3_SB(sb)->s_journal); + err = journal_flush(EXT3_SB(sb)->s_journal); journal_unlock_updates(EXT3_SB(sb)->s_journal); + if (err) { + path_put(&path); + return err; + } } - err = vfs_quota_on_path(sb, type, format_id, &nd.path); - path_put(&nd.path); + err = vfs_quota_on_path(sb, type, format_id, &path); + path_put(&path); return err; } diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b9821be709bd..d2003cdc36aa 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -589,21 +589,23 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, return; } -int ext4_claim_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks) +/** + * ext4_has_free_blocks() + * @sbi: in-core super block structure. + * @nblocks: number of needed blocks + * + * Check if filesystem has nblocks free & available for allocation. + * On success return 1, return 0 on failure. + */ +int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; + s64 free_blocks, dirty_blocks, root_blocks; struct percpu_counter *fbc = &sbi->s_freeblocks_counter; struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; free_blocks = percpu_counter_read_positive(fbc); dirty_blocks = percpu_counter_read_positive(dbc); - - if (!capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) - root_blocks = ext4_r_blocks_count(sbi->s_es); + root_blocks = ext4_r_blocks_count(sbi->s_es); if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { @@ -616,57 +618,32 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi, } } /* Check whether we have space after - * accounting for current dirty blocks + * accounting for current dirty blocks & root reserved blocks. */ - if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) - /* we don't have free space */ - return -ENOSPC; + if (free_blocks >= ((root_blocks + nblocks) + dirty_blocks)) + return 1; + + /* Hm, nope. Are (enough) root reserved blocks available? */ + if (sbi->s_resuid == current->fsuid || + ((sbi->s_resgid != 0) && in_group_p(sbi->s_resgid)) || + capable(CAP_SYS_RESOURCE)) { + if (free_blocks >= (nblocks + dirty_blocks)) + return 1; + } - /* Add the blocks to nblocks */ - percpu_counter_add(dbc, nblocks); return 0; } -/** - * ext4_has_free_blocks() - * @sbi: in-core super block structure. - * @nblocks: number of neeed blocks - * - * Check if filesystem has free blocks available for allocation. - * Return the number of blocks avaible for allocation for this request - * On success, return nblocks - */ -ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, +int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) { - s64 free_blocks, dirty_blocks; - s64 root_blocks = 0; - struct percpu_counter *fbc = &sbi->s_freeblocks_counter; - struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; - - free_blocks = percpu_counter_read_positive(fbc); - dirty_blocks = percpu_counter_read_positive(dbc); - - if (!capable(CAP_SYS_RESOURCE) && - sbi->s_resuid != current->fsuid && - (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) - root_blocks = ext4_r_blocks_count(sbi->s_es); - - if (free_blocks - (nblocks + root_blocks + dirty_blocks) < - EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); - } - if (free_blocks <= (root_blocks + dirty_blocks)) - /* we don't have free space */ + if (ext4_has_free_blocks(sbi, nblocks)) { + percpu_counter_add(&sbi->s_dirtyblocks_counter, nblocks); return 0; - - if (free_blocks - (root_blocks + dirty_blocks) < nblocks) - return free_blocks - (root_blocks + dirty_blocks); - return nblocks; + } else + return -ENOSPC; } - /** * ext4_should_retry_alloc() * @sb: super block diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 3ca6a2b7632d..fed5b610df5a 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -459,17 +459,8 @@ static int ext4_dx_readdir(struct file *filp, if (info->extra_fname) { if (call_filldir(filp, dirent, filldir, info->extra_fname)) goto finished; - info->extra_fname = NULL; - info->curr_node = rb_next(info->curr_node); - if (!info->curr_node) { - if (info->next_hash == ~0) { - filp->f_pos = EXT4_HTREE_EOF; - goto finished; - } - info->curr_hash = info->next_hash; - info->curr_minor_hash = 0; - } + goto next_node; } else if (!info->curr_node) info->curr_node = rb_first(&info->root); @@ -501,9 +492,14 @@ static int ext4_dx_readdir(struct file *filp, info->curr_minor_hash = fname->minor_hash; if (call_filldir(filp, dirent, filldir, fname)) break; - + next_node: info->curr_node = rb_next(info->curr_node); - if (!info->curr_node) { + if (info->curr_node) { + fname = rb_entry(info->curr_node, struct fname, + rb_hash); + info->curr_hash = fname->hash; + info->curr_minor_hash = fname->minor_hash; + } else { if (info->next_hash == ~0) { filp->f_pos = EXT4_HTREE_EOF; break; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 4880cc3e6727..b0537c827024 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1003,8 +1003,7 @@ extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t iblock, ext4_fsblk_t goal, unsigned long *count, int *errp); extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); -extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, - s64 nblocks); +extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); extern void ext4_free_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t block, unsigned long count, int metadata); extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 92db9e945147..63adcb792988 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1061,7 +1061,6 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, stru struct dentry *ext4_get_parent(struct dentry *child) { unsigned long ino; - struct dentry *parent; struct inode *inode; static const struct qstr dotdot = { .name = "..", @@ -1083,16 +1082,7 @@ struct dentry *ext4_get_parent(struct dentry *child) return ERR_PTR(-EIO); } - inode = ext4_iget(child->d_inode->i_sb, ino); - if (IS_ERR(inode)) - return ERR_CAST(inode); - - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); } #define S_SHIFT 12 diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9b2b2bc4ec17..994859df010e 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -333,7 +333,8 @@ void ext4_abort(struct super_block *sb, const char *function, EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; - jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); + if (EXT4_SB(sb)->s_journal) + jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); } void ext4_warning(struct super_block *sb, const char *function, @@ -399,7 +400,7 @@ fail: static int ext4_blkdev_put(struct block_device *bdev) { bd_release(bdev); - return blkdev_put(bdev); + return blkdev_put(bdev, FMODE_READ|FMODE_WRITE); } static int ext4_blkdev_remove(struct ext4_sb_info *sbi) @@ -442,14 +443,16 @@ static void ext4_put_super(struct super_block *sb) { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - int i; + int i, err; ext4_mb_release(sb); ext4_ext_release(sb); ext4_xattr_put_super(sb); - if (jbd2_journal_destroy(sbi->s_journal) < 0) - ext4_abort(sb, __func__, "Couldn't clean up the journal"); + err = jbd2_journal_destroy(sbi->s_journal); sbi->s_journal = NULL; + if (err < 0) + ext4_abort(sb, __func__, "Couldn't clean up the journal"); + if (!(sb->s_flags & MS_RDONLY)) { EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); @@ -2553,7 +2556,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, if (bd_claim(bdev, sb)) { printk(KERN_ERR "EXT4: failed to claim external journal device.\n"); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); return NULL; } @@ -3328,30 +3331,30 @@ static int ext4_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int ext4_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount) + char *name, int remount) { int err; - struct nameidata nd; + struct path path; if (!test_opt(sb, QUOTA)) return -EINVAL; - /* When remounting, no checks are needed and in fact, path is NULL */ + /* When remounting, no checks are needed and in fact, name is NULL */ if (remount) - return vfs_quota_on(sb, type, format_id, path, remount); + return vfs_quota_on(sb, type, format_id, name, remount); - err = path_lookup(path, LOOKUP_FOLLOW, &nd); + err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.path.mnt->mnt_sb != sb) { - path_put(&nd.path); + if (path.mnt->mnt_sb != sb) { + path_put(&path); return -EXDEV; } /* Journaling quota? */ if (EXT4_SB(sb)->s_qf_names[type]) { /* Quotafile not in fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (path.dentry->d_parent != sb->s_root) printk(KERN_WARNING "EXT4-fs: Quota file not on filesystem root. " "Journaled quota will not work.\n"); @@ -3361,7 +3364,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, * When we journal data on quota file, we have to flush journal to see * all updates to the file when we bypass pagecache... */ - if (ext4_should_journal_data(nd.path.dentry->d_inode)) { + if (ext4_should_journal_data(path.dentry->d_inode)) { /* * We don't need to lock updates but journal_flush() could * otherwise be livelocked... @@ -3370,13 +3373,13 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); if (err) { - path_put(&nd.path); + path_put(&path); return err; } } - err = vfs_quota_on_path(sb, type, format_id, &nd.path); - path_put(&nd.path); + err = vfs_quota_on_path(sb, type, format_id, &path); + path_put(&path); return err; } diff --git a/fs/fat/Makefile b/fs/fat/Makefile index bfb5f06cf2c8..e06190322c1c 100644 --- a/fs/fat/Makefile +++ b/fs/fat/Makefile @@ -3,5 +3,9 @@ # obj-$(CONFIG_FAT_FS) += fat.o +obj-$(CONFIG_VFAT_FS) += vfat.o +obj-$(CONFIG_MSDOS_FS) += msdos.o -fat-objs := cache.o dir.o fatent.o file.o inode.o misc.o +fat-y := cache.o dir.o fatent.o file.o inode.o misc.o +vfat-y := namei_vfat.o +msdos-y := namei_msdos.o diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 3222f51c41cf..b42602298087 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -9,8 +9,8 @@ */ #include <linux/fs.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> +#include "fat.h" /* this must be > 0. */ #define FAT_MAX_CACHE 8 @@ -293,10 +293,12 @@ static int fat_bmap_cluster(struct inode *inode, int cluster) } int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, - unsigned long *mapped_blocks) + unsigned long *mapped_blocks, int create) { struct super_block *sb = inode->i_sb; struct msdos_sb_info *sbi = MSDOS_SB(sb); + const unsigned long blocksize = sb->s_blocksize; + const unsigned char blocksize_bits = sb->s_blocksize_bits; sector_t last_block; int cluster, offset; @@ -309,10 +311,21 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, } return 0; } - last_block = (MSDOS_I(inode)->mmu_private + (sb->s_blocksize - 1)) - >> sb->s_blocksize_bits; - if (sector >= last_block) - return 0; + + last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits; + if (sector >= last_block) { + if (!create) + return 0; + + /* + * ->mmu_private can access on only allocation path. + * (caller must hold ->i_mutex) + */ + last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1)) + >> blocksize_bits; + if (sector >= last_block) + return 0; + } cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits); offset = sector & (sbi->sec_per_clus - 1); diff --git a/fs/fat/dir.c b/fs/fat/dir.c index cd4a0162e10d..67e058357098 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -16,11 +16,11 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/time.h> -#include <linux/msdos_fs.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/compat.h> #include <asm/uaccess.h> +#include "fat.h" static inline loff_t fat_make_i_pos(struct super_block *sb, struct buffer_head *bh, @@ -77,7 +77,7 @@ next: *bh = NULL; iblock = *pos >> sb->s_blocksize_bits; - err = fat_bmap(dir, iblock, &phys, &mapped_blocks); + err = fat_bmap(dir, iblock, &phys, &mapped_blocks, 0); if (err || !phys) return -1; /* beyond EOF or error */ @@ -86,7 +86,7 @@ next: *bh = sb_bread(sb, phys); if (*bh == NULL) { printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", - (unsigned long long)phys); + (llu)phys); /* skip this block */ *pos = (iblock + 1) << sb->s_blocksize_bits; goto next; @@ -373,9 +373,10 @@ parse_record: if (de->attr == ATTR_EXT) { int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); - if (status < 0) - return status; - else if (status == PARSE_INVALID) + if (status < 0) { + err = status; + goto end_of_dir; + } else if (status == PARSE_INVALID) continue; else if (status == PARSE_NOT_LONGNAME) goto parse_record; @@ -832,6 +833,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, #endif /* CONFIG_COMPAT */ const struct file_operations fat_dir_operations = { + .llseek = generic_file_llseek, .read = generic_read_dir, .readdir = fat_readdir, .ioctl = fat_dir_ioctl, @@ -839,6 +841,7 @@ const struct file_operations fat_dir_operations = { .compat_ioctl = fat_compat_dir_ioctl, #endif .fsync = file_fsync, + .llseek = generic_file_llseek, }; static int fat_get_short_entry(struct inode *dir, loff_t *pos, @@ -1088,6 +1091,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) struct msdos_dir_entry *de; sector_t blknr; __le16 date, time; + u8 time_cs; int err, cluster; err = fat_alloc_clusters(dir, &cluster, 1); @@ -1101,7 +1105,7 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) goto error_free; } - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de = (struct msdos_dir_entry *)bhs[0]->b_data; /* filling the new directory slots ("." and ".." entries) */ @@ -1111,13 +1115,14 @@ int fat_alloc_new_dir(struct inode *dir, struct timespec *ts) de[0].lcase = de[1].lcase = 0; de[0].time = de[1].time = time; de[0].date = de[1].date = date; - de[0].ctime_cs = de[1].ctime_cs = 0; if (sbi->options.isvfat) { /* extra timestamps */ de[0].ctime = de[1].ctime = time; + de[0].ctime_cs = de[1].ctime_cs = time_cs; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = date; } else { de[0].ctime = de[1].ctime = 0; + de[0].ctime_cs = de[1].ctime_cs = 0; de[0].adate = de[0].cdate = de[1].adate = de[1].cdate = 0; } de[0].start = cpu_to_le16(cluster); diff --git a/fs/fat/fat.h b/fs/fat/fat.h new file mode 100644 index 000000000000..ea440d65819c --- /dev/null +++ b/fs/fat/fat.h @@ -0,0 +1,329 @@ +#ifndef _FAT_H +#define _FAT_H + +#include <linux/buffer_head.h> +#include <linux/string.h> +#include <linux/nls.h> +#include <linux/fs.h> +#include <linux/mutex.h> +#include <linux/msdos_fs.h> + +/* + * vfat shortname flags + */ +#define VFAT_SFN_DISPLAY_LOWER 0x0001 /* convert to lowercase for display */ +#define VFAT_SFN_DISPLAY_WIN95 0x0002 /* emulate win95 rule for display */ +#define VFAT_SFN_DISPLAY_WINNT 0x0004 /* emulate winnt rule for display */ +#define VFAT_SFN_CREATE_WIN95 0x0100 /* emulate win95 rule for create */ +#define VFAT_SFN_CREATE_WINNT 0x0200 /* emulate winnt rule for create */ + +struct fat_mount_options { + uid_t fs_uid; + gid_t fs_gid; + unsigned short fs_fmask; + unsigned short fs_dmask; + unsigned short codepage; /* Codepage for shortname conversions */ + char *iocharset; /* Charset used for filename input/display */ + unsigned short shortname; /* flags for shortname display/create rule */ + unsigned char name_check; /* r = relaxed, n = normal, s = strict */ + unsigned short allow_utime;/* permission for setting the [am]time */ + unsigned quiet:1, /* set = fake successful chmods and chowns */ + showexec:1, /* set = only set x bit for com/exe/bat */ + sys_immutable:1, /* set = system files are immutable */ + dotsOK:1, /* set = hidden and system files are named '.filename' */ + isvfat:1, /* 0=no vfat long filename support, 1=vfat support */ + utf8:1, /* Use of UTF-8 character set (Default) */ + unicode_xlate:1, /* create escape sequences for unhandled Unicode */ + numtail:1, /* Does first alias have a numeric '~1' type tail? */ + flush:1, /* write things quickly */ + nocase:1, /* Does this need case conversion? 0=need case conversion*/ + usefree:1, /* Use free_clusters for FAT32 */ + tz_utc:1, /* Filesystem timestamps are in UTC */ + rodir:1; /* allow ATTR_RO for directory */ +}; + +#define FAT_HASH_BITS 8 +#define FAT_HASH_SIZE (1UL << FAT_HASH_BITS) + +/* + * MS-DOS file system in-core superblock data + */ +struct msdos_sb_info { + unsigned short sec_per_clus; /* sectors/cluster */ + unsigned short cluster_bits; /* log2(cluster_size) */ + unsigned int cluster_size; /* cluster size */ + unsigned char fats,fat_bits; /* number of FATs, FAT bits (12 or 16) */ + unsigned short fat_start; + unsigned long fat_length; /* FAT start & length (sec.) */ + unsigned long dir_start; + unsigned short dir_entries; /* root dir start & entries */ + unsigned long data_start; /* first data sector */ + unsigned long max_cluster; /* maximum cluster number */ + unsigned long root_cluster; /* first cluster of the root directory */ + unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */ + struct mutex fat_lock; + unsigned int prev_free; /* previously allocated cluster number */ + unsigned int free_clusters; /* -1 if undefined */ + unsigned int free_clus_valid; /* is free_clusters valid? */ + struct fat_mount_options options; + struct nls_table *nls_disk; /* Codepage used on disk */ + struct nls_table *nls_io; /* Charset used for input and display */ + const void *dir_ops; /* Opaque; default directory operations */ + int dir_per_block; /* dir entries per block */ + int dir_per_block_bits; /* log2(dir_per_block) */ + + int fatent_shift; + struct fatent_operations *fatent_ops; + + spinlock_t inode_hash_lock; + struct hlist_head inode_hashtable[FAT_HASH_SIZE]; +}; + +#define FAT_CACHE_VALID 0 /* special case for valid cache */ + +/* + * MS-DOS file system inode data in memory + */ +struct msdos_inode_info { + spinlock_t cache_lru_lock; + struct list_head cache_lru; + int nr_caches; + /* for avoiding the race between fat_free() and fat_get_cluster() */ + unsigned int cache_valid_id; + + /* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */ + loff_t mmu_private; /* physically allocated size */ + + int i_start; /* first cluster or 0 */ + int i_logstart; /* logical first cluster */ + int i_attrs; /* unused attribute bits */ + loff_t i_pos; /* on-disk position of directory entry or 0 */ + struct hlist_node i_fat_hash; /* hash by i_location */ + struct inode vfs_inode; +}; + +struct fat_slot_info { + loff_t i_pos; /* on-disk position of directory entry */ + loff_t slot_off; /* offset for slot or de start */ + int nr_slots; /* number of slots + 1(de) in filename */ + struct msdos_dir_entry *de; + struct buffer_head *bh; +}; + +static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) +{ + return container_of(inode, struct msdos_inode_info, vfs_inode); +} + +/* + * If ->i_mode can't hold S_IWUGO (i.e. ATTR_RO), we use ->i_attrs to + * save ATTR_RO instead of ->i_mode. + * + * If it's directory and !sbi->options.rodir, ATTR_RO isn't read-only + * bit, it's just used as flag for app. + */ +static inline int fat_mode_can_hold_ro(struct inode *inode) +{ + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + mode_t mask; + + if (S_ISDIR(inode->i_mode)) { + if (!sbi->options.rodir) + return 0; + mask = ~sbi->options.fs_dmask; + } else + mask = ~sbi->options.fs_fmask; + + if (!(mask & S_IWUGO)) + return 0; + return 1; +} + +/* Convert attribute bits and a mask to the UNIX mode. */ +static inline mode_t fat_make_mode(struct msdos_sb_info *sbi, + u8 attrs, mode_t mode) +{ + if (attrs & ATTR_RO && !((attrs & ATTR_DIR) && !sbi->options.rodir)) + mode &= ~S_IWUGO; + + if (attrs & ATTR_DIR) + return (mode & ~sbi->options.fs_dmask) | S_IFDIR; + else + return (mode & ~sbi->options.fs_fmask) | S_IFREG; +} + +/* Return the FAT attribute byte for this inode */ +static inline u8 fat_make_attrs(struct inode *inode) +{ + u8 attrs = MSDOS_I(inode)->i_attrs; + if (S_ISDIR(inode->i_mode)) + attrs |= ATTR_DIR; + if (fat_mode_can_hold_ro(inode) && !(inode->i_mode & S_IWUGO)) + attrs |= ATTR_RO; + return attrs; +} + +static inline void fat_save_attrs(struct inode *inode, u8 attrs) +{ + if (fat_mode_can_hold_ro(inode)) + MSDOS_I(inode)->i_attrs = attrs & ATTR_UNUSED; + else + MSDOS_I(inode)->i_attrs = attrs & (ATTR_UNUSED | ATTR_RO); +} + +static inline unsigned char fat_checksum(const __u8 *name) +{ + unsigned char s = name[0]; + s = (s<<7) + (s>>1) + name[1]; s = (s<<7) + (s>>1) + name[2]; + s = (s<<7) + (s>>1) + name[3]; s = (s<<7) + (s>>1) + name[4]; + s = (s<<7) + (s>>1) + name[5]; s = (s<<7) + (s>>1) + name[6]; + s = (s<<7) + (s>>1) + name[7]; s = (s<<7) + (s>>1) + name[8]; + s = (s<<7) + (s>>1) + name[9]; s = (s<<7) + (s>>1) + name[10]; + return s; +} + +static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus) +{ + return ((sector_t)clus - FAT_START_ENT) * sbi->sec_per_clus + + sbi->data_start; +} + +static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + *dst++ = src[0] | (src[1] << 8); + src += 2; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len) +{ +#ifdef __BIG_ENDIAN + while (len--) { + dst[0] = *src & 0x00FF; + dst[1] = (*src & 0xFF00) >> 8; + dst += 2; + src++; + } +#else + memcpy(dst, src, len * 2); +#endif +} + +/* fat/cache.c */ +extern void fat_cache_inval_inode(struct inode *inode); +extern int fat_get_cluster(struct inode *inode, int cluster, + int *fclus, int *dclus); +extern int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, + unsigned long *mapped_blocks, int create); + +/* fat/dir.c */ +extern const struct file_operations fat_dir_operations; +extern int fat_search_long(struct inode *inode, const unsigned char *name, + int name_len, struct fat_slot_info *sinfo); +extern int fat_dir_empty(struct inode *dir); +extern int fat_subdirs(struct inode *dir); +extern int fat_scan(struct inode *dir, const unsigned char *name, + struct fat_slot_info *sinfo); +extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh, + struct msdos_dir_entry **de, loff_t *i_pos); +extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts); +extern int fat_add_entries(struct inode *dir, void *slots, int nr_slots, + struct fat_slot_info *sinfo); +extern int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo); + +/* fat/fatent.c */ +struct fat_entry { + int entry; + union { + u8 *ent12_p[2]; + __le16 *ent16_p; + __le32 *ent32_p; + } u; + int nr_bhs; + struct buffer_head *bhs[2]; +}; + +static inline void fatent_init(struct fat_entry *fatent) +{ + fatent->nr_bhs = 0; + fatent->entry = 0; + fatent->u.ent32_p = NULL; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +static inline void fatent_set_entry(struct fat_entry *fatent, int entry) +{ + fatent->entry = entry; + fatent->u.ent32_p = NULL; +} + +static inline void fatent_brelse(struct fat_entry *fatent) +{ + int i; + fatent->u.ent32_p = NULL; + for (i = 0; i < fatent->nr_bhs; i++) + brelse(fatent->bhs[i]); + fatent->nr_bhs = 0; + fatent->bhs[0] = fatent->bhs[1] = NULL; +} + +extern void fat_ent_access_init(struct super_block *sb); +extern int fat_ent_read(struct inode *inode, struct fat_entry *fatent, + int entry); +extern int fat_ent_write(struct inode *inode, struct fat_entry *fatent, + int new, int wait); +extern int fat_alloc_clusters(struct inode *inode, int *cluster, + int nr_cluster); +extern int fat_free_clusters(struct inode *inode, int cluster); +extern int fat_count_free_clusters(struct super_block *sb); + +/* fat/file.c */ +extern int fat_generic_ioctl(struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg); +extern const struct file_operations fat_file_operations; +extern const struct inode_operations fat_file_inode_operations; +extern int fat_setattr(struct dentry * dentry, struct iattr * attr); +extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); + +/* fat/inode.c */ +extern void fat_attach(struct inode *inode, loff_t i_pos); +extern void fat_detach(struct inode *inode); +extern struct inode *fat_iget(struct super_block *sb, loff_t i_pos); +extern struct inode *fat_build_inode(struct super_block *sb, + struct msdos_dir_entry *de, loff_t i_pos); +extern int fat_sync_inode(struct inode *inode); +extern int fat_fill_super(struct super_block *sb, void *data, int silent, + const struct inode_operations *fs_dir_inode_ops, int isvfat); + +extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +/* fat/misc.c */ +extern void fat_fs_panic(struct super_block *s, const char *fmt, ...) + __attribute__ ((format (printf, 2, 3))) __cold; +extern void fat_clusters_flush(struct super_block *sb); +extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); +extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs); +extern void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs); +extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs); + +int fat_cache_init(void); +void fat_cache_destroy(void); + +/* helper for printk */ +typedef unsigned long long llu; + +#endif /* !_FAT_H */ diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index fb98b3d847ed..da6eea47872f 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -7,6 +7,7 @@ #include <linux/fs.h> #include <linux/msdos_fs.h> #include <linux/blkdev.h> +#include "fat.h" struct fatent_operations { void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); @@ -92,8 +93,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, err_brelse: brelse(bhs[0]); err: - printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); return -EIO; } @@ -106,7 +106,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, fatent->bhs[0] = sb_bread(sb, blocknr); if (!fatent->bhs[0]) { printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", - (unsigned long long)blocknr); + (llu)blocknr); return -EIO; } fatent->nr_bhs = 1; @@ -316,10 +316,20 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - /* Does this entry need the next block? */ - if (sbi->fat_bits == 12 && (offset + 1) >= sb->s_blocksize) { - if (fatent->nr_bhs != 2 || bhs[1]->b_blocknr != (blocknr + 1)) - return 0; + if (sbi->fat_bits == 12) { + if ((offset + 1) < sb->s_blocksize) { + /* This entry is on bhs[0]. */ + if (fatent->nr_bhs == 2) { + brelse(bhs[1]); + fatent->nr_bhs = 1; + } + } else { + /* This entry needs the next block. */ + if (fatent->nr_bhs != 2) + return 0; + if (bhs[1]->b_blocknr != (blocknr + 1)) + return 0; + } } ops->ent_set_ptr(fatent, offset); return 1; diff --git a/fs/fat/file.c b/fs/fat/file.c index ddde37025ca6..f06a4e525ece 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -10,13 +10,13 @@ #include <linux/module.h> #include <linux/mount.h> #include <linux/time.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> #include <linux/writeback.h> #include <linux/backing-dev.h> #include <linux/blkdev.h> #include <linux/fsnotify.h> #include <linux/security.h> +#include "fat.h" int fat_generic_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg) @@ -29,10 +29,9 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, { u32 attr; - if (inode->i_ino == MSDOS_ROOT_INO) - attr = ATTR_DIR; - else - attr = fat_attr(inode); + mutex_lock(&inode->i_mutex); + attr = fat_make_attrs(inode); + mutex_unlock(&inode->i_mutex); return put_user(attr, user_attr); } @@ -62,20 +61,16 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, /* Merge in ATTR_VOLUME and ATTR_DIR */ attr |= (MSDOS_I(inode)->i_attrs & ATTR_VOLUME) | (is_dir ? ATTR_DIR : 0); - oldattr = fat_attr(inode); + oldattr = fat_make_attrs(inode); /* Equivalent to a chmod() */ ia.ia_valid = ATTR_MODE | ATTR_CTIME; ia.ia_ctime = current_fs_time(inode->i_sb); - if (is_dir) { - ia.ia_mode = MSDOS_MKMODE(attr, - S_IRWXUGO & ~sbi->options.fs_dmask) - | S_IFDIR; - } else { - ia.ia_mode = MSDOS_MKMODE(attr, - (S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)) - & ~sbi->options.fs_fmask) - | S_IFREG; + if (is_dir) + ia.ia_mode = fat_make_mode(sbi, attr, S_IRWXUGO); + else { + ia.ia_mode = fat_make_mode(sbi, attr, + S_IRUGO | S_IWUGO | (inode->i_mode & S_IXUGO)); } /* The root directory has no attributes */ @@ -115,7 +110,7 @@ int fat_generic_ioctl(struct inode *inode, struct file *filp, inode->i_flags &= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = attr & ATTR_UNUSED; + fat_save_attrs(inode, attr); mark_inode_dirty(inode); up: mnt_drop_write(filp->f_path.mnt); @@ -274,7 +269,7 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Note, the basic check is already done by a caller of - * (attr->ia_mode & ~MSDOS_VALID_MODE) + * (attr->ia_mode & ~FAT_VALID_MODE) */ if (S_ISREG(inode->i_mode)) @@ -287,11 +282,18 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi, /* * Of the r and x bits, all (subject to umask) must be present. Of the * w bits, either all (subject to umask) or none must be present. + * + * If fat_mode_can_hold_ro(inode) is false, can't change w bits. */ if ((perm & (S_IRUGO | S_IXUGO)) != (inode->i_mode & (S_IRUGO|S_IXUGO))) return -EPERM; - if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) - return -EPERM; + if (fat_mode_can_hold_ro(inode)) { + if ((perm & S_IWUGO) && ((perm & S_IWUGO) != (S_IWUGO & ~mask))) + return -EPERM; + } else { + if ((perm & S_IWUGO) != (S_IWUGO & ~mask)) + return -EPERM; + } *mode_ptr &= S_IFMT | perm; @@ -314,13 +316,15 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode) } #define TIMES_SET_FLAGS (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET) +/* valid file mode bits */ +#define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO) int fat_setattr(struct dentry *dentry, struct iattr *attr) { struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); struct inode *inode = dentry->d_inode; - int error = 0; unsigned int ia_valid; + int error; /* * Expand the file. Since inode_setattr() updates ->i_size @@ -356,7 +360,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) ((attr->ia_valid & ATTR_GID) && (attr->ia_gid != sbi->options.fs_gid)) || ((attr->ia_valid & ATTR_MODE) && - (attr->ia_mode & ~MSDOS_VALID_MODE))) + (attr->ia_mode & ~FAT_VALID_MODE))) error = -EPERM; if (error) { @@ -374,7 +378,8 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr) attr->ia_valid &= ~ATTR_MODE; } - error = inode_setattr(inode, attr); + if (attr->ia_valid) + error = inode_setattr(inode, attr); out: return error; } diff --git a/fs/fat/inode.c b/fs/fat/inode.c index d12cdf2a0406..bdd8fb7be2ca 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -16,7 +16,6 @@ #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/seq_file.h> -#include <linux/msdos_fs.h> #include <linux/pagemap.h> #include <linux/mpage.h> #include <linux/buffer_head.h> @@ -27,7 +26,9 @@ #include <linux/uio.h> #include <linux/writeback.h> #include <linux/log2.h> +#include <linux/hash.h> #include <asm/unaligned.h> +#include "fat.h" #ifndef CONFIG_FAT_DEFAULT_IOCHARSET /* if user don't select VFAT, this is undefined. */ @@ -63,7 +64,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, sector_t phys; int err, offset; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; if (phys) { @@ -93,7 +94,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock, *max_blocks = min(mapped_blocks, *max_blocks); MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits; - err = fat_bmap(inode, iblock, &phys, &mapped_blocks); + err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create); if (err) return err; @@ -175,7 +176,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { /* - * FIXME: blockdev_direct_IO() doesn't use ->prepare_write(), + * FIXME: blockdev_direct_IO() doesn't use ->write_begin(), * so we need to update the ->mmu_private to block boundary. * * But we must fill the remaining area or hole by nul for @@ -198,7 +199,14 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, static sector_t _fat_bmap(struct address_space *mapping, sector_t block) { - return generic_block_bmap(mapping, block, fat_get_block); + sector_t blocknr; + + /* fat_get_cluster() assumes the requested blocknr isn't truncated. */ + mutex_lock(&mapping->host->i_mutex); + blocknr = generic_block_bmap(mapping, block, fat_get_block); + mutex_unlock(&mapping->host->i_mutex); + + return blocknr; } static const struct address_space_operations fat_aops = { @@ -247,25 +255,21 @@ static void fat_hash_init(struct super_block *sb) INIT_HLIST_HEAD(&sbi->inode_hashtable[i]); } -static inline unsigned long fat_hash(struct super_block *sb, loff_t i_pos) +static inline unsigned long fat_hash(loff_t i_pos) { - unsigned long tmp = (unsigned long)i_pos | (unsigned long) sb; - tmp = tmp + (tmp >> FAT_HASH_BITS) + (tmp >> FAT_HASH_BITS * 2); - return tmp & FAT_HASH_MASK; + return hash_32(i_pos, FAT_HASH_BITS); } void fat_attach(struct inode *inode, loff_t i_pos) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); spin_lock(&sbi->inode_hash_lock); MSDOS_I(inode)->i_pos = i_pos; - hlist_add_head(&MSDOS_I(inode)->i_fat_hash, - sbi->inode_hashtable + fat_hash(sb, i_pos)); + hlist_add_head(&MSDOS_I(inode)->i_fat_hash, head); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_attach); void fat_detach(struct inode *inode) @@ -276,13 +280,12 @@ void fat_detach(struct inode *inode) hlist_del_init(&MSDOS_I(inode)->i_fat_hash); spin_unlock(&sbi->inode_hash_lock); } - EXPORT_SYMBOL_GPL(fat_detach); struct inode *fat_iget(struct super_block *sb, loff_t i_pos) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - struct hlist_head *head = sbi->inode_hashtable + fat_hash(sb, i_pos); + struct hlist_head *head = sbi->inode_hashtable + fat_hash(i_pos); struct hlist_node *_p; struct msdos_inode_info *i; struct inode *inode = NULL; @@ -341,8 +344,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if ((de->attr & ATTR_DIR) && !IS_FREE(de->name)) { inode->i_generation &= ~1; - inode->i_mode = MSDOS_MKMODE(de->attr, - S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, de->attr, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; @@ -359,10 +361,9 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) inode->i_nlink = fat_subdirs(inode); } else { /* not a directory */ inode->i_generation |= 1; - inode->i_mode = MSDOS_MKMODE(de->attr, - ((sbi->options.showexec && !is_exec(de->name + 8)) - ? S_IRUGO|S_IWUGO : S_IRWXUGO) - & ~sbi->options.fs_fmask) | S_IFREG; + inode->i_mode = fat_make_mode(sbi, de->attr, + ((sbi->options.showexec && !is_exec(de->name + 8)) + ? S_IRUGO|S_IWUGO : S_IRWXUGO)); MSDOS_I(inode)->i_start = le16_to_cpu(de->start); if (sbi->fat_bits == 32) MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16); @@ -378,25 +379,16 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de) if (sbi->options.sys_immutable) inode->i_flags |= S_IMMUTABLE; } - MSDOS_I(inode)->i_attrs = de->attr & ATTR_UNUSED; + fat_save_attrs(inode, de->attr); + inode->i_blocks = ((inode->i_size + (sbi->cluster_size - 1)) & ~((loff_t)sbi->cluster_size - 1)) >> 9; - inode->i_mtime.tv_sec = - date_dos2unix(le16_to_cpu(de->time), le16_to_cpu(de->date), - sbi->options.tz_utc); - inode->i_mtime.tv_nsec = 0; + + fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0); if (sbi->options.isvfat) { - int secs = de->ctime_cs / 100; - int csecs = de->ctime_cs % 100; - inode->i_ctime.tv_sec = - date_dos2unix(le16_to_cpu(de->ctime), - le16_to_cpu(de->cdate), - sbi->options.tz_utc) + secs; - inode->i_ctime.tv_nsec = csecs * 10000000; - inode->i_atime.tv_sec = - date_dos2unix(0, le16_to_cpu(de->adate), - sbi->options.tz_utc); - inode->i_atime.tv_nsec = 0; + fat_time_fat2unix(sbi, &inode->i_ctime, de->ctime, + de->cdate, de->ctime_cs); + fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0); } else inode->i_ctime = inode->i_atime = inode->i_mtime; @@ -443,13 +435,8 @@ static void fat_delete_inode(struct inode *inode) static void fat_clear_inode(struct inode *inode) { - struct super_block *sb = inode->i_sb; - struct msdos_sb_info *sbi = MSDOS_SB(sb); - - spin_lock(&sbi->inode_hash_lock); fat_cache_inval_inode(inode); - hlist_del_init(&MSDOS_I(inode)->i_fat_hash); - spin_unlock(&sbi->inode_hash_lock); + fat_detach(inode); } static void fat_write_super(struct super_block *sb) @@ -555,6 +542,20 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi, + struct inode *inode) +{ + loff_t i_pos; +#if BITS_PER_LONG == 32 + spin_lock(&sbi->inode_hash_lock); +#endif + i_pos = MSDOS_I(inode)->i_pos; +#if BITS_PER_LONG == 32 + spin_unlock(&sbi->inode_hash_lock); +#endif + return i_pos; +} + static int fat_write_inode(struct inode *inode, int wait) { struct super_block *sb = inode->i_sb; @@ -564,9 +565,12 @@ static int fat_write_inode(struct inode *inode, int wait) loff_t i_pos; int err; + if (inode->i_ino == MSDOS_ROOT_INO) + return 0; + retry: - i_pos = MSDOS_I(inode)->i_pos; - if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) + i_pos = fat_i_pos_read(sbi, inode); + if (!i_pos) return 0; bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); @@ -588,19 +592,17 @@ retry: raw_entry->size = 0; else raw_entry->size = cpu_to_le32(inode->i_size); - raw_entry->attr = fat_attr(inode); + raw_entry->attr = fat_make_attrs(inode); raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart); raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16); - fat_date_unix2dos(inode->i_mtime.tv_sec, &raw_entry->time, - &raw_entry->date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time, + &raw_entry->date, NULL); if (sbi->options.isvfat) { __le16 atime; - fat_date_unix2dos(inode->i_ctime.tv_sec, &raw_entry->ctime, - &raw_entry->cdate, sbi->options.tz_utc); - fat_date_unix2dos(inode->i_atime.tv_sec, &atime, - &raw_entry->adate, sbi->options.tz_utc); - raw_entry->ctime_cs = (inode->i_ctime.tv_sec & 1) * 100 + - inode->i_ctime.tv_nsec / 10000000; + fat_time_unix2fat(sbi, &inode->i_ctime, &raw_entry->ctime, + &raw_entry->cdate, &raw_entry->ctime_cs); + fat_time_unix2fat(sbi, &inode->i_atime, &atime, + &raw_entry->adate, NULL); } spin_unlock(&sbi->inode_hash_lock); mark_buffer_dirty(bh); @@ -681,33 +683,24 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb, inode = NULL; } } - if (!inode) { - /* For now, do nothing - * What we could do is: - * follow the file starting at fh[4], and record - * the ".." entry, and the name of the fh[2] entry. - * The follow the ".." file finding the next step up. - * This way we build a path to the root of - * the tree. If this works, we lookup the path and so - * get this inode into the cache. - * Finally try the fat_iget lookup again - * If that fails, then weare totally out of luck - * But all that is for another day - */ - } - if (!inode) - return ERR_PTR(-ESTALE); - - /* now to find a dentry. - * If possible, get a well-connected one + /* + * For now, do nothing if the inode is not found. + * + * What we could do is: + * + * - follow the file starting at fh[4], and record the ".." entry, + * and the name of the fh[2] entry. + * - then follow the ".." file finding the next step up. + * + * This way we build a path to the root of the tree. If this works, we + * lookup the path and so get this inode into the cache. Finally try + * the fat_iget lookup again. If that fails, then we are totally out + * of luck. But all that is for another day */ - result = d_alloc_anon(inode); - if (result == NULL) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - result->d_op = sb->s_root->d_op; + result = d_obtain_alias(inode); + if (!IS_ERR(result)) + result->d_op = sb->s_root->d_op; return result; } @@ -754,15 +747,8 @@ static struct dentry *fat_get_parent(struct dentry *child) } inode = fat_build_inode(sb, de, i_pos); brelse(bh); - if (IS_ERR(inode)) { - parent = ERR_CAST(inode); - goto out; - } - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } + + parent = d_obtain_alias(inode); out: unlock_super(sb); @@ -835,8 +821,10 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt) seq_puts(m, ",uni_xlate"); if (!opts->numtail) seq_puts(m, ",nonumtail"); + if (opts->rodir) + seq_puts(m, ",rodir"); } - if (sbi->options.flush) + if (opts->flush) seq_puts(m, ",flush"); if (opts->tz_utc) seq_puts(m, ",tz=UTC"); @@ -852,7 +840,7 @@ enum { Opt_charset, Opt_shortname_lower, Opt_shortname_win95, Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes, Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes, - Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, + Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err, }; static const match_table_t fat_tokens = { @@ -924,6 +912,7 @@ static const match_table_t vfat_tokens = { {Opt_nonumtail_yes, "nonumtail=yes"}, {Opt_nonumtail_yes, "nonumtail=true"}, {Opt_nonumtail_yes, "nonumtail"}, + {Opt_rodir, "rodir"}, {Opt_err, NULL} }; @@ -943,10 +932,13 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, opts->allow_utime = -1; opts->codepage = fat_default_codepage; opts->iocharset = fat_default_iocharset; - if (is_vfat) + if (is_vfat) { opts->shortname = VFAT_SFN_DISPLAY_LOWER|VFAT_SFN_CREATE_WIN95; - else + opts->rodir = 0; + } else { opts->shortname = 0; + opts->rodir = 1; + } opts->name_check = 'n'; opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0; opts->utf8 = opts->unicode_xlate = 0; @@ -1097,6 +1089,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, case Opt_nonumtail_yes: /* empty or 1 or yes or true */ opts->numtail = 0; /* negated option */ break; + case Opt_rodir: + opts->rodir = 1; + break; /* obsolete mount options */ case Opt_obsolate: @@ -1142,7 +1137,7 @@ static int fat_read_root(struct inode *inode) inode->i_gid = sbi->options.fs_gid; inode->i_version++; inode->i_generation = 0; - inode->i_mode = (S_IRWXUGO & ~sbi->options.fs_dmask) | S_IFDIR; + inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; if (sbi->fat_bits == 32) { @@ -1159,7 +1154,7 @@ static int fat_read_root(struct inode *inode) MSDOS_I(inode)->i_logstart = 0; MSDOS_I(inode)->mmu_private = inode->i_size; - MSDOS_I(inode)->i_attrs = ATTR_NONE; + fat_save_attrs(inode, ATTR_DIR); inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0; inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0; inode->i_nlink = fat_subdirs(inode)+2; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 79fb98ad36d4..ac39ebcc1496 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -8,8 +8,8 @@ #include <linux/module.h> #include <linux/fs.h> -#include <linux/msdos_fs.h> #include <linux/buffer_head.h> +#include "fat.h" /* * fat_fs_panic reports a severe file system problem and sets the file system @@ -124,8 +124,9 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) mark_inode_dirty(inode); } if (new_fclus != (inode->i_blocks >> (sbi->cluster_bits - 9))) { - fat_fs_panic(sb, "clusters badly computed (%d != %lu)", - new_fclus, inode->i_blocks >> (sbi->cluster_bits - 9)); + fat_fs_panic(sb, "clusters badly computed (%d != %llu)", + new_fclus, + (llu)(inode->i_blocks >> (sbi->cluster_bits - 9))); fat_cache_inval_inode(inode); } inode->i_blocks += nr_cluster << (sbi->cluster_bits - 9); @@ -135,65 +136,131 @@ int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster) extern struct timezone sys_tz; +/* + * The epoch of FAT timestamp is 1980. + * : bits : value + * date: 0 - 4: day (1 - 31) + * date: 5 - 8: month (1 - 12) + * date: 9 - 15: year (0 - 127) from 1980 + * time: 0 - 4: sec (0 - 29) 2sec counts + * time: 5 - 10: min (0 - 59) + * time: 11 - 15: hour (0 - 23) + */ +#define SECS_PER_MIN 60 +#define SECS_PER_HOUR (60 * 60) +#define SECS_PER_DAY (SECS_PER_HOUR * 24) +#define UNIX_SECS_1980 315532800L +#if BITS_PER_LONG == 64 +#define UNIX_SECS_2108 4354819200L +#endif +/* days between 1.1.70 and 1.1.80 (2 leap days) */ +#define DAYS_DELTA (365 * 10 + 2) +/* 120 (2100 - 1980) isn't leap year */ +#define YEAR_2100 120 +#define IS_LEAP_YEAR(y) (!((y) & 3) && (y) != YEAR_2100) + /* Linear day numbers of the respective 1sts in non-leap years. */ -static int day_n[] = { - /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ - 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0 +static time_t days_in_year[] = { + /* Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec */ + 0, 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, }; -/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */ -int date_dos2unix(unsigned short time, unsigned short date, int tz_utc) +/* Convert a FAT time/date pair to a UNIX date (seconds since 1 1 70). */ +void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 __time, __le16 __date, u8 time_cs) { - int month, year, secs; + u16 time = le16_to_cpu(__time), date = le16_to_cpu(__date); + time_t second, day, leap_day, month, year; - /* - * first subtract and mask after that... Otherwise, if - * date == 0, bad things happen - */ - month = ((date >> 5) - 1) & 15; - year = date >> 9; - secs = (time & 31)*2+60*((time >> 5) & 63)+(time >> 11)*3600+86400* - ((date & 31)-1+day_n[month]+(year/4)+year*365-((year & 3) == 0 && - month < 2 ? 1 : 0)+3653); - /* days since 1.1.70 plus 80's leap day */ - if (!tz_utc) - secs += sys_tz.tz_minuteswest*60; - return secs; + year = date >> 9; + month = max(1, (date >> 5) & 0xf); + day = max(1, date & 0x1f) - 1; + + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (IS_LEAP_YEAR(year) && month > 2) + leap_day++; + + second = (time & 0x1f) << 1; + second += ((time >> 5) & 0x3f) * SECS_PER_MIN; + second += (time >> 11) * SECS_PER_HOUR; + second += (year * 365 + leap_day + + days_in_year[month] + day + + DAYS_DELTA) * SECS_PER_DAY; + + if (!sbi->options.tz_utc) + second += sys_tz.tz_minuteswest * SECS_PER_MIN; + + if (time_cs) { + ts->tv_sec = second + (time_cs / 100); + ts->tv_nsec = (time_cs % 100) * 10000000; + } else { + ts->tv_sec = second; + ts->tv_nsec = 0; + } } -/* Convert linear UNIX date to a MS-DOS time/date pair. */ -void fat_date_unix2dos(int unix_date, __le16 *time, __le16 *date, int tz_utc) +/* Convert linear UNIX date to a FAT time/date pair. */ +void fat_time_unix2fat(struct msdos_sb_info *sbi, struct timespec *ts, + __le16 *time, __le16 *date, u8 *time_cs) { - int day, year, nl_day, month; + time_t second = ts->tv_sec; + time_t day, leap_day, month, year; - if (!tz_utc) - unix_date -= sys_tz.tz_minuteswest*60; + if (!sbi->options.tz_utc) + second -= sys_tz.tz_minuteswest * SECS_PER_MIN; /* Jan 1 GMT 00:00:00 1980. But what about another time zone? */ - if (unix_date < 315532800) - unix_date = 315532800; - - *time = cpu_to_le16((unix_date % 60)/2+(((unix_date/60) % 60) << 5)+ - (((unix_date/3600) % 24) << 11)); - day = unix_date/86400-3652; - year = day/365; - if ((year+3)/4+365*year > day) + if (second < UNIX_SECS_1980) { + *time = 0; + *date = cpu_to_le16((0 << 9) | (1 << 5) | 1); + if (time_cs) + *time_cs = 0; + return; + } +#if BITS_PER_LONG == 64 + if (second >= UNIX_SECS_2108) { + *time = cpu_to_le16((23 << 11) | (59 << 5) | 29); + *date = cpu_to_le16((127 << 9) | (12 << 5) | 31); + if (time_cs) + *time_cs = 199; + return; + } +#endif + + day = second / SECS_PER_DAY - DAYS_DELTA; + year = day / 365; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + if (year * 365 + leap_day > day) year--; - day -= (year+3)/4+365*year; - if (day == 59 && !(year & 3)) { - nl_day = day; + leap_day = (year + 3) / 4; + if (year > YEAR_2100) /* 2100 isn't leap year */ + leap_day--; + day -= year * 365 + leap_day; + + if (IS_LEAP_YEAR(year) && day == days_in_year[3]) { month = 2; } else { - nl_day = (year & 3) || day <= 59 ? day : day-1; - for (month = 0; month < 12; month++) { - if (day_n[month] > nl_day) + if (IS_LEAP_YEAR(year) && day > days_in_year[3]) + day--; + for (month = 1; month < 12; month++) { + if (days_in_year[month + 1] > day) break; } } - *date = cpu_to_le16(nl_day-day_n[month-1]+1+(month << 5)+(year << 9)); -} + day -= days_in_year[month]; -EXPORT_SYMBOL_GPL(fat_date_unix2dos); + *time = cpu_to_le16(((second / SECS_PER_HOUR) % 24) << 11 + | ((second / SECS_PER_MIN) % 60) << 5 + | (second % SECS_PER_MIN) >> 1); + *date = cpu_to_le16((year << 9) | (month << 5) | (day + 1)); + if (time_cs) + *time_cs = (ts->tv_sec & 1) * 100 + ts->tv_nsec / 10000000; +} +EXPORT_SYMBOL_GPL(fat_time_unix2fat); int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { diff --git a/fs/msdos/namei.c b/fs/fat/namei_msdos.c index e844b9809d27..7ba03a4acbe0 100644 --- a/fs/msdos/namei.c +++ b/fs/fat/namei_msdos.c @@ -9,8 +9,8 @@ #include <linux/module.h> #include <linux/time.h> #include <linux/buffer_head.h> -#include <linux/msdos_fs.h> #include <linux/smp_lock.h> +#include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; @@ -203,33 +203,37 @@ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; - int res; - - dentry->d_op = &msdos_dentry_operations; + struct inode *inode; + int err; lock_super(sb); - res = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); - if (res == -ENOENT) - goto add; - if (res < 0) - goto out; + + err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); + if (err) { + if (err == -ENOENT) { + inode = NULL; + goto out; + } + goto error; + } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - res = PTR_ERR(inode); - goto out; + err = PTR_ERR(inode); + goto error; } -add: - res = 0; +out: + unlock_super(sb); + dentry->d_op = &msdos_dentry_operations; dentry = d_splice_alias(inode, dentry); if (dentry) dentry->d_op = &msdos_dentry_operations; -out: + return dentry; + +error: unlock_super(sb); - if (!res) - return dentry; - return ERR_PTR(res); + return ERR_PTR(err); } /***** Creates a directory entry (name is already formatted). */ @@ -247,7 +251,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name, if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, NULL); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; diff --git a/fs/vfat/namei.c b/fs/fat/namei_vfat.c index 155c10b4adbd..bf326d4356a3 100644 --- a/fs/vfat/namei.c +++ b/fs/fat/namei_vfat.c @@ -16,36 +16,75 @@ */ #include <linux/module.h> - #include <linux/jiffies.h> -#include <linux/msdos_fs.h> #include <linux/ctype.h> #include <linux/slab.h> #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/namei.h> +#include "fat.h" -static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +/* + * If new entry was created in the parent, it could create the 8.3 + * alias (the shortname of logname). So, the parent may have the + * negative-dentry which matches the created 8.3 alias. + * + * If it happened, the negative dentry isn't actually negative + * anymore. So, drop it. + */ +static int vfat_revalidate_shortname(struct dentry *dentry) { int ret = 1; - - if (!dentry->d_inode && - nd && !(nd->flags & LOOKUP_CONTINUE) && (nd->flags & LOOKUP_CREATE)) - /* - * negative dentry is dropped, in order to make sure - * to use the name which a user desires if this is - * create path. - */ + spin_lock(&dentry->d_lock); + if (dentry->d_time != dentry->d_parent->d_inode->i_version) ret = 0; - else { - spin_lock(&dentry->d_lock); - if (dentry->d_time != dentry->d_parent->d_inode->i_version) - ret = 0; - spin_unlock(&dentry->d_lock); - } + spin_unlock(&dentry->d_lock); return ret; } +static int vfat_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + /* This is not negative dentry. Always valid. */ + if (dentry->d_inode) + return 1; + return vfat_revalidate_shortname(dentry); +} + +static int vfat_revalidate_ci(struct dentry *dentry, struct nameidata *nd) +{ + /* + * This is not negative dentry. Always valid. + * + * Note, rename() to existing directory entry will have ->d_inode, + * and will use existing name which isn't specified name by user. + * + * We may be able to drop this positive dentry here. But dropping + * positive dentry isn't good idea. So it's unsupported like + * rename("filename", "FILENAME") for now. + */ + if (dentry->d_inode) + return 1; + + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. + */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & LOOKUP_CREATE) + return 0; + } + + return vfat_revalidate_shortname(dentry); +} + /* returns the length of a struct qstr, ignoring trailing dots */ static unsigned int vfat_striptail_len(struct qstr *qstr) { @@ -127,25 +166,16 @@ static int vfat_cmp(struct dentry *dentry, struct qstr *a, struct qstr *b) return 1; } -static struct dentry_operations vfat_dentry_ops[4] = { - { - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hashi, - .d_compare = vfat_cmpi, - }, - { - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - }, - { - .d_revalidate = vfat_revalidate, - .d_hash = vfat_hash, - .d_compare = vfat_cmp, - } +static struct dentry_operations vfat_ci_dentry_ops = { + .d_revalidate = vfat_revalidate_ci, + .d_hash = vfat_hashi, + .d_compare = vfat_cmpi, +}; + +static struct dentry_operations vfat_dentry_ops = { + .d_revalidate = vfat_revalidate, + .d_hash = vfat_hash, + .d_compare = vfat_cmp, }; /* Characters that are undesirable in an MS-DOS file name */ @@ -569,6 +599,7 @@ static int vfat_build_slots(struct inode *dir, const unsigned char *name, unsigned char msdos_name[MSDOS_NAME]; wchar_t *uname; __le16 time, date; + u8 time_cs; int err, ulen, usize, i; loff_t offset; @@ -621,10 +652,10 @@ shortname: memcpy(de->name, msdos_name, MSDOS_NAME); de->attr = is_dir ? ATTR_DIR : ATTR_ARCH; de->lcase = lcase; - fat_date_unix2dos(ts->tv_sec, &time, &date, sbi->options.tz_utc); + fat_time_unix2fat(sbi, ts, &time, &date, &time_cs); de->time = de->ctime = time; de->date = de->cdate = de->adate = date; - de->ctime_cs = 0; + de->ctime_cs = time_cs; de->start = cpu_to_le16(cluster); de->starthi = cpu_to_le16(cluster >> 16); de->size = 0; @@ -683,46 +714,58 @@ static struct dentry *vfat_lookup(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; - struct inode *inode = NULL; + struct inode *inode; struct dentry *alias; - int err, table; + int err; lock_super(sb); - table = (MSDOS_SB(sb)->options.name_check == 's') ? 2 : 0; - dentry->d_op = &vfat_dentry_ops[table]; err = vfat_find(dir, &dentry->d_name, &sinfo); if (err) { - table++; + if (err == -ENOENT) { + inode = NULL; + goto out; + } goto error; } + inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { - unlock_super(sb); - return ERR_CAST(inode); + err = PTR_ERR(inode); + goto error; } - alias = d_find_alias(inode); - if (alias) { - if (d_invalidate(alias) == 0) - dput(alias); - else { - iput(inode); - unlock_super(sb); - return alias; - } + alias = d_find_alias(inode); + if (alias && !(alias->d_flags & DCACHE_DISCONNECTED)) { + /* + * This inode has non DCACHE_DISCONNECTED dentry. This + * means, the user did ->lookup() by an another name + * (longname vs 8.3 alias of it) in past. + * + * Switch to new one for reason of locality if possible. + */ + BUG_ON(d_unhashed(alias)); + if (!S_ISDIR(inode->i_mode)) + d_move(alias, dentry); + iput(inode); + unlock_super(sb); + return alias; } -error: +out: unlock_super(sb); - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; dentry = d_splice_alias(inode, dentry); if (dentry) { - dentry->d_op = &vfat_dentry_ops[table]; + dentry->d_op = sb->s_root->d_op; dentry->d_time = dentry->d_parent->d_inode->i_version; } return dentry; + +error: + unlock_super(sb); + return ERR_PTR(err); } static int vfat_create(struct inode *dir, struct dentry *dentry, int mode, @@ -1014,9 +1057,9 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return res; if (MSDOS_SB(sb)->options.name_check != 's') - sb->s_root->d_op = &vfat_dentry_ops[0]; + sb->s_root->d_op = &vfat_ci_dentry_ops; else - sb->s_root->d_op = &vfat_dentry_ops[2]; + sb->s_root->d_op = &vfat_dentry_ops; return 0; } diff --git a/fs/fifo.c b/fs/fifo.c index 987bf9411495..f8f97b8b6d44 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -51,7 +51,7 @@ static int fifo_open(struct inode *inode, struct file *filp) filp->f_mode &= (FMODE_READ | FMODE_WRITE); switch (filp->f_mode) { - case 1: + case FMODE_READ: /* * O_RDONLY * POSIX.1 says that O_NONBLOCK means return with the FIFO @@ -76,7 +76,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } break; - case 2: + case FMODE_WRITE: /* * O_WRONLY * POSIX.1 says that O_NONBLOCK means return -1 with @@ -98,7 +98,7 @@ static int fifo_open(struct inode *inode, struct file *filp) } break; - case 3: + case FMODE_READ | FMODE_WRITE: /* * O_RDWR * POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. diff --git a/fs/file_table.c b/fs/file_table.c index f45a4493f9e7..5ad0eca6eea2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -161,7 +161,7 @@ EXPORT_SYMBOL(get_empty_filp); * code should be moved into this function. */ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { struct file *file; struct path; @@ -193,7 +193,7 @@ EXPORT_SYMBOL(alloc_file); * of this should be moving to alloc_file(). */ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry, - mode_t mode, const struct file_operations *fop) + fmode_t mode, const struct file_operations *fop) { int error = 0; file->f_path.dentry = dentry; @@ -269,6 +269,10 @@ void __fput(struct file *file) eventpoll_release(file); locks_remove_flock(file); + if (unlikely(file->f_flags & FASYNC)) { + if (file->f_op && file->f_op->fasync) + file->f_op->fasync(-1, file, 0); + } if (file->f_op && file->f_op->release) file->f_op->release(inode, file); security_file_free(file); diff --git a/fs/filesystems.c b/fs/filesystems.c index f37f87262837..d0e20ced62dd 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c @@ -8,6 +8,8 @@ #include <linux/syscalls.h> #include <linux/fs.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/init.h> @@ -214,6 +216,43 @@ int get_filesystem_list(char * buf) return len; } +#ifdef CONFIG_PROC_FS +static int filesystems_proc_show(struct seq_file *m, void *v) +{ + struct file_system_type * tmp; + + read_lock(&file_systems_lock); + tmp = file_systems; + while (tmp) { + seq_printf(m, "%s\t%s\n", + (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", + tmp->name); + tmp = tmp->next; + } + read_unlock(&file_systems_lock); + return 0; +} + +static int filesystems_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, filesystems_proc_show, NULL); +} + +static const struct file_operations filesystems_proc_fops = { + .open = filesystems_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_filesystems_init(void) +{ + proc_create("filesystems", 0, NULL, &filesystems_proc_fops); + return 0; +} +module_init(proc_filesystems_init); +#endif + struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 87250b6a8682..b72361479be2 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1056,7 +1056,6 @@ static int fuse_dev_release(struct inode *inode, struct file *file) end_requests(fc, &fc->pending); end_requests(fc, &fc->processing); spin_unlock(&fc->lock); - fasync_helper(-1, file, 0, &fc->fasync); fuse_conn_put(fc); } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 54b1f0e1ef58..2e99f34b4435 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -596,12 +596,8 @@ static struct dentry *fuse_get_dentry(struct super_block *sb, if (inode->i_generation != handle->generation) goto out_iput; - entry = d_alloc_anon(inode); - err = -ENOMEM; - if (!entry) - goto out_iput; - - if (get_node_id(inode) != FUSE_ROOT_ID) { + entry = d_obtain_alias(inode); + if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) { entry->d_op = &fuse_dentry_operations; fuse_invalidate_entry_cache(entry); } @@ -696,17 +692,14 @@ static struct dentry *fuse_get_parent(struct dentry *child) name.name = ".."; err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode), &name, &outarg, &inode); - if (err && err != -ENOENT) + if (err) { + if (err == -ENOENT) + return ERR_PTR(-ESTALE); return ERR_PTR(err); - if (err || !inode) - return ERR_PTR(-ESTALE); - - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - return ERR_PTR(-ENOMEM); } - if (get_node_id(inode) != FUSE_ROOT_ID) { + + parent = d_obtain_alias(inode); + if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) { parent->d_op = &fuse_dentry_operations; fuse_invalidate_entry_cache(parent); } diff --git a/fs/gfs2/ops_export.c b/fs/gfs2/ops_export.c index 9cda8536530c..bbb8c36403a9 100644 --- a/fs/gfs2/ops_export.c +++ b/fs/gfs2/ops_export.c @@ -130,28 +130,17 @@ static int gfs2_get_name(struct dentry *parent, char *name, static struct dentry *gfs2_get_parent(struct dentry *child) { struct qstr dotdot; - struct inode *inode; struct dentry *dentry; - gfs2_str2qstr(&dotdot, ".."); - inode = gfs2_lookupi(child->d_inode, &dotdot, 1); - - if (!inode) - return ERR_PTR(-ENOENT); /* - * In case of an error, @inode carries the error value, and we - * have to return that as a(n invalid) pointer to dentry. + * XXX(hch): it would be a good idea to keep this around as a + * static variable. */ - if (IS_ERR(inode)) - return ERR_CAST(inode); - - dentry = d_alloc_anon(inode); - if (!dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } + gfs2_str2qstr(&dotdot, ".."); - dentry->d_op = &gfs2_dops; + dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); + if (!IS_ERR(dentry)) + dentry->d_op = &gfs2_dops; return dentry; } @@ -233,13 +222,9 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb, gfs2_glock_dq_uninit(&i_gh); out_inode: - dentry = d_alloc_anon(inode); - if (!dentry) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - - dentry->d_op = &gfs2_dops; + dentry = d_obtain_alias(inode); + if (!IS_ERR(dentry)) + dentry->d_op = &gfs2_dops; return dentry; fail_rgd: diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 534e1e2c65ca..d232991b9046 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -69,7 +69,7 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry, mark_inode_dirty(inode); break; } else if (PTR_ERR(inode) != -EEXIST || - (nd && (nd->intent.open.flags & O_EXCL))) { + (nd && nd->flags & LOOKUP_EXCL)) { gfs2_holder_uninit(ghs); return PTR_ERR(inode); } diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 7e19835efa2e..c69b7ac75bf7 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -511,13 +511,6 @@ void hfs_clear_inode(struct inode *inode) } } -static int hfs_permission(struct inode *inode, int mask) -{ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC) - return 0; - return generic_permission(inode, mask, NULL); -} - static int hfs_file_open(struct inode *inode, struct file *file) { if (HFS_IS_RSRC(inode)) @@ -616,7 +609,6 @@ static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, .truncate = hfs_file_truncate, .setattr = hfs_inode_setattr, - .permission = hfs_permission, .setxattr = hfs_setxattr, .getxattr = hfs_getxattr, .listxattr = hfs_listxattr, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 963be644297a..b207f0e6fc22 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -238,18 +238,6 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); } -static int hfsplus_permission(struct inode *inode, int mask) -{ - /* MAY_EXEC is also used for lookup, if no x bit is set allow lookup, - * open_exec has the same test, so it's still not executable, if a x bit - * is set fall back to standard permission check. - */ - if (S_ISREG(inode->i_mode) && mask & MAY_EXEC && !(inode->i_mode & 0111)) - return 0; - return generic_permission(inode, mask, NULL); -} - - static int hfsplus_file_open(struct inode *inode, struct file *file) { if (HFSPLUS_IS_RSRC(inode)) @@ -281,7 +269,6 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, - .permission = hfsplus_permission, .setxattr = hfsplus_setxattr, .getxattr = hfsplus_getxattr, .listxattr = hfsplus_listxattr, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index d6ecabf4d231..7f34f4385de0 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -20,7 +20,7 @@ struct hostfs_inode_info { char *host_filename; int fd; - int mode; + fmode_t mode; struct inode vfs_inode; }; @@ -373,7 +373,8 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) int hostfs_file_open(struct inode *ino, struct file *file) { char *name; - int mode = 0, r = 0, w = 0, fd; + fmode_t mode = 0; + int r = 0, w = 0, fd; mode = file->f_mode & (FMODE_READ | FMODE_WRITE); if ((mode & HOSTFS_I(ino)->mode) == mode) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index be8be5040e07..64ab52259204 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -143,5 +143,5 @@ const struct file_operations hpfs_file_ops = const struct inode_operations hpfs_file_iops = { .truncate = hpfs_truncate, - .setattr = hpfs_notify_change, + .setattr = hpfs_setattr, }; diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index 42ff60ccf2a9..c2ea31bae313 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -275,7 +275,7 @@ void hpfs_init_inode(struct inode *); void hpfs_read_inode(struct inode *); void hpfs_write_inode(struct inode *); void hpfs_write_inode_nolock(struct inode *); -int hpfs_notify_change(struct dentry *, struct iattr *); +int hpfs_setattr(struct dentry *, struct iattr *); void hpfs_write_if_changed(struct inode *); void hpfs_delete_inode(struct inode *); diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 85d3e1d9ac00..39a1bfbea312 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -260,19 +260,28 @@ void hpfs_write_inode_nolock(struct inode *i) brelse(bh); } -int hpfs_notify_change(struct dentry *dentry, struct iattr *attr) +int hpfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; - int error=0; + int error = -EINVAL; + lock_kernel(); - if ( ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) || - (hpfs_sb(inode->i_sb)->sb_root == inode->i_ino) ) { - error = -EINVAL; - } else if ((error = inode_change_ok(inode, attr))) { - } else if ((error = inode_setattr(inode, attr))) { - } else { - hpfs_write_inode(inode); - } + if (inode->i_ino == hpfs_sb(inode->i_sb)->sb_root) + goto out_unlock; + if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size) + goto out_unlock; + + error = inode_change_ok(inode, attr); + if (error) + goto out_unlock; + + error = inode_setattr(inode, attr); + if (error) + goto out_unlock; + + hpfs_write_inode(inode); + + out_unlock: unlock_kernel(); return error; } diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c index d9c59a775449..10783f3d265a 100644 --- a/fs/hpfs/namei.c +++ b/fs/hpfs/namei.c @@ -669,5 +669,5 @@ const struct inode_operations hpfs_dir_iops = .rmdir = hpfs_rmdir, .mknod = hpfs_mknod, .rename = hpfs_rename, - .setattr = hpfs_notify_change, + .setattr = hpfs_setattr, }; diff --git a/fs/inotify_user.c b/fs/inotify_user.c index d85c7d931cdf..d367e9b92862 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -537,9 +537,6 @@ static int inotify_release(struct inode *ignored, struct file *file) inotify_dev_event_dequeue(dev); mutex_unlock(&dev->ev_mutex); - if (file->f_flags & FASYNC) - inotify_fasync(-1, file, 0); - /* free this device: the put matching the get in inotify_init() */ put_inotify_dev(dev); diff --git a/fs/isofs/export.c b/fs/isofs/export.c index bb219138331a..e81a30593ba9 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -22,7 +22,7 @@ isofs_export_iget(struct super_block *sb, __u32 generation) { struct inode *inode; - struct dentry *result; + if (block == 0) return ERR_PTR(-ESTALE); inode = isofs_iget(sb, block, offset); @@ -32,12 +32,7 @@ isofs_export_iget(struct super_block *sb, iput(inode); return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return d_obtain_alias(inode); } /* This function is surprisingly simple. The trick is understanding @@ -51,7 +46,6 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) unsigned long parent_offset = 0; struct inode *child_inode = child->d_inode; struct iso_inode_info *e_child_inode = ISOFS_I(child_inode); - struct inode *parent_inode = NULL; struct iso_directory_record *de = NULL; struct buffer_head * bh = NULL; struct dentry *rv = NULL; @@ -104,28 +98,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child) /* Normalize */ isofs_normalize_block_and_offset(de, &parent_block, &parent_offset); - /* Get the inode. */ - parent_inode = isofs_iget(child_inode->i_sb, - parent_block, - parent_offset); - if (IS_ERR(parent_inode)) { - rv = ERR_CAST(parent_inode); - if (rv != ERR_PTR(-ENOMEM)) - rv = ERR_PTR(-EACCES); - goto out; - } - - /* Allocate the dentry. */ - rv = d_alloc_anon(parent_inode); - if (rv == NULL) { - rv = ERR_PTR(-ENOMEM); - goto out; - } - + rv = d_obtain_alias(isofs_iget(child_inode->i_sb, parent_block, + parent_offset)); out: - if (bh) { + if (bh) brelse(bh); - } return rv; } diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index a5432bbbfb88..1bd8d4acc6f2 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -93,7 +93,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) int ret = 0; struct buffer_head *bh = jh2bh(jh); - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && + !buffer_dirty(bh) && !buffer_write_io_error(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); @@ -126,14 +127,29 @@ void __log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock + * were waiting for the checkpoint lock. If there are no + * outstanding transactions there is nothing to checkpoint and + * we can't make progress. Abort the journal in this case. */ spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); if (__log_space_left(journal) < nblocks) { + int chkpt = journal->j_checkpoint_transactions != NULL; + + spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); - log_do_checkpoint(journal); + if (chkpt) { + log_do_checkpoint(journal); + } else { + printk(KERN_ERR "%s: no transactions\n", + __func__); + journal_abort(journal, 0); + } + spin_lock(&journal->j_state_lock); + } else { + spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } @@ -160,21 +176,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) * buffers. Note that we take the buffers in the opposite ordering * from the one in which they were submitted for IO. * + * Return 0 on success, and return <0 if some buffers have failed + * to be written out. + * * Called with j_list_lock held. */ -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __wait_cp_io(journal_t *journal, transaction_t *transaction) { struct journal_head *jh; struct buffer_head *bh; tid_t this_tid; int released = 0; + int ret = 0; this_tid = transaction->t_tid; restart: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) - return; + return ret; while (!released && transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); @@ -194,6 +214,9 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; + /* * Now in whatever state the buffer currently is, we know that * it has been written out and so we can drop it from the list @@ -203,6 +226,8 @@ restart: journal_remove_journal_head(bh); __brelse(bh); } + + return ret; } #define NR_BATCH 64 @@ -226,7 +251,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. + * scan of the checkpoint list. Return <0 if the buffer has failed to + * be written out. * * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it @@ -256,6 +282,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { + ret = 1; + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __journal_remove_checkpoint(jh); @@ -263,7 +292,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd_unlock_bh_state(bh); journal_remove_journal_head(bh); __brelse(bh); - ret = 1; } else { /* * Important: we are about to write the buffer, and @@ -295,6 +323,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. + * Called with j_checkpoint_mutex held. */ int log_do_checkpoint(journal_t *journal) { @@ -318,6 +347,7 @@ int log_do_checkpoint(journal_t *journal) * OK, we need to start writing disk blocks. Take one transaction * and write it. */ + result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; @@ -334,7 +364,7 @@ restart: int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; - int retry = 0; + int retry = 0, err; while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; @@ -347,6 +377,8 @@ restart: break; } retry = __process_buffer(journal, jh, bhs,&batch_count); + if (retry < 0 && !result) + result = retry; if (!retry && (need_resched() || spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); @@ -371,14 +403,18 @@ restart: * Now we have cleaned up the first transaction's checkpoint * list. Let's clean up the second one */ - __wait_cp_io(journal, transaction); + err = __wait_cp_io(journal, transaction); + if (!result) + result = err; } out: spin_unlock(&journal->j_list_lock); - result = cleanup_journal_tail(journal); if (result < 0) - return result; - return 0; + journal_abort(journal, result); + else + result = cleanup_journal_tail(journal); + + return (result < 0) ? result : 0; } /* @@ -394,8 +430,9 @@ out: * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the journal superblock if - * we have an abort error outstanding. + * even in abort state, but we must not update the super block if + * checkpointing may have failed. Otherwise, we would lose some metadata + * buffers which should be written-back to the filesystem. */ int cleanup_journal_tail(journal_t *journal) @@ -404,6 +441,9 @@ int cleanup_journal_tail(journal_t *journal) tid_t first_tid; unsigned long blocknr, freed; + if (is_journal_aborted(journal)) + return 1; + /* OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index aa7143a8349b..9e4fa52d7dc8 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1121,9 +1121,12 @@ recovery_error: * * Release a journal_t structure once it is no longer in use by the * journaled object. + * Return <0 if we couldn't clean up the journal. */ -void journal_destroy(journal_t *journal) +int journal_destroy(journal_t *journal) { + int err = 0; + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); @@ -1146,11 +1149,16 @@ void journal_destroy(journal_t *journal) J_ASSERT(journal->j_checkpoint_transactions == NULL); spin_unlock(&journal->j_list_lock); - /* We can now mark the journal as empty. */ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - journal_update_superblock(journal, 1); + if (!is_journal_aborted(journal)) { + /* We can now mark the journal as empty. */ + journal->j_tail = 0; + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + journal_update_superblock(journal, 1); + } else { + err = -EIO; + } brelse(journal->j_sb_buffer); } @@ -1160,6 +1168,8 @@ void journal_destroy(journal_t *journal) journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); + + return err; } @@ -1359,10 +1369,16 @@ int journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); err = log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + + if (is_journal_aborted(journal)) + return -EIO; + cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. @@ -1384,7 +1400,7 @@ int journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); spin_unlock(&journal->j_state_lock); - return err; + return 0; } /** diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 43bc5e5ed064..db5e982c5ddf 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -223,7 +223,7 @@ do { \ */ int journal_recover(journal_t *journal) { - int err; + int err, err2; journal_superblock_t * sb; struct recovery_info info; @@ -261,7 +261,10 @@ int journal_recover(journal_t *journal) journal->j_transaction_sequence = ++info.end_transaction; journal_clear_revoke(journal); - sync_blockdev(journal->j_fs_dev); + err2 = sync_blockdev(journal->j_fs_dev); + if (!err) + err = err2; + return err; } diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index d15cd6e7251e..60d4c32c8808 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -860,7 +860,6 @@ out: * int journal_get_undo_access() - Notify intent to modify metadata with non-rewindable consequences * @handle: transaction * @bh: buffer to undo - * @credits: store the number of taken credits here (if not NULL) * * Sometimes there is a need to distinguish between metadata which has * been committed to disk and that which has not. The ext3fs code uses diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8b119e16aa36..ebc667bc54a8 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -974,6 +974,9 @@ restart_loop: journal->j_committing_transaction = NULL; spin_unlock(&journal->j_state_lock); + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + if (commit_transaction->t_checkpoint_list == NULL && commit_transaction->t_checkpoint_io_list == NULL) { __jbd2_journal_drop_transaction(journal, commit_transaction); @@ -995,11 +998,8 @@ restart_loop: } spin_unlock(&journal->j_list_lock); - if (journal->j_commit_callback) - journal->j_commit_callback(journal, commit_transaction); - trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", - journal->j_devname, commit_transaction->t_tid, + journal->j_devname, journal->j_commit_sequence, journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index b1aaae823a52..6f60cc910f4c 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -39,7 +39,8 @@ const struct file_operations jffs2_dir_operations = .read = generic_read_dir, .readdir = jffs2_readdir, .unlocked_ioctl=jffs2_ioctl, - .fsync = jffs2_fsync + .fsync = jffs2_fsync, + .llseek = generic_file_llseek, }; @@ -108,9 +109,7 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, } } - d_add(target, inode); - - return NULL; + return d_splice_alias(inode, target); } /***********************************************************************/ diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index efd401257ed9..4c4e18c54a51 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -22,6 +22,7 @@ #include <linux/mtd/super.h> #include <linux/ctype.h> #include <linux/namei.h> +#include <linux/exportfs.h> #include "compr.h" #include "nodelist.h" @@ -62,6 +63,52 @@ static int jffs2_sync_fs(struct super_block *sb, int wait) return 0; } +static struct inode *jffs2_nfs_get_inode(struct super_block *sb, uint64_t ino, + uint32_t generation) +{ + /* We don't care about i_generation. We'll destroy the flash + before we start re-using inode numbers anyway. And even + if that wasn't true, we'd have other problems...*/ + return jffs2_iget(sb, ino); +} + +static struct dentry *jffs2_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + jffs2_nfs_get_inode); +} + +static struct dentry *jffs2_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + jffs2_nfs_get_inode); +} + +static struct dentry *jffs2_get_parent(struct dentry *child) +{ + struct jffs2_inode_info *f; + uint32_t pino; + + BUG_ON(!S_ISDIR(child->d_inode->i_mode)); + + f = JFFS2_INODE_INFO(child->d_inode); + + pino = f->inocache->pino_nlink; + + JFFS2_DEBUG("Parent of directory ino #%u is #%u\n", + f->inocache->ino, pino); + + return d_obtain_alias(jffs2_iget(child->d_inode->i_sb, pino)); +} + +static struct export_operations jffs2_export_ops = { + .get_parent = jffs2_get_parent, + .fh_to_dentry = jffs2_fh_to_dentry, + .fh_to_parent = jffs2_fh_to_parent, +}; + static const struct super_operations jffs2_super_operations = { .alloc_inode = jffs2_alloc_inode, @@ -104,6 +151,7 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) spin_lock_init(&c->inocache_lock); sb->s_op = &jffs2_super_operations; + sb->s_export_op = &jffs2_export_ops; sb->s_flags = sb->s_flags | MS_NOATIME; sb->s_xattr = jffs2_xattr_handlers; #ifdef CONFIG_JFFS2_FS_POSIX_ACL diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index cd2ec2988b59..335c4de6552d 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c @@ -1168,7 +1168,7 @@ journal_found: bd_release(bdev); close: /* close external log device */ - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); free: /* free log descriptor */ mutex_unlock(&jfs_log_mutex); @@ -1514,7 +1514,7 @@ int lmLogClose(struct super_block *sb) rc = lmLogShutdown(log); bd_release(bdev); - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE); kfree(log); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 2aba82386810..cc3cedffbfa1 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1511,25 +1511,12 @@ struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid, struct dentry *jfs_get_parent(struct dentry *dentry) { - struct super_block *sb = dentry->d_inode->i_sb; - struct dentry *parent = ERR_PTR(-ENOENT); - struct inode *inode; unsigned long parent_ino; parent_ino = le32_to_cpu(JFS_IP(dentry->d_inode)->i_dtroot.header.idotdot); - inode = jfs_iget(sb, parent_ino); - if (IS_ERR(inode)) { - parent = ERR_CAST(inode); - } else { - parent = d_alloc_anon(inode); - if (!parent) { - parent = ERR_PTR(-ENOMEM); - iput(inode); - } - } - return parent; + return d_obtain_alias(jfs_iget(dentry->d_inode->i_sb, parent_ino)); } const struct inode_operations jfs_dir_inode_operations = { @@ -1560,6 +1547,7 @@ const struct file_operations jfs_dir_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = jfs_compat_ioctl, #endif + .llseek = generic_file_llseek, }; static int jfs_ci_hash(struct dentry *dir, struct qstr *this) diff --git a/fs/libfs.c b/fs/libfs.c index 1add676a19df..e960a8321902 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -732,28 +732,6 @@ out: return ret; } -/* - * This is what d_alloc_anon should have been. Once the exportfs - * argument transition has been finished I will update d_alloc_anon - * to this prototype and this wrapper will go away. --hch - */ -static struct dentry *exportfs_d_alloc(struct inode *inode) -{ - struct dentry *dentry; - - if (!inode) - return NULL; - if (IS_ERR(inode)) - return ERR_PTR(PTR_ERR(inode)); - - dentry = d_alloc_anon(inode); - if (!dentry) { - iput(inode); - dentry = ERR_PTR(-ENOMEM); - } - return dentry; -} - /** * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation * @sb: filesystem to do the file handle conversion on @@ -782,7 +760,7 @@ struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid, break; } - return exportfs_d_alloc(inode); + return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_dentry); @@ -815,7 +793,7 @@ struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid, break; } - return exportfs_d_alloc(inode); + return d_obtain_alias(inode); } EXPORT_SYMBOL_GPL(generic_fh_to_parent); @@ -836,7 +814,7 @@ EXPORT_SYMBOL(simple_getattr); EXPORT_SYMBOL(simple_link); EXPORT_SYMBOL(simple_lookup); EXPORT_SYMBOL(simple_pin_fs); -EXPORT_SYMBOL(simple_prepare_write); +EXPORT_UNUSED_SYMBOL(simple_prepare_write); EXPORT_SYMBOL(simple_readpage); EXPORT_SYMBOL(simple_release_fs); EXPORT_SYMBOL(simple_rename); diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 014f6ce48172..4dfdcbc6bf68 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -434,6 +434,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, * reclaim all locks we hold on this server. */ memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = argp->addr; nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 548b0bb2b84d..3ca89e2a9381 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -466,6 +466,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, * reclaim all locks we hold on this server. */ memset(&saddr, 0, sizeof(saddr)); + saddr.sin_family = AF_INET; saddr.sin_addr.s_addr = argp->addr; nlm_host_rebooted(&saddr, argp->mon, argp->len, argp->state); diff --git a/fs/locks.c b/fs/locks.c index 5eb259e3cd38..09062e3ff104 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1580,7 +1580,8 @@ asmlinkage long sys_flock(unsigned int fd, unsigned int cmd) cmd &= ~LOCK_NB; unlock = (cmd == LOCK_UN); - if (!unlock && !(cmd & LOCK_MAND) && !(filp->f_mode & 3)) + if (!unlock && !(cmd & LOCK_MAND) && + !(filp->f_mode & (FMODE_READ|FMODE_WRITE))) goto out_putf; error = flock_make_lock(filp, &lock, cmd); @@ -2078,6 +2079,7 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) EXPORT_SYMBOL_GPL(vfs_cancel_lock); #ifdef CONFIG_PROC_FS +#include <linux/proc_fs.h> #include <linux/seq_file.h> static void lock_get_status(struct seq_file *f, struct file_lock *fl, @@ -2183,12 +2185,31 @@ static void locks_stop(struct seq_file *f, void *v) unlock_kernel(); } -struct seq_operations locks_seq_operations = { +static const struct seq_operations locks_seq_operations = { .start = locks_start, .next = locks_next, .stop = locks_stop, .show = locks_show, }; + +static int locks_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &locks_seq_operations); +} + +static const struct file_operations proc_locks_operations = { + .open = locks_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_locks_init(void) +{ + proc_create("locks", 0, NULL, &proc_locks_operations); + return 0; +} +module_init(proc_locks_init); #endif /** diff --git a/fs/msdos/Makefile b/fs/msdos/Makefile deleted file mode 100644 index ea67646fcb95..000000000000 --- a/fs/msdos/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux msdos filesystem routines. -# - -obj-$(CONFIG_MSDOS_FS) += msdos.o - -msdos-y := namei.o diff --git a/fs/namei.c b/fs/namei.c index 4ea63ed5e791..09ce58e49e72 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -212,8 +212,7 @@ int generic_permission(struct inode *inode, int mask, * Read/write DACs are always overridable. * Executable DACs are overridable if at least one exec bit is set. */ - if (!(mask & MAY_EXEC) || - (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode)) + if (!(mask & MAY_EXEC) || execute_ok(inode)) if (capable(CAP_DAC_OVERRIDE)) return 0; @@ -249,23 +248,11 @@ int inode_permission(struct inode *inode, int mask) } /* Ordinary permission routines do not understand MAY_APPEND. */ - if (inode->i_op && inode->i_op->permission) { + if (inode->i_op && inode->i_op->permission) retval = inode->i_op->permission(inode, mask); - if (!retval) { - /* - * Exec permission on a regular file is denied if none - * of the execute bits are set. - * - * This check should be done by the ->permission() - * method. - */ - if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode) && - !(inode->i_mode & S_IXUGO)) - return -EACCES; - } - } else { + else retval = generic_permission(inode, mask, NULL); - } + if (retval) return retval; @@ -1106,6 +1093,15 @@ int path_lookup(const char *name, unsigned int flags, return do_path_lookup(AT_FDCWD, name, flags, nd); } +int kern_path(const char *name, unsigned int flags, struct path *path) +{ + struct nameidata nd; + int res = do_path_lookup(AT_FDCWD, name, flags, &nd); + if (!res) + *path = nd.path; + return res; +} + /** * vfs_path_lookup - lookup a file path relative to a dentry-vfsmount pair * @dentry: pointer to dentry of the base directory @@ -1138,9 +1134,16 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } -static int __path_lookup_intent_open(int dfd, const char *name, - unsigned int lookup_flags, struct nameidata *nd, - int open_flags, int create_mode) +/** + * path_lookup_open - lookup a file path with open intent + * @dfd: the directory to use as base, or AT_FDCWD + * @name: pointer to file name + * @lookup_flags: lookup intent flags + * @nd: pointer to nameidata + * @open_flags: open intent flags + */ +int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, + struct nameidata *nd, int open_flags) { struct file *filp = get_empty_filp(); int err; @@ -1149,7 +1152,7 @@ static int __path_lookup_intent_open(int dfd, const char *name, return -ENFILE; nd->intent.open.file = filp; nd->intent.open.flags = open_flags; - nd->intent.open.create_mode = create_mode; + nd->intent.open.create_mode = 0; err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd); if (IS_ERR(nd->intent.open.file)) { if (err == 0) { @@ -1161,38 +1164,6 @@ static int __path_lookup_intent_open(int dfd, const char *name, return err; } -/** - * path_lookup_open - lookup a file path with open intent - * @dfd: the directory to use as base, or AT_FDCWD - * @name: pointer to file name - * @lookup_flags: lookup intent flags - * @nd: pointer to nameidata - * @open_flags: open intent flags - */ -int path_lookup_open(int dfd, const char *name, unsigned int lookup_flags, - struct nameidata *nd, int open_flags) -{ - return __path_lookup_intent_open(dfd, name, lookup_flags, nd, - open_flags, 0); -} - -/** - * path_lookup_create - lookup a file path with open + create intent - * @dfd: the directory to use as base, or AT_FDCWD - * @name: pointer to file name - * @lookup_flags: lookup intent flags - * @nd: pointer to nameidata - * @open_flags: open intent flags - * @create_mode: create intent flags - */ -static int path_lookup_create(int dfd, const char *name, - unsigned int lookup_flags, struct nameidata *nd, - int open_flags, int create_mode) -{ - return __path_lookup_intent_open(dfd, name, lookup_flags|LOOKUP_CREATE, - nd, open_flags, create_mode); -} - static struct dentry *__lookup_hash(struct qstr *name, struct dentry *base, struct nameidata *nd) { @@ -1470,20 +1441,18 @@ struct dentry *lock_rename(struct dentry *p1, struct dentry *p2) mutex_lock(&p1->d_inode->i_sb->s_vfs_rename_mutex); - for (p = p1; p->d_parent != p; p = p->d_parent) { - if (p->d_parent == p2) { - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); - return p; - } + p = d_ancestor(p2, p1); + if (p) { + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_CHILD); + return p; } - for (p = p2; p->d_parent != p; p = p->d_parent) { - if (p->d_parent == p1) { - mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); - return p; - } + p = d_ancestor(p1, p2); + if (p) { + mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&p2->d_inode->i_mutex, I_MUTEX_CHILD); + return p; } mutex_lock_nested(&p1->d_inode->i_mutex, I_MUTEX_PARENT); @@ -1702,8 +1671,7 @@ struct file *do_filp_open(int dfd, const char *pathname, /* * Create - we need to know the parent. */ - error = path_lookup_create(dfd, pathname, LOOKUP_PARENT, - &nd, flag, mode); + error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd); if (error) return ERR_PTR(error); @@ -1714,10 +1682,20 @@ struct file *do_filp_open(int dfd, const char *pathname, */ error = -EISDIR; if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len]) - goto exit; + goto exit_parent; + error = -ENFILE; + filp = get_empty_filp(); + if (filp == NULL) + goto exit_parent; + nd.intent.open.file = filp; + nd.intent.open.flags = flag; + nd.intent.open.create_mode = mode; dir = nd.path.dentry; nd.flags &= ~LOOKUP_PARENT; + nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN; + if (flag & O_EXCL) + nd.flags |= LOOKUP_EXCL; mutex_lock(&dir->d_inode->i_mutex); path.dentry = lookup_hash(&nd); path.mnt = nd.path.mnt; @@ -1822,6 +1800,7 @@ exit_dput: exit: if (!IS_ERR(nd.intent.open.file)) release_open_intent(&nd); +exit_parent: path_put(&nd.path); return ERR_PTR(error); @@ -1914,7 +1893,7 @@ struct dentry *lookup_create(struct nameidata *nd, int is_dir) if (nd->last_type != LAST_NORM) goto fail; nd->flags &= ~LOOKUP_PARENT; - nd->flags |= LOOKUP_CREATE; + nd->flags |= LOOKUP_CREATE | LOOKUP_EXCL; nd->intent.open.flags = O_EXCL; /* @@ -2178,16 +2157,19 @@ static long do_rmdir(int dfd, const char __user *pathname) return error; switch(nd.last_type) { - case LAST_DOTDOT: - error = -ENOTEMPTY; - goto exit1; - case LAST_DOT: - error = -EINVAL; - goto exit1; - case LAST_ROOT: - error = -EBUSY; - goto exit1; + case LAST_DOTDOT: + error = -ENOTEMPTY; + goto exit1; + case LAST_DOT: + error = -EINVAL; + goto exit1; + case LAST_ROOT: + error = -EBUSY; + goto exit1; } + + nd.flags &= ~LOOKUP_PARENT; + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -2265,6 +2247,9 @@ static long do_unlinkat(int dfd, const char __user *pathname) error = -EISDIR; if (nd.last_type != LAST_NORM) goto exit1; + + nd.flags &= ~LOOKUP_PARENT; + mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); dentry = lookup_hash(&nd); error = PTR_ERR(dentry); @@ -2654,6 +2639,10 @@ asmlinkage long sys_renameat(int olddfd, const char __user *oldname, if (newnd.last_type != LAST_NORM) goto exit2; + oldnd.flags &= ~LOOKUP_PARENT; + newnd.flags &= ~LOOKUP_PARENT; + newnd.flags |= LOOKUP_RENAME_TARGET; + trap = lock_rename(new_dir, old_dir); old_dentry = lookup_hash(&oldnd); @@ -2855,6 +2844,7 @@ EXPORT_SYMBOL(__page_symlink); EXPORT_SYMBOL(page_symlink); EXPORT_SYMBOL(page_symlink_inode_operations); EXPORT_SYMBOL(path_lookup); +EXPORT_SYMBOL(kern_path); EXPORT_SYMBOL(vfs_path_lookup); EXPORT_SYMBOL(inode_permission); EXPORT_SYMBOL(vfs_permission); diff --git a/fs/namespace.c b/fs/namespace.c index 6e283c93b50d..cce46702d33c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1167,19 +1167,19 @@ asmlinkage long sys_oldumount(char __user * name) #endif -static int mount_is_safe(struct nameidata *nd) +static int mount_is_safe(struct path *path) { if (capable(CAP_SYS_ADMIN)) return 0; return -EPERM; #ifdef notyet - if (S_ISLNK(nd->path.dentry->d_inode->i_mode)) + if (S_ISLNK(path->dentry->d_inode->i_mode)) return -EPERM; - if (nd->path.dentry->d_inode->i_mode & S_ISVTX) { - if (current->uid != nd->path.dentry->d_inode->i_uid) + if (path->dentry->d_inode->i_mode & S_ISVTX) { + if (current->uid != path->dentry->d_inode->i_uid) return -EPERM; } - if (vfs_permission(nd, MAY_WRITE)) + if (inode_permission(path->dentry->d_inode, MAY_WRITE)) return -EPERM; return 0; #endif @@ -1425,11 +1425,10 @@ out_unlock: /* * recursively change the type of the mountpoint. - * noinline this do_mount helper to save do_mount stack space. */ -static noinline int do_change_type(struct nameidata *nd, int flag) +static int do_change_type(struct path *path, int flag) { - struct vfsmount *m, *mnt = nd->path.mnt; + struct vfsmount *m, *mnt = path->mnt; int recurse = flag & MS_REC; int type = flag & ~MS_REC; int err = 0; @@ -1437,7 +1436,7 @@ static noinline int do_change_type(struct nameidata *nd, int flag) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (nd->path.dentry != nd->path.mnt->mnt_root) + if (path->dentry != path->mnt->mnt_root) return -EINVAL; down_write(&namespace_sem); @@ -1459,40 +1458,39 @@ static noinline int do_change_type(struct nameidata *nd, int flag) /* * do loopback mount. - * noinline this do_mount helper to save do_mount stack space. */ -static noinline int do_loopback(struct nameidata *nd, char *old_name, +static int do_loopback(struct path *path, char *old_name, int recurse) { - struct nameidata old_nd; + struct path old_path; struct vfsmount *mnt = NULL; - int err = mount_is_safe(nd); + int err = mount_is_safe(path); if (err) return err; if (!old_name || !*old_name) return -EINVAL; - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); if (err) return err; down_write(&namespace_sem); err = -EINVAL; - if (IS_MNT_UNBINDABLE(old_nd.path.mnt)) + if (IS_MNT_UNBINDABLE(old_path.mnt)) goto out; - if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) + if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out; err = -ENOMEM; if (recurse) - mnt = copy_tree(old_nd.path.mnt, old_nd.path.dentry, 0); + mnt = copy_tree(old_path.mnt, old_path.dentry, 0); else - mnt = clone_mnt(old_nd.path.mnt, old_nd.path.dentry, 0); + mnt = clone_mnt(old_path.mnt, old_path.dentry, 0); if (!mnt) goto out; - err = graft_tree(mnt, &nd->path); + err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); spin_lock(&vfsmount_lock); @@ -1503,7 +1501,7 @@ static noinline int do_loopback(struct nameidata *nd, char *old_name, out: up_write(&namespace_sem); - path_put(&old_nd.path); + path_put(&old_path); return err; } @@ -1528,33 +1526,37 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags) * change filesystem flags. dir should be a physical root of filesystem. * If you've mounted a non-root directory somewhere and want to do remount * on it - tough luck. - * noinline this do_mount helper to save do_mount stack space. */ -static noinline int do_remount(struct nameidata *nd, int flags, int mnt_flags, +static int do_remount(struct path *path, int flags, int mnt_flags, void *data) { int err; - struct super_block *sb = nd->path.mnt->mnt_sb; + struct super_block *sb = path->mnt->mnt_sb; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!check_mnt(nd->path.mnt)) + if (!check_mnt(path->mnt)) return -EINVAL; - if (nd->path.dentry != nd->path.mnt->mnt_root) + if (path->dentry != path->mnt->mnt_root) return -EINVAL; down_write(&sb->s_umount); if (flags & MS_BIND) - err = change_mount_flags(nd->path.mnt, flags); + err = change_mount_flags(path->mnt, flags); else err = do_remount_sb(sb, flags, data, 0); if (!err) - nd->path.mnt->mnt_flags = mnt_flags; + path->mnt->mnt_flags = mnt_flags; up_write(&sb->s_umount); - if (!err) - security_sb_post_remount(nd->path.mnt, flags, data); + if (!err) { + security_sb_post_remount(path->mnt, flags, data); + + spin_lock(&vfsmount_lock); + touch_mnt_namespace(path->mnt->mnt_ns); + spin_unlock(&vfsmount_lock); + } return err; } @@ -1568,90 +1570,85 @@ static inline int tree_contains_unbindable(struct vfsmount *mnt) return 0; } -/* - * noinline this do_mount helper to save do_mount stack space. - */ -static noinline int do_move_mount(struct nameidata *nd, char *old_name) +static int do_move_mount(struct path *path, char *old_name) { - struct nameidata old_nd; - struct path parent_path; + struct path old_path, parent_path; struct vfsmount *p; int err = 0; if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; - err = path_lookup(old_name, LOOKUP_FOLLOW, &old_nd); + err = kern_path(old_name, LOOKUP_FOLLOW, &old_path); if (err) return err; down_write(&namespace_sem); - while (d_mountpoint(nd->path.dentry) && - follow_down(&nd->path.mnt, &nd->path.dentry)) + while (d_mountpoint(path->dentry) && + follow_down(&path->mnt, &path->dentry)) ; err = -EINVAL; - if (!check_mnt(nd->path.mnt) || !check_mnt(old_nd.path.mnt)) + if (!check_mnt(path->mnt) || !check_mnt(old_path.mnt)) goto out; err = -ENOENT; - mutex_lock(&nd->path.dentry->d_inode->i_mutex); - if (IS_DEADDIR(nd->path.dentry->d_inode)) + mutex_lock(&path->dentry->d_inode->i_mutex); + if (IS_DEADDIR(path->dentry->d_inode)) goto out1; - if (!IS_ROOT(nd->path.dentry) && d_unhashed(nd->path.dentry)) + if (!IS_ROOT(path->dentry) && d_unhashed(path->dentry)) goto out1; err = -EINVAL; - if (old_nd.path.dentry != old_nd.path.mnt->mnt_root) + if (old_path.dentry != old_path.mnt->mnt_root) goto out1; - if (old_nd.path.mnt == old_nd.path.mnt->mnt_parent) + if (old_path.mnt == old_path.mnt->mnt_parent) goto out1; - if (S_ISDIR(nd->path.dentry->d_inode->i_mode) != - S_ISDIR(old_nd.path.dentry->d_inode->i_mode)) + if (S_ISDIR(path->dentry->d_inode->i_mode) != + S_ISDIR(old_path.dentry->d_inode->i_mode)) goto out1; /* * Don't move a mount residing in a shared parent. */ - if (old_nd.path.mnt->mnt_parent && - IS_MNT_SHARED(old_nd.path.mnt->mnt_parent)) + if (old_path.mnt->mnt_parent && + IS_MNT_SHARED(old_path.mnt->mnt_parent)) goto out1; /* * Don't move a mount tree containing unbindable mounts to a destination * mount which is shared. */ - if (IS_MNT_SHARED(nd->path.mnt) && - tree_contains_unbindable(old_nd.path.mnt)) + if (IS_MNT_SHARED(path->mnt) && + tree_contains_unbindable(old_path.mnt)) goto out1; err = -ELOOP; - for (p = nd->path.mnt; p->mnt_parent != p; p = p->mnt_parent) - if (p == old_nd.path.mnt) + for (p = path->mnt; p->mnt_parent != p; p = p->mnt_parent) + if (p == old_path.mnt) goto out1; - err = attach_recursive_mnt(old_nd.path.mnt, &nd->path, &parent_path); + err = attach_recursive_mnt(old_path.mnt, path, &parent_path); if (err) goto out1; /* if the mount is moved, it should no longer be expire * automatically */ - list_del_init(&old_nd.path.mnt->mnt_expire); + list_del_init(&old_path.mnt->mnt_expire); out1: - mutex_unlock(&nd->path.dentry->d_inode->i_mutex); + mutex_unlock(&path->dentry->d_inode->i_mutex); out: up_write(&namespace_sem); if (!err) path_put(&parent_path); - path_put(&old_nd.path); + path_put(&old_path); return err; } /* * create a new mount for userspace and request it to be added into the * namespace's tree - * noinline this do_mount helper to save do_mount stack space. */ -static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, +static int do_new_mount(struct path *path, char *type, int flags, int mnt_flags, char *name, void *data) { struct vfsmount *mnt; @@ -1667,7 +1664,7 @@ static noinline int do_new_mount(struct nameidata *nd, char *type, int flags, if (IS_ERR(mnt)) return PTR_ERR(mnt); - return do_add_mount(mnt, &nd->path, mnt_flags, NULL); + return do_add_mount(mnt, path, mnt_flags, NULL); } /* @@ -1902,7 +1899,7 @@ int copy_mount_options(const void __user * data, unsigned long *where) long do_mount(char *dev_name, char *dir_name, char *type_page, unsigned long flags, void *data_page) { - struct nameidata nd; + struct path path; int retval = 0; int mnt_flags = 0; @@ -1940,29 +1937,29 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); /* ... and get the mountpoint */ - retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); + retval = kern_path(dir_name, LOOKUP_FOLLOW, &path); if (retval) return retval; - retval = security_sb_mount(dev_name, &nd.path, + retval = security_sb_mount(dev_name, &path, type_page, flags, data_page); if (retval) goto dput_out; if (flags & MS_REMOUNT) - retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags, + retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags, data_page); else if (flags & MS_BIND) - retval = do_loopback(&nd, dev_name, flags & MS_REC); + retval = do_loopback(&path, dev_name, flags & MS_REC); else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE)) - retval = do_change_type(&nd, flags); + retval = do_change_type(&path, flags); else if (flags & MS_MOVE) - retval = do_move_mount(&nd, dev_name); + retval = do_move_mount(&path, dev_name); else - retval = do_new_mount(&nd, type_page, flags, mnt_flags, + retval = do_new_mount(&path, type_page, flags, mnt_flags, dev_name, data_page); dput_out: - path_put(&nd.path); + path_put(&path); return retval; } diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index efdba2e802d7..3e64b98f3a93 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -707,9 +707,7 @@ static int nfs_is_exclusive_create(struct inode *dir, struct nameidata *nd) { if (NFS_PROTO(dir)->version == 2) return 0; - if (nd == NULL || nfs_lookup_check_intent(nd, LOOKUP_CREATE) == 0) - return 0; - return (nd->intent.open.flags & O_EXCL) != 0; + return nd && nfs_lookup_check_intent(nd, LOOKUP_EXCL); } /* @@ -1009,7 +1007,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry /* Let vfs_create() deal with O_EXCL. Instantiate, but don't hash * the dentry. */ - if (nd->intent.open.flags & O_EXCL) { + if (nd->flags & LOOKUP_EXCL) { d_instantiate(dentry, NULL); goto out; } @@ -1959,6 +1957,9 @@ force_lookup: } else res = PTR_ERR(cred); out: + if (!res && (mask & MAY_EXEC) && !execute_ok(inode)) + res = -EACCES; + dfprintk(VFS, "NFS: permission(%s/%ld), mask=0x%x, res=%d\n", inode->i_sb->s_id, inode->i_ino, mask, res); return res; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index fae97196daad..b7c9b2df1f29 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -107,11 +107,10 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - mntroot = d_alloc_anon(inode); - if (!mntroot) { - iput(inode); + mntroot = d_obtain_alias(inode); + if (IS_ERR(mntroot)) { dprintk("nfs_get_root: get root dentry failed\n"); - return ERR_PTR(-ENOMEM); + return mntroot; } security_d_instantiate(mntroot, inode); @@ -277,11 +276,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) * if the dentry tree reaches them; however if the dentry already * exists, we'll pick it up at this point and use it as the root */ - mntroot = d_alloc_anon(inode); - if (!mntroot) { - iput(inode); + mntroot = d_obtain_alias(inode); + if (IS_ERR(mntroot)) { dprintk("nfs_get_root: get root dentry failed\n"); - return ERR_PTR(-ENOMEM); + return mntroot; } security_d_instantiate(mntroot, inode); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index b9195c02a863..d22eb383e1cf 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -5,7 +5,7 @@ * * nfs inode and superblock handling functions * - * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. @@ -908,21 +908,16 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); } -static unsigned long nfs_attr_generation_counter; +static atomic_long_t nfs_attr_generation_counter; static unsigned long nfs_read_attr_generation_counter(void) { - smp_rmb(); - return nfs_attr_generation_counter; + return atomic_long_read(&nfs_attr_generation_counter); } unsigned long nfs_inc_attr_generation_counter(void) { - unsigned long ret; - smp_rmb(); - ret = ++nfs_attr_generation_counter; - smp_wmb(); - return ret; + return atomic_long_inc_return(&nfs_attr_generation_counter); } void nfs_fattr_init(struct nfs_fattr *fattr) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a3b0061dfd45..f48db679a1c6 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -5,7 +5,7 @@ * * nfs superblock handling functions * - * Modularised by Alan Cox <Alan.Cox@linux.org>, while hacking some + * Modularised by Alan Cox <alan@lxorguk.ukuu.org.uk>, while hacking some * experimental NFS changes. Modularisation taken straight from SYS5 fs. * * Change to nfs_read_super() to permit NFS mounts to multi-homed hosts. diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 9dc036f18356..5839b229cd0e 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -99,7 +99,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) int fsidtype; char *ep; struct svc_expkey key; - struct svc_expkey *ek; + struct svc_expkey *ek = NULL; if (mesg[mlen-1] != '\n') return -EINVAL; @@ -107,7 +107,8 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) buf = kmalloc(PAGE_SIZE, GFP_KERNEL); err = -ENOMEM; - if (!buf) goto out; + if (!buf) + goto out; err = -EINVAL; if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) @@ -151,34 +152,32 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen) /* now we want a pathname, or empty meaning NEGATIVE */ err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) < 0) + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len < 0) goto out; dprintk("Path seems to be <%s>\n", buf); err = 0; if (len == 0) { set_bit(CACHE_NEGATIVE, &key.h.flags); ek = svc_expkey_update(&key, ek); - if (ek) - cache_put(&ek->h, &svc_expkey_cache); - else err = -ENOMEM; + if (!ek) + err = -ENOMEM; } else { - struct nameidata nd; - err = path_lookup(buf, 0, &nd); + err = kern_path(buf, 0, &key.ek_path); if (err) goto out; dprintk("Found the path %s\n", buf); - key.ek_path = nd.path; ek = svc_expkey_update(&key, ek); - if (ek) - cache_put(&ek->h, &svc_expkey_cache); - else + if (!ek) err = -ENOMEM; - path_put(&nd.path); + path_put(&key.ek_path); } cache_flush(); out: + if (ek) + cache_put(&ek->h, &svc_expkey_cache); if (dom) auth_domain_put(dom); kfree(buf); @@ -500,35 +499,22 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) int len; int err; struct auth_domain *dom = NULL; - struct nameidata nd; - struct svc_export exp, *expp; + struct svc_export exp = {}, *expp; int an_int; - nd.path.dentry = NULL; - exp.ex_pathname = NULL; - - /* fs locations */ - exp.ex_fslocs.locations = NULL; - exp.ex_fslocs.locations_count = 0; - exp.ex_fslocs.migrated = 0; - - exp.ex_uuid = NULL; - - /* secinfo */ - exp.ex_nflavors = 0; - if (mesg[mlen-1] != '\n') return -EINVAL; mesg[mlen-1] = 0; buf = kmalloc(PAGE_SIZE, GFP_KERNEL); - err = -ENOMEM; - if (!buf) goto out; + if (!buf) + return -ENOMEM; /* client */ - len = qword_get(&mesg, buf, PAGE_SIZE); err = -EINVAL; - if (len <= 0) goto out; + len = qword_get(&mesg, buf, PAGE_SIZE); + if (len <= 0) + goto out; err = -ENOENT; dom = auth_domain_find(buf); @@ -537,25 +523,25 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) /* path */ err = -EINVAL; - if ((len=qword_get(&mesg, buf, PAGE_SIZE)) <= 0) - goto out; - err = path_lookup(buf, 0, &nd); - if (err) goto out_no_path; + if ((len = qword_get(&mesg, buf, PAGE_SIZE)) <= 0) + goto out1; + + err = kern_path(buf, 0, &exp.ex_path); + if (err) + goto out1; - exp.h.flags = 0; exp.ex_client = dom; - exp.ex_path.mnt = nd.path.mnt; - exp.ex_path.dentry = nd.path.dentry; - exp.ex_pathname = kstrdup(buf, GFP_KERNEL); + err = -ENOMEM; + exp.ex_pathname = kstrdup(buf, GFP_KERNEL); if (!exp.ex_pathname) - goto out; + goto out2; /* expiry */ err = -EINVAL; exp.h.expiry_time = get_expiry(&mesg); if (exp.h.expiry_time == 0) - goto out; + goto out3; /* flags */ err = get_int(&mesg, &an_int); @@ -563,22 +549,26 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = 0; set_bit(CACHE_NEGATIVE, &exp.h.flags); } else { - if (err || an_int < 0) goto out; + if (err || an_int < 0) + goto out3; exp.ex_flags= an_int; /* anon uid */ err = get_int(&mesg, &an_int); - if (err) goto out; + if (err) + goto out3; exp.ex_anon_uid= an_int; /* anon gid */ err = get_int(&mesg, &an_int); - if (err) goto out; + if (err) + goto out3; exp.ex_anon_gid= an_int; /* fsid */ err = get_int(&mesg, &an_int); - if (err) goto out; + if (err) + goto out3; exp.ex_fsid = an_int; while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) { @@ -604,12 +594,13 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) */ break; if (err) - goto out; + goto out4; } - err = check_export(nd.path.dentry->d_inode, exp.ex_flags, + err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags, exp.ex_uuid); - if (err) goto out; + if (err) + goto out4; } expp = svc_export_lookup(&exp); @@ -622,15 +613,16 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) err = -ENOMEM; else exp_put(expp); - out: +out4: nfsd4_fslocs_free(&exp.ex_fslocs); kfree(exp.ex_uuid); +out3: kfree(exp.ex_pathname); - if (nd.path.dentry) - path_put(&nd.path); - out_no_path: - if (dom) - auth_domain_put(dom); +out2: + path_put(&exp.ex_path); +out1: + auth_domain_put(dom); +out: kfree(buf); return err; } @@ -998,7 +990,7 @@ exp_export(struct nfsctl_export *nxp) struct svc_export *exp = NULL; struct svc_export new; struct svc_expkey *fsid_key = NULL; - struct nameidata nd; + struct path path; int err; /* Consistency check */ @@ -1021,12 +1013,12 @@ exp_export(struct nfsctl_export *nxp) /* Look up the dentry */ - err = path_lookup(nxp->ex_path, 0, &nd); + err = kern_path(nxp->ex_path, 0, &path); if (err) goto out_put_clp; err = -EINVAL; - exp = exp_get_by_name(clp, nd.path.mnt, nd.path.dentry, NULL); + exp = exp_get_by_name(clp, path.mnt, path.dentry, NULL); memset(&new, 0, sizeof(new)); @@ -1034,8 +1026,8 @@ exp_export(struct nfsctl_export *nxp) if ((nxp->ex_flags & NFSEXP_FSID) && (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && fsid_key->ek_path.mnt && - (fsid_key->ek_path.mnt != nd.path.mnt || - fsid_key->ek_path.dentry != nd.path.dentry)) + (fsid_key->ek_path.mnt != path.mnt || + fsid_key->ek_path.dentry != path.dentry)) goto finish; if (!IS_ERR(exp)) { @@ -1051,7 +1043,7 @@ exp_export(struct nfsctl_export *nxp) goto finish; } - err = check_export(nd.path.dentry->d_inode, nxp->ex_flags, NULL); + err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL); if (err) goto finish; err = -ENOMEM; @@ -1064,7 +1056,7 @@ exp_export(struct nfsctl_export *nxp) if (!new.ex_pathname) goto finish; new.ex_client = clp; - new.ex_path = nd.path; + new.ex_path = path; new.ex_flags = nxp->ex_flags; new.ex_anon_uid = nxp->ex_anon_uid; new.ex_anon_gid = nxp->ex_anon_gid; @@ -1090,7 +1082,7 @@ finish: exp_put(exp); if (fsid_key && !IS_ERR(fsid_key)) cache_put(&fsid_key->h, &svc_expkey_cache); - path_put(&nd.path); + path_put(&path); out_put_clp: auth_domain_put(clp); out_unlock: @@ -1121,7 +1113,7 @@ exp_unexport(struct nfsctl_export *nxp) { struct auth_domain *dom; svc_export *exp; - struct nameidata nd; + struct path path; int err; /* Consistency check */ @@ -1138,13 +1130,13 @@ exp_unexport(struct nfsctl_export *nxp) goto out_unlock; } - err = path_lookup(nxp->ex_path, 0, &nd); + err = kern_path(nxp->ex_path, 0, &path); if (err) goto out_domain; err = -EINVAL; - exp = exp_get_by_name(dom, nd.path.mnt, nd.path.dentry, NULL); - path_put(&nd.path); + exp = exp_get_by_name(dom, path.mnt, path.dentry, NULL); + path_put(&path); if (IS_ERR(exp)) goto out_domain; @@ -1166,26 +1158,26 @@ out_unlock: * since its harder to fool a kernel module than a user space program. */ int -exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) +exp_rootfh(svc_client *clp, char *name, struct knfsd_fh *f, int maxsize) { struct svc_export *exp; - struct nameidata nd; + struct path path; struct inode *inode; struct svc_fh fh; int err; err = -EPERM; /* NB: we probably ought to check that it's NUL-terminated */ - if (path_lookup(path, 0, &nd)) { - printk("nfsd: exp_rootfh path not found %s", path); + if (kern_path(name, 0, &path)) { + printk("nfsd: exp_rootfh path not found %s", name); return err; } - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; dprintk("nfsd: exp_rootfh(%s [%p] %s:%s/%ld)\n", - path, nd.path.dentry, clp->name, + name, path.dentry, clp->name, inode->i_sb->s_id, inode->i_ino); - exp = exp_parent(clp, nd.path.mnt, nd.path.dentry, NULL); + exp = exp_parent(clp, path.mnt, path.dentry, NULL); if (IS_ERR(exp)) { err = PTR_ERR(exp); goto out; @@ -1195,7 +1187,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) * fh must be initialized before calling fh_compose */ fh_init(&fh, maxsize); - if (fh_compose(&fh, exp, nd.path.dentry, NULL)) + if (fh_compose(&fh, exp, path.dentry, NULL)) err = -EINVAL; else err = 0; @@ -1203,7 +1195,7 @@ exp_rootfh(svc_client *clp, char *path, struct knfsd_fh *f, int maxsize) fh_put(&fh); exp_put(exp); out: - path_put(&nd.path); + path_put(&path); return err; } diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 145b3c877a27..bb93946ace22 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -51,7 +51,7 @@ #define NFSDDBG_FACILITY NFSDDBG_PROC /* Globals */ -static struct nameidata rec_dir; +static struct path rec_dir; static int rec_dir_init = 0; static void @@ -121,9 +121,9 @@ out_no_tfm: static void nfsd4_sync_rec_dir(void) { - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); - nfsd_sync_dir(rec_dir.path.dentry); - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + nfsd_sync_dir(rec_dir.dentry); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); } int @@ -143,9 +143,9 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) nfs4_save_user(&uid, &gid); /* lock the parent */ - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); - dentry = lookup_one_len(dname, rec_dir.path.dentry, HEXDIR_LEN-1); + dentry = lookup_one_len(dname, rec_dir.dentry, HEXDIR_LEN-1); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); goto out_unlock; @@ -155,15 +155,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp) dprintk("NFSD: nfsd4_create_clid_dir: DIRECTORY EXISTS\n"); goto out_put; } - status = mnt_want_write(rec_dir.path.mnt); + status = mnt_want_write(rec_dir.mnt); if (status) goto out_put; - status = vfs_mkdir(rec_dir.path.dentry->d_inode, dentry, S_IRWXU); - mnt_drop_write(rec_dir.path.mnt); + status = vfs_mkdir(rec_dir.dentry->d_inode, dentry, S_IRWXU); + mnt_drop_write(rec_dir.mnt); out_put: dput(dentry); out_unlock: - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (status == 0) { clp->cl_firststate = 1; nfsd4_sync_rec_dir(); @@ -226,7 +226,7 @@ nfsd4_list_rec_dir(struct dentry *dir, recdir_func *f) nfs4_save_user(&uid, &gid); - filp = dentry_open(dget(dir), mntget(rec_dir.path.mnt), O_RDONLY); + filp = dentry_open(dget(dir), mntget(rec_dir.mnt), O_RDONLY); status = PTR_ERR(filp); if (IS_ERR(filp)) goto out; @@ -291,9 +291,9 @@ nfsd4_unlink_clid_dir(char *name, int namlen) dprintk("NFSD: nfsd4_unlink_clid_dir. name %.*s\n", namlen, name); - mutex_lock(&rec_dir.path.dentry->d_inode->i_mutex); - dentry = lookup_one_len(name, rec_dir.path.dentry, namlen); - mutex_unlock(&rec_dir.path.dentry->d_inode->i_mutex); + mutex_lock(&rec_dir.dentry->d_inode->i_mutex); + dentry = lookup_one_len(name, rec_dir.dentry, namlen); + mutex_unlock(&rec_dir.dentry->d_inode->i_mutex); if (IS_ERR(dentry)) { status = PTR_ERR(dentry); return status; @@ -302,7 +302,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen) if (!dentry->d_inode) goto out; - status = nfsd4_clear_clid_dir(rec_dir.path.dentry, dentry); + status = nfsd4_clear_clid_dir(rec_dir.dentry, dentry); out: dput(dentry); return status; @@ -318,7 +318,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) if (!rec_dir_init || !clp->cl_firststate) return; - status = mnt_want_write(rec_dir.path.mnt); + status = mnt_want_write(rec_dir.mnt); if (status) goto out; clp->cl_firststate = 0; @@ -327,7 +327,7 @@ nfsd4_remove_clid_dir(struct nfs4_client *clp) nfs4_reset_user(uid, gid); if (status == 0) nfsd4_sync_rec_dir(); - mnt_drop_write(rec_dir.path.mnt); + mnt_drop_write(rec_dir.mnt); out: if (status) printk("NFSD: Failed to remove expired client state directory" @@ -357,17 +357,17 @@ nfsd4_recdir_purge_old(void) { if (!rec_dir_init) return; - status = mnt_want_write(rec_dir.path.mnt); + status = mnt_want_write(rec_dir.mnt); if (status) goto out; - status = nfsd4_list_rec_dir(rec_dir.path.dentry, purge_old); + status = nfsd4_list_rec_dir(rec_dir.dentry, purge_old); if (status == 0) nfsd4_sync_rec_dir(); - mnt_drop_write(rec_dir.path.mnt); + mnt_drop_write(rec_dir.mnt); out: if (status) printk("nfsd4: failed to purge old clients from recovery" - " directory %s\n", rec_dir.path.dentry->d_name.name); + " directory %s\n", rec_dir.dentry->d_name.name); } static int @@ -387,10 +387,10 @@ int nfsd4_recdir_load(void) { int status; - status = nfsd4_list_rec_dir(rec_dir.path.dentry, load_recdir); + status = nfsd4_list_rec_dir(rec_dir.dentry, load_recdir); if (status) printk("nfsd4: failed loading clients from recovery" - " directory %s\n", rec_dir.path.dentry->d_name.name); + " directory %s\n", rec_dir.dentry->d_name.name); return status; } @@ -412,7 +412,7 @@ nfsd4_init_recdir(char *rec_dirname) nfs4_save_user(&uid, &gid); - status = path_lookup(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, + status = kern_path(rec_dirname, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &rec_dir); if (status) printk("NFSD: unable to find recovery directory %s\n", @@ -429,5 +429,5 @@ nfsd4_shutdown_recdir(void) if (!rec_dir_init) return; rec_dir_init = 0; - path_put(&rec_dir.path); + path_put(&rec_dir); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 0cc7ff5d5ab5..b0bebc552a11 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -3284,17 +3284,17 @@ int nfs4_reset_recoverydir(char *recdir) { int status; - struct nameidata nd; + struct path path; - status = path_lookup(recdir, LOOKUP_FOLLOW, &nd); + status = kern_path(recdir, LOOKUP_FOLLOW, &path); if (status) return status; status = -ENOTDIR; - if (S_ISDIR(nd.path.dentry->d_inode->i_mode)) { + if (S_ISDIR(path.dentry->d_inode->i_mode)) { nfs4_set_recdir(recdir); status = 0; } - path_put(&nd.path); + path_put(&path); return status; } diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 97543df58242..e3f9783fdcf7 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -341,7 +341,7 @@ static ssize_t failover_unlock_ip(struct file *file, char *buf, size_t size) static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) { - struct nameidata nd; + struct path path; char *fo_path; int error; @@ -356,13 +356,13 @@ static ssize_t failover_unlock_fs(struct file *file, char *buf, size_t size) if (qword_get(&buf, fo_path, size) < 0) return -EINVAL; - error = path_lookup(fo_path, 0, &nd); + error = kern_path(fo_path, 0, &path); if (error) return error; - error = nlmsvc_unlock_all_by_sb(nd.path.mnt->mnt_sb); + error = nlmsvc_unlock_all_by_sb(path.mnt->mnt_sb); - path_put(&nd.path); + path_put(&path); return error; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 59eeb46f82c5..07e4f5d7baa8 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -249,6 +249,10 @@ static int nfsd_init_socks(int port) if (error < 0) return error; + error = lockd_up(); + if (error < 0) + return error; + error = svc_create_xprt(nfsd_serv, "tcp", port, SVC_SOCK_DEFAULTS); if (error < 0) diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index aa1d0d6489a1..848a03e83a42 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -410,6 +410,7 @@ out_nfserr: static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) { ssize_t buflen; + ssize_t ret; buflen = vfs_getxattr(dentry, key, NULL, 0); if (buflen <= 0) @@ -419,7 +420,10 @@ static ssize_t nfsd_getxattr(struct dentry *dentry, char *key, void **buf) if (!*buf) return -ENOMEM; - return vfs_getxattr(dentry, key, *buf, buflen); + ret = vfs_getxattr(dentry, key, *buf, buflen); + if (ret < 0) + kfree(*buf); + return ret; } #endif @@ -1814,6 +1818,116 @@ out: } /* + * We do this buffering because we must not call back into the file + * system's ->lookup() method from the filldir callback. That may well + * deadlock a number of file systems. + * + * This is based heavily on the implementation of same in XFS. + */ +struct buffered_dirent { + u64 ino; + loff_t offset; + int namlen; + unsigned int d_type; + char name[]; +}; + +struct readdir_data { + char *dirent; + size_t used; + int full; +}; + +static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, + loff_t offset, u64 ino, unsigned int d_type) +{ + struct readdir_data *buf = __buf; + struct buffered_dirent *de = (void *)(buf->dirent + buf->used); + unsigned int reclen; + + reclen = ALIGN(sizeof(struct buffered_dirent) + namlen, sizeof(u64)); + if (buf->used + reclen > PAGE_SIZE) { + buf->full = 1; + return -EINVAL; + } + + de->namlen = namlen; + de->offset = offset; + de->ino = ino; + de->d_type = d_type; + memcpy(de->name, name, namlen); + buf->used += reclen; + + return 0; +} + +static int nfsd_buffered_readdir(struct file *file, filldir_t func, + struct readdir_cd *cdp, loff_t *offsetp) +{ + struct readdir_data buf; + struct buffered_dirent *de; + int host_err; + int size; + loff_t offset; + + buf.dirent = (void *)__get_free_page(GFP_KERNEL); + if (!buf.dirent) + return -ENOMEM; + + offset = *offsetp; + cdp->err = nfserr_eof; /* will be cleared on successful read */ + + while (1) { + unsigned int reclen; + + buf.used = 0; + buf.full = 0; + + host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); + if (buf.full) + host_err = 0; + + if (host_err < 0) + break; + + size = buf.used; + + if (!size) + break; + + de = (struct buffered_dirent *)buf.dirent; + while (size > 0) { + offset = de->offset; + + if (func(cdp, de->name, de->namlen, de->offset, + de->ino, de->d_type)) + goto done; + + if (cdp->err != nfs_ok) + goto done; + + reclen = ALIGN(sizeof(*de) + de->namlen, + sizeof(u64)); + size -= reclen; + de = (struct buffered_dirent *)((char *)de + reclen); + } + offset = vfs_llseek(file, 0, SEEK_CUR); + cdp->err = nfserr_eof; + if (!buf.full) + break; + } + + done: + free_page((unsigned long)(buf.dirent)); + + if (host_err) + return nfserrno(host_err); + + *offsetp = offset; + return cdp->err; +} + +/* * Read entries from a directory. * The NFSv3/4 verifier we ignore for now. */ @@ -1822,7 +1936,6 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct readdir_cd *cdp, filldir_t func) { __be32 err; - int host_err; struct file *file; loff_t offset = *offsetp; @@ -1836,21 +1949,7 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, goto out_close; } - /* - * Read the directory entries. This silly loop is necessary because - * readdir() is not guaranteed to fill up the entire buffer, but - * may choose to do less. - */ - - do { - cdp->err = nfserr_eof; /* will be cleared on successful read */ - host_err = vfs_readdir(file, func, cdp); - } while (host_err >=0 && cdp->err == nfs_ok); - if (host_err) - err = nfserrno(host_err); - else - err = cdp->err; - *offsetp = vfs_llseek(file, 0, 1); + err = nfsd_buffered_readdir(file, func, cdp, offsetp); if (err == nfserr_eof || err == nfserr_toosmall) err = nfs_ok; /* can still be found in ->err */ diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 9e8a95be7a1e..2ca00153b6ec 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -304,8 +304,6 @@ static struct dentry *ntfs_get_parent(struct dentry *child_dent) ntfs_attr_search_ctx *ctx; ATTR_RECORD *attr; FILE_NAME_ATTR *fn; - struct inode *parent_vi; - struct dentry *parent_dent; unsigned long parent_ino; int err; @@ -345,24 +343,8 @@ try_next: /* Release the search context and the mft record of the child. */ ntfs_attr_put_search_ctx(ctx); unmap_mft_record(ni); - /* Get the inode of the parent directory. */ - parent_vi = ntfs_iget(vi->i_sb, parent_ino); - if (IS_ERR(parent_vi) || unlikely(is_bad_inode(parent_vi))) { - if (!IS_ERR(parent_vi)) - iput(parent_vi); - ntfs_error(vi->i_sb, "Failed to get parent directory inode " - "0x%lx of child inode 0x%lx.", parent_ino, - vi->i_ino); - return ERR_PTR(-EACCES); - } - /* Finally get a dentry for the parent directory and return it. */ - parent_dent = d_alloc_anon(parent_vi); - if (unlikely(!parent_dent)) { - iput(parent_vi); - return ERR_PTR(-ENOMEM); - } - ntfs_debug("Done for inode 0x%lx.", vi->i_ino); - return parent_dent; + + return d_obtain_alias(ntfs_iget(vi->i_sb, parent_ino)); } static struct inode *ntfs_nfs_get_inode(struct super_block *sb, diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 7dce1612553e..6ebaa58e2c03 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -976,7 +976,7 @@ static void o2hb_region_release(struct config_item *item) } if (reg->hr_bdev) - blkdev_put(reg->hr_bdev); + blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); if (reg->hr_slots) kfree(reg->hr_slots); @@ -1268,7 +1268,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, goto out; reg->hr_bdev = I_BDEV(filp->f_mapping->host); - ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0); + ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ); if (ret) { reg->hr_bdev = NULL; goto out; @@ -1358,7 +1358,7 @@ out: iput(inode); if (ret < 0) { if (reg->hr_bdev) { - blkdev_put(reg->hr_bdev); + blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE); reg->hr_bdev = NULL; } } diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c index 67527cebf214..2f27b332d8b3 100644 --- a/fs/ocfs2/export.c +++ b/fs/ocfs2/export.c @@ -68,14 +68,9 @@ static struct dentry *ocfs2_get_dentry(struct super_block *sb, return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - - if (!result) { - iput(inode); - mlog_errno(-ENOMEM); - return ERR_PTR(-ENOMEM); - } - result->d_op = &ocfs2_dentry_ops; + result = d_obtain_alias(inode); + if (!IS_ERR(result)) + result->d_op = &ocfs2_dentry_ops; mlog_exit_ptr(result); return result; @@ -86,7 +81,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) int status; u64 blkno; struct dentry *parent; - struct inode *inode; struct inode *dir = child->d_inode; mlog_entry("(0x%p, '%.*s')\n", child, @@ -109,21 +103,9 @@ static struct dentry *ocfs2_get_parent(struct dentry *child) goto bail_unlock; } - inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0); - if (IS_ERR(inode)) { - mlog(ML_ERROR, "Unable to create inode %llu\n", - (unsigned long long)blkno); - parent = ERR_PTR(-EACCES); - goto bail_unlock; - } - - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - - parent->d_op = &ocfs2_dentry_ops; + parent = d_obtain_alias(ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0, 0)); + if (!IS_ERR(parent)) + parent->d_op = &ocfs2_dentry_ops; bail_unlock: ocfs2_inode_unlock(dir, 0); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8d3225a78073..7efe937a415f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -679,8 +679,7 @@ leave: /* Some parts of this taken from generic_cont_expand, which turned out * to be too fragile to do exactly what we need without us having to - * worry about recursive locking in ->prepare_write() and - * ->commit_write(). */ + * worry about recursive locking in ->write_begin() and ->write_end(). */ static int ocfs2_write_zero_page(struct inode *inode, u64 size) { diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index c0757e998876..c7275cfbdcfb 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c @@ -501,4 +501,5 @@ struct inode_operations omfs_dir_inops = { struct file_operations omfs_dir_operations = { .read = generic_read_dir, .readdir = omfs_readdir, + .llseek = generic_file_llseek, }; diff --git a/fs/open.c b/fs/open.c index 5596049863bf..83cdb9dee0c1 100644 --- a/fs/open.c +++ b/fs/open.c @@ -798,7 +798,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, int error; f->f_flags = flags; - f->f_mode = ((flags+1) & O_ACCMODE) | FMODE_LSEEK | + f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; inode = dentry->d_inode; if (f->f_mode & FMODE_WRITE) { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 9f5b054f06b9..d41bdc784de4 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -167,6 +167,7 @@ static int openpromfs_readdir(struct file *, void *, filldir_t); static const struct file_operations openprom_operations = { .read = generic_read_dir, .readdir = openpromfs_readdir, + .llseek = generic_file_llseek, }; static struct dentry *openpromfs_lookup(struct inode *, struct dentry *, struct nameidata *); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index cfb0c80690aa..633f7a0ebb2c 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -485,10 +485,10 @@ void register_disk(struct gendisk *disk) goto exit; bdev->bd_invalidated = 1; - err = blkdev_get(bdev, FMODE_READ, 0); + err = blkdev_get(bdev, FMODE_READ); if (err < 0) goto exit; - blkdev_put(bdev); + blkdev_put(bdev, FMODE_READ); exit: /* announce disk after possible partitions are created */ diff --git a/fs/pipe.c b/fs/pipe.c index fcba6542b8d0..7aea8b89baac 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -717,14 +717,12 @@ pipe_rdwr_fasync(int fd, struct file *filp, int on) static int pipe_read_release(struct inode *inode, struct file *filp) { - pipe_read_fasync(-1, filp, 0); return pipe_release(inode, 1, 0); } static int pipe_write_release(struct inode *inode, struct file *filp) { - pipe_write_fasync(-1, filp, 0); return pipe_release(inode, 0, 1); } @@ -733,7 +731,6 @@ pipe_rdwr_release(struct inode *inode, struct file *filp) { int decr, decw; - pipe_rdwr_fasync(-1, filp, 0); decr = (filp->f_mode & FMODE_READ) != 0; decw = (filp->f_mode & FMODE_WRITE) != 0; return pipe_release(inode, decr, decw); diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ebaba0213546..63d965193b22 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -8,11 +8,20 @@ proc-y := nommu.o task_nommu.o proc-$(CONFIG_MMU) := mmu.o task_mmu.o proc-y += inode.o root.o base.o generic.o array.o \ - proc_tty.o proc_misc.o - + proc_tty.o +proc-y += cmdline.o +proc-y += cpuinfo.o +proc-y += devices.o +proc-y += interrupts.o +proc-y += loadavg.o +proc-y += meminfo.o +proc-y += stat.o +proc-y += uptime.o +proc-y += version.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o proc-$(CONFIG_PROC_VMCORE) += vmcore.o proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o proc-$(CONFIG_PRINTK) += kmsg.o +proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o diff --git a/fs/proc/array.c b/fs/proc/array.c index bb9f4b05703d..6af7fba7abb1 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -40,7 +40,7 @@ * * * Alan Cox : security fixes. - * <Alan.Cox@linux.org> + * <alan@lxorguk.ukuu.org.uk> * * Al Viro : safe handling of mm_struct * diff --git a/fs/proc/base.c b/fs/proc/base.c index b5918ae8ca79..486cf3fe7139 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1712,9 +1712,9 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, file = fcheck_files(files, fd); if (!file) goto out_unlock; - if (file->f_mode & 1) + if (file->f_mode & FMODE_READ) inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & 2) + if (file->f_mode & FMODE_WRITE) inode->i_mode |= S_IWUSR | S_IXUSR; spin_unlock(&files->file_lock); put_files_struct(files); diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c new file mode 100644 index 000000000000..82676e3fcd1d --- /dev/null +++ b/fs/proc/cmdline.c @@ -0,0 +1,29 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +static int cmdline_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, "%s\n", saved_command_line); + return 0; +} + +static int cmdline_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cmdline_proc_show, NULL); +} + +static const struct file_operations cmdline_proc_fops = { + .open = cmdline_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_cmdline_init(void) +{ + proc_create("cmdline", 0, NULL, &cmdline_proc_fops); + return 0; +} +module_init(proc_cmdline_init); diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c new file mode 100644 index 000000000000..5a1e539a234b --- /dev/null +++ b/fs/proc/cpuinfo.c @@ -0,0 +1,24 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +extern const struct seq_operations cpuinfo_op; +static int cpuinfo_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &cpuinfo_op); +} + +static const struct file_operations proc_cpuinfo_operations = { + .open = cpuinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_cpuinfo_init(void) +{ + proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); + return 0; +} +module_init(proc_cpuinfo_init); diff --git a/fs/proc/devices.c b/fs/proc/devices.c new file mode 100644 index 000000000000..59ee7da959c9 --- /dev/null +++ b/fs/proc/devices.c @@ -0,0 +1,70 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +static int devinfo_show(struct seq_file *f, void *v) +{ + int i = *(loff_t *) v; + + if (i < CHRDEV_MAJOR_HASH_SIZE) { + if (i == 0) + seq_printf(f, "Character devices:\n"); + chrdev_show(f, i); + } +#ifdef CONFIG_BLOCK + else { + i -= CHRDEV_MAJOR_HASH_SIZE; + if (i == 0) + seq_printf(f, "\nBlock devices:\n"); + blkdev_show(f, i); + } +#endif + return 0; +} + +static void *devinfo_start(struct seq_file *f, loff_t *pos) +{ + if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) + return pos; + return NULL; +} + +static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) +{ + (*pos)++; + if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) + return NULL; + return pos; +} + +static void devinfo_stop(struct seq_file *f, void *v) +{ + /* Nothing to do */ +} + +static const struct seq_operations devinfo_ops = { + .start = devinfo_start, + .next = devinfo_next, + .stop = devinfo_stop, + .show = devinfo_show +}; + +static int devinfo_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &devinfo_ops); +} + +static const struct file_operations proc_devinfo_operations = { + .open = devinfo_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_devices_init(void) +{ + proc_create("devices", 0, NULL, &proc_devinfo_operations); + return 0; +} +module_init(proc_devices_init); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 7821589a17d5..60a359b35582 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -547,9 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { - printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", + WARN(1, KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", dir->name, dp->name); - dump_stack(); break; } diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c6b4fa7e3b49..2543fd00c658 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -106,14 +106,13 @@ static void init_once(void *foo) inode_init_once(&ei->vfs_inode); } -int __init proc_init_inodecache(void) +void __init proc_init_inodecache(void) { proc_inode_cachep = kmem_cache_create("proc_inode_cache", sizeof(struct proc_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD|SLAB_PANIC), init_once); - return 0; } static const struct super_operations proc_sops = { diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 3bfb7b8747b3..3e8aeb8b61ce 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -61,12 +61,11 @@ extern const struct file_operations proc_smaps_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern const struct file_operations proc_net_operations; -extern const struct file_operations proc_kmsg_operations; extern const struct inode_operations proc_net_inode_operations; void free_proc_entry(struct proc_dir_entry *de); -int proc_init_inodecache(void); +void proc_init_inodecache(void); static inline struct pid *proc_pid(struct inode *inode) { diff --git a/fs/proc/interrupts.c b/fs/proc/interrupts.c new file mode 100644 index 000000000000..05029c0e2f24 --- /dev/null +++ b/fs/proc/interrupts.c @@ -0,0 +1,53 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irqnr.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> + +/* + * /proc/interrupts + */ +static void *int_seq_start(struct seq_file *f, loff_t *pos) +{ + return (*pos <= nr_irqs) ? pos : NULL; +} + +static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) +{ + (*pos)++; + if (*pos > nr_irqs) + return NULL; + return pos; +} + +static void int_seq_stop(struct seq_file *f, void *v) +{ + /* Nothing to do */ +} + +static const struct seq_operations int_seq_ops = { + .start = int_seq_start, + .next = int_seq_next, + .stop = int_seq_stop, + .show = show_interrupts +}; + +static int interrupts_open(struct inode *inode, struct file *filp) +{ + return seq_open(filp, &int_seq_ops); +} + +static const struct file_operations proc_interrupts_operations = { + .open = interrupts_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, +}; + +static int __init proc_interrupts_init(void) +{ + proc_create("interrupts", 0, NULL, &proc_interrupts_operations); + return 0; +} +module_init(proc_interrupts_init); diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index c2370c76fb71..59b43a068872 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -27,6 +27,8 @@ #define ELF_CORE_EFLAGS 0 #endif +static struct proc_dir_entry *proc_root_kcore; + static int open_kcore(struct inode * inode, struct file * filp) { return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; @@ -34,7 +36,7 @@ static int open_kcore(struct inode * inode, struct file * filp) static ssize_t read_kcore(struct file *, char __user *, size_t, loff_t *); -const struct file_operations proc_kcore_operations = { +static const struct file_operations proc_kcore_operations = { .read = read_kcore, .open = open_kcore, }; @@ -399,3 +401,13 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return acc; } + +static int __init proc_kcore_init(void) +{ + proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); + if (proc_root_kcore) + proc_root_kcore->size = + (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; + return 0; +} +module_init(proc_kcore_init); diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index 9fd5df3f40ce..7ca78346d3f0 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -10,13 +10,12 @@ #include <linux/time.h> #include <linux/kernel.h> #include <linux/poll.h> +#include <linux/proc_fs.h> #include <linux/fs.h> #include <asm/uaccess.h> #include <asm/io.h> -#include "internal.h" - extern wait_queue_head_t log_wait; extern int do_syslog(int type, char __user *bug, int count); @@ -49,9 +48,16 @@ static unsigned int kmsg_poll(struct file *file, poll_table *wait) } -const struct file_operations proc_kmsg_operations = { +static const struct file_operations proc_kmsg_operations = { .read = kmsg_read, .poll = kmsg_poll, .open = kmsg_open, .release = kmsg_release, }; + +static int __init proc_kmsg_init(void) +{ + proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); + return 0; +} +module_init(proc_kmsg_init); diff --git a/fs/proc/loadavg.c b/fs/proc/loadavg.c new file mode 100644 index 000000000000..9bca39cf99ee --- /dev/null +++ b/fs/proc/loadavg.c @@ -0,0 +1,51 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/pid_namespace.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/seqlock.h> +#include <linux/time.h> + +#define LOAD_INT(x) ((x) >> FSHIFT) +#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) + +static int loadavg_proc_show(struct seq_file *m, void *v) +{ + int a, b, c; + unsigned long seq; + + do { + seq = read_seqbegin(&xtime_lock); + a = avenrun[0] + (FIXED_1/200); + b = avenrun[1] + (FIXED_1/200); + c = avenrun[2] + (FIXED_1/200); + } while (read_seqretry(&xtime_lock, seq)); + + seq_printf(m, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n", + LOAD_INT(a), LOAD_FRAC(a), + LOAD_INT(b), LOAD_FRAC(b), + LOAD_INT(c), LOAD_FRAC(c), + nr_running(), nr_threads, + task_active_pid_ns(current)->last_pid); + return 0; +} + +static int loadavg_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, loadavg_proc_show, NULL); +} + +static const struct file_operations loadavg_proc_fops = { + .open = loadavg_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_loadavg_init(void) +{ + proc_create("loadavg", 0, NULL, &loadavg_proc_fops); + return 0; +} +module_init(proc_loadavg_init); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c new file mode 100644 index 000000000000..b1675c4e66da --- /dev/null +++ b/fs/proc/meminfo.c @@ -0,0 +1,168 @@ +#include <linux/fs.h> +#include <linux/hugetlb.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/mman.h> +#include <linux/mmzone.h> +#include <linux/proc_fs.h> +#include <linux/quicklist.h> +#include <linux/seq_file.h> +#include <linux/swap.h> +#include <linux/vmstat.h> +#include <asm/atomic.h> +#include <asm/page.h> +#include <asm/pgtable.h> +#include "internal.h" + +void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) +{ +} + +static int meminfo_proc_show(struct seq_file *m, void *v) +{ + struct sysinfo i; + unsigned long committed; + unsigned long allowed; + struct vmalloc_info vmi; + long cached; + unsigned long pages[NR_LRU_LISTS]; + int lru; + +/* + * display in kilobytes. + */ +#define K(x) ((x) << (PAGE_SHIFT - 10)) + si_meminfo(&i); + si_swapinfo(&i); + committed = atomic_long_read(&vm_committed_space); + allowed = ((totalram_pages - hugetlb_total_pages()) + * sysctl_overcommit_ratio / 100) + total_swap_pages; + + cached = global_page_state(NR_FILE_PAGES) - + total_swapcache_pages - i.bufferram; + if (cached < 0) + cached = 0; + + get_vmalloc_info(&vmi); + + for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) + pages[lru] = global_page_state(NR_LRU_BASE + lru); + + /* + * Tagged format, for easy grepping and expansion. + */ + seq_printf(m, + "MemTotal: %8lu kB\n" + "MemFree: %8lu kB\n" + "Buffers: %8lu kB\n" + "Cached: %8lu kB\n" + "SwapCached: %8lu kB\n" + "Active: %8lu kB\n" + "Inactive: %8lu kB\n" + "Active(anon): %8lu kB\n" + "Inactive(anon): %8lu kB\n" + "Active(file): %8lu kB\n" + "Inactive(file): %8lu kB\n" +#ifdef CONFIG_UNEVICTABLE_LRU + "Unevictable: %8lu kB\n" + "Mlocked: %8lu kB\n" +#endif +#ifdef CONFIG_HIGHMEM + "HighTotal: %8lu kB\n" + "HighFree: %8lu kB\n" + "LowTotal: %8lu kB\n" + "LowFree: %8lu kB\n" +#endif + "SwapTotal: %8lu kB\n" + "SwapFree: %8lu kB\n" + "Dirty: %8lu kB\n" + "Writeback: %8lu kB\n" + "AnonPages: %8lu kB\n" + "Mapped: %8lu kB\n" + "Slab: %8lu kB\n" + "SReclaimable: %8lu kB\n" + "SUnreclaim: %8lu kB\n" + "PageTables: %8lu kB\n" +#ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" +#endif + "NFS_Unstable: %8lu kB\n" + "Bounce: %8lu kB\n" + "WritebackTmp: %8lu kB\n" + "CommitLimit: %8lu kB\n" + "Committed_AS: %8lu kB\n" + "VmallocTotal: %8lu kB\n" + "VmallocUsed: %8lu kB\n" + "VmallocChunk: %8lu kB\n", + K(i.totalram), + K(i.freeram), + K(i.bufferram), + K(cached), + K(total_swapcache_pages), + K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), + K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), + K(pages[LRU_ACTIVE_ANON]), + K(pages[LRU_INACTIVE_ANON]), + K(pages[LRU_ACTIVE_FILE]), + K(pages[LRU_INACTIVE_FILE]), +#ifdef CONFIG_UNEVICTABLE_LRU + K(pages[LRU_UNEVICTABLE]), + K(global_page_state(NR_MLOCK)), +#endif +#ifdef CONFIG_HIGHMEM + K(i.totalhigh), + K(i.freehigh), + K(i.totalram-i.totalhigh), + K(i.freeram-i.freehigh), +#endif + K(i.totalswap), + K(i.freeswap), + K(global_page_state(NR_FILE_DIRTY)), + K(global_page_state(NR_WRITEBACK)), + K(global_page_state(NR_ANON_PAGES)), + K(global_page_state(NR_FILE_MAPPED)), + K(global_page_state(NR_SLAB_RECLAIMABLE) + + global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_SLAB_RECLAIMABLE)), + K(global_page_state(NR_SLAB_UNRECLAIMABLE)), + K(global_page_state(NR_PAGETABLE)), +#ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), +#endif + K(global_page_state(NR_UNSTABLE_NFS)), + K(global_page_state(NR_BOUNCE)), + K(global_page_state(NR_WRITEBACK_TEMP)), + K(allowed), + K(committed), + (unsigned long)VMALLOC_TOTAL >> 10, + vmi.used >> 10, + vmi.largest_chunk >> 10 + ); + + hugetlb_report_meminfo(m); + + arch_report_meminfo(m); + + return 0; +#undef K +} + +static int meminfo_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, meminfo_proc_show, NULL); +} + +static const struct file_operations meminfo_proc_fops = { + .open = meminfo_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_meminfo_init(void) +{ + proc_create("meminfo", 0, NULL, &meminfo_proc_fops); + return 0; +} +module_init(proc_meminfo_init); diff --git a/fs/proc/page.c b/fs/proc/page.c new file mode 100644 index 000000000000..767d95a6d1b1 --- /dev/null +++ b/fs/proc/page.c @@ -0,0 +1,147 @@ +#include <linux/bootmem.h> +#include <linux/compiler.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <asm/uaccess.h> +#include "internal.h" + +#define KPMSIZE sizeof(u64) +#define KPMMASK (KPMSIZE - 1) +/* /proc/kpagecount - an array exposing page counts + * + * Each entry is a u64 representing the corresponding + * physical page count. + */ +static ssize_t kpagecount_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 pcount; + + pfn = src / KPMSIZE; + count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EINVAL; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + pcount = 0; + else + pcount = page_mapcount(ppage); + + if (put_user(pcount, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static const struct file_operations proc_kpagecount_operations = { + .llseek = mem_lseek, + .read = kpagecount_read, +}; + +/* /proc/kpageflags - an array exposing page flags + * + * Each entry is a u64 representing the corresponding + * physical page flags. + */ + +/* These macros are used to decouple internal flags from exported ones */ + +#define KPF_LOCKED 0 +#define KPF_ERROR 1 +#define KPF_REFERENCED 2 +#define KPF_UPTODATE 3 +#define KPF_DIRTY 4 +#define KPF_LRU 5 +#define KPF_ACTIVE 6 +#define KPF_SLAB 7 +#define KPF_WRITEBACK 8 +#define KPF_RECLAIM 9 +#define KPF_BUDDY 10 + +#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) + +static ssize_t kpageflags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + u64 __user *out = (u64 __user *)buf; + struct page *ppage; + unsigned long src = *ppos; + unsigned long pfn; + ssize_t ret = 0; + u64 kflags, uflags; + + pfn = src / KPMSIZE; + count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); + if (src & KPMMASK || count & KPMMASK) + return -EINVAL; + + while (count > 0) { + ppage = NULL; + if (pfn_valid(pfn)) + ppage = pfn_to_page(pfn); + pfn++; + if (!ppage) + kflags = 0; + else + kflags = ppage->flags; + + uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | + kpf_copy_bit(kflags, KPF_ERROR, PG_error) | + kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | + kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | + kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | + kpf_copy_bit(kflags, KPF_LRU, PG_lru) | + kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | + kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | + kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | + kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | + kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); + + if (put_user(uflags, out++)) { + ret = -EFAULT; + break; + } + + count -= KPMSIZE; + } + + *ppos += (char __user *)out - buf; + if (!ret) + ret = (char __user *)out - buf; + return ret; +} + +static const struct file_operations proc_kpageflags_operations = { + .llseek = mem_lseek, + .read = kpageflags_read, +}; + +static int __init proc_page_init(void) +{ + proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); + proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); + return 0; +} +module_init(proc_page_init); diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index eca471bc8512..d777789b7a89 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -4,6 +4,7 @@ * Copyright 1997 Paul Mackerras */ #include <linux/errno.h> +#include <linux/init.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> @@ -214,7 +215,7 @@ void proc_device_tree_add_node(struct device_node *np, /* * Called on initialization to set up the /proc/device-tree subtree */ -void proc_device_tree_init(void) +void __init proc_device_tree_init(void) { struct device_node *root; if ( !have_of ) diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c deleted file mode 100644 index 7ea52c79b2da..000000000000 --- a/fs/proc/proc_misc.c +++ /dev/null @@ -1,933 +0,0 @@ -/* - * linux/fs/proc/proc_misc.c - * - * linux/fs/proc/array.c - * Copyright (C) 1992 by Linus Torvalds - * based on ideas by Darren Senn - * - * This used to be the part of array.c. See the rest of history and credits - * there. I took this into a separate file and switched the thing to generic - * proc_file_inode_operations, leaving in array.c only per-process stuff. - * Inumbers allocation made dynamic (via create_proc_entry()). AV, May 1999. - * - * Changes: - * Fulton Green : Encapsulated position metric calculations. - * <kernel@FultonGreen.com> - */ - -#include <linux/types.h> -#include <linux/errno.h> -#include <linux/time.h> -#include <linux/kernel.h> -#include <linux/kernel_stat.h> -#include <linux/fs.h> -#include <linux/tty.h> -#include <linux/string.h> -#include <linux/mman.h> -#include <linux/quicklist.h> -#include <linux/proc_fs.h> -#include <linux/ioport.h> -#include <linux/mm.h> -#include <linux/mmzone.h> -#include <linux/pagemap.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/swap.h> -#include <linux/slab.h> -#include <linux/genhd.h> -#include <linux/smp.h> -#include <linux/signal.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <linux/times.h> -#include <linux/profile.h> -#include <linux/utsname.h> -#include <linux/blkdev.h> -#include <linux/hugetlb.h> -#include <linux/jiffies.h> -#include <linux/vmalloc.h> -#include <linux/crash_dump.h> -#include <linux/pid_namespace.h> -#include <linux/bootmem.h> -#include <asm/uaccess.h> -#include <asm/pgtable.h> -#include <asm/io.h> -#include <asm/tlb.h> -#include <asm/div64.h> -#include "internal.h" - -#define LOAD_INT(x) ((x) >> FSHIFT) -#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100) -/* - * Warning: stuff below (imported functions) assumes that its output will fit - * into one page. For some of those functions it may be wrong. Moreover, we - * have a way to deal with that gracefully. Right now I used straightforward - * wrappers, but this needs further analysis wrt potential overflows. - */ -extern int get_hardware_list(char *); -extern int get_stram_list(char *); -extern int get_exec_domain_list(char *); - -static int proc_calc_metrics(char *page, char **start, off_t off, - int count, int *eof, int len) -{ - if (len <= off+count) *eof = 1; - *start = page + off; - len -= off; - if (len>count) len = count; - if (len<0) len = 0; - return len; -} - -static int loadavg_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int a, b, c; - int len; - unsigned long seq; - - do { - seq = read_seqbegin(&xtime_lock); - a = avenrun[0] + (FIXED_1/200); - b = avenrun[1] + (FIXED_1/200); - c = avenrun[2] + (FIXED_1/200); - } while (read_seqretry(&xtime_lock, seq)); - - len = sprintf(page,"%d.%02d %d.%02d %d.%02d %ld/%d %d\n", - LOAD_INT(a), LOAD_FRAC(a), - LOAD_INT(b), LOAD_FRAC(b), - LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, - task_active_pid_ns(current)->last_pid); - return proc_calc_metrics(page, start, off, count, eof, len); -} - -static int uptime_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct timespec uptime; - struct timespec idle; - int len; - cputime_t idletime = cputime_add(init_task.utime, init_task.stime); - - do_posix_clock_monotonic_gettime(&uptime); - monotonic_to_bootbased(&uptime); - cputime_to_timespec(idletime, &idle); - len = sprintf(page,"%lu.%02lu %lu.%02lu\n", - (unsigned long) uptime.tv_sec, - (uptime.tv_nsec / (NSEC_PER_SEC / 100)), - (unsigned long) idle.tv_sec, - (idle.tv_nsec / (NSEC_PER_SEC / 100))); - - return proc_calc_metrics(page, start, off, count, eof, len); -} - -int __attribute__((weak)) arch_report_meminfo(char *page) -{ - return 0; -} - -static int meminfo_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - struct sysinfo i; - int len; - unsigned long committed; - unsigned long allowed; - struct vmalloc_info vmi; - long cached; - unsigned long pages[NR_LRU_LISTS]; - int lru; - -/* - * display in kilobytes. - */ -#define K(x) ((x) << (PAGE_SHIFT - 10)) - si_meminfo(&i); - si_swapinfo(&i); - committed = atomic_long_read(&vm_committed_space); - allowed = ((totalram_pages - hugetlb_total_pages()) - * sysctl_overcommit_ratio / 100) + total_swap_pages; - - cached = global_page_state(NR_FILE_PAGES) - - total_swapcache_pages - i.bufferram; - if (cached < 0) - cached = 0; - - get_vmalloc_info(&vmi); - - for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) - pages[lru] = global_page_state(NR_LRU_BASE + lru); - - /* - * Tagged format, for easy grepping and expansion. - */ - len = sprintf(page, - "MemTotal: %8lu kB\n" - "MemFree: %8lu kB\n" - "Buffers: %8lu kB\n" - "Cached: %8lu kB\n" - "SwapCached: %8lu kB\n" - "Active: %8lu kB\n" - "Inactive: %8lu kB\n" - "Active(anon): %8lu kB\n" - "Inactive(anon): %8lu kB\n" - "Active(file): %8lu kB\n" - "Inactive(file): %8lu kB\n" -#ifdef CONFIG_UNEVICTABLE_LRU - "Unevictable: %8lu kB\n" - "Mlocked: %8lu kB\n" -#endif -#ifdef CONFIG_HIGHMEM - "HighTotal: %8lu kB\n" - "HighFree: %8lu kB\n" - "LowTotal: %8lu kB\n" - "LowFree: %8lu kB\n" -#endif - "SwapTotal: %8lu kB\n" - "SwapFree: %8lu kB\n" - "Dirty: %8lu kB\n" - "Writeback: %8lu kB\n" - "AnonPages: %8lu kB\n" - "Mapped: %8lu kB\n" - "Slab: %8lu kB\n" - "SReclaimable: %8lu kB\n" - "SUnreclaim: %8lu kB\n" - "PageTables: %8lu kB\n" -#ifdef CONFIG_QUICKLIST - "Quicklists: %8lu kB\n" -#endif - "NFS_Unstable: %8lu kB\n" - "Bounce: %8lu kB\n" - "WritebackTmp: %8lu kB\n" - "CommitLimit: %8lu kB\n" - "Committed_AS: %8lu kB\n" - "VmallocTotal: %8lu kB\n" - "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n", - K(i.totalram), - K(i.freeram), - K(i.bufferram), - K(cached), - K(total_swapcache_pages), - K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), - K(pages[LRU_ACTIVE_ANON]), - K(pages[LRU_INACTIVE_ANON]), - K(pages[LRU_ACTIVE_FILE]), - K(pages[LRU_INACTIVE_FILE]), -#ifdef CONFIG_UNEVICTABLE_LRU - K(pages[LRU_UNEVICTABLE]), - K(global_page_state(NR_MLOCK)), -#endif -#ifdef CONFIG_HIGHMEM - K(i.totalhigh), - K(i.freehigh), - K(i.totalram-i.totalhigh), - K(i.freeram-i.freehigh), -#endif - K(i.totalswap), - K(i.freeswap), - K(global_page_state(NR_FILE_DIRTY)), - K(global_page_state(NR_WRITEBACK)), - K(global_page_state(NR_ANON_PAGES)), - K(global_page_state(NR_FILE_MAPPED)), - K(global_page_state(NR_SLAB_RECLAIMABLE) + - global_page_state(NR_SLAB_UNRECLAIMABLE)), - K(global_page_state(NR_SLAB_RECLAIMABLE)), - K(global_page_state(NR_SLAB_UNRECLAIMABLE)), - K(global_page_state(NR_PAGETABLE)), -#ifdef CONFIG_QUICKLIST - K(quicklist_total_size()), -#endif - K(global_page_state(NR_UNSTABLE_NFS)), - K(global_page_state(NR_BOUNCE)), - K(global_page_state(NR_WRITEBACK_TEMP)), - K(allowed), - K(committed), - (unsigned long)VMALLOC_TOTAL >> 10, - vmi.used >> 10, - vmi.largest_chunk >> 10 - ); - - len += hugetlb_report_meminfo(page + len); - - len += arch_report_meminfo(page + len); - - return proc_calc_metrics(page, start, off, count, eof, len); -#undef K -} - -static int fragmentation_open(struct inode *inode, struct file *file) -{ - (void)inode; - return seq_open(file, &fragmentation_op); -} - -static const struct file_operations fragmentation_file_operations = { - .open = fragmentation_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int pagetypeinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pagetypeinfo_op); -} - -static const struct file_operations pagetypeinfo_file_ops = { - .open = pagetypeinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int zoneinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &zoneinfo_op); -} - -static const struct file_operations proc_zoneinfo_file_operations = { - .open = zoneinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int version_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = snprintf(page, PAGE_SIZE, linux_proc_banner, - utsname()->sysname, - utsname()->release, - utsname()->version); - return proc_calc_metrics(page, start, off, count, eof, len); -} - -extern const struct seq_operations cpuinfo_op; -static int cpuinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &cpuinfo_op); -} - -static const struct file_operations proc_cpuinfo_operations = { - .open = cpuinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int devinfo_show(struct seq_file *f, void *v) -{ - int i = *(loff_t *) v; - - if (i < CHRDEV_MAJOR_HASH_SIZE) { - if (i == 0) - seq_printf(f, "Character devices:\n"); - chrdev_show(f, i); - } -#ifdef CONFIG_BLOCK - else { - i -= CHRDEV_MAJOR_HASH_SIZE; - if (i == 0) - seq_printf(f, "\nBlock devices:\n"); - blkdev_show(f, i); - } -#endif - return 0; -} - -static void *devinfo_start(struct seq_file *f, loff_t *pos) -{ - if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) - return pos; - return NULL; -} - -static void *devinfo_next(struct seq_file *f, void *v, loff_t *pos) -{ - (*pos)++; - if (*pos >= (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE)) - return NULL; - return pos; -} - -static void devinfo_stop(struct seq_file *f, void *v) -{ - /* Nothing to do */ -} - -static const struct seq_operations devinfo_ops = { - .start = devinfo_start, - .next = devinfo_next, - .stop = devinfo_stop, - .show = devinfo_show -}; - -static int devinfo_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &devinfo_ops); -} - -static const struct file_operations proc_devinfo_operations = { - .open = devinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int vmstat_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &vmstat_op); -} -static const struct file_operations proc_vmstat_file_operations = { - .open = vmstat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#ifdef CONFIG_PROC_HARDWARE -static int hardware_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = get_hardware_list(page); - return proc_calc_metrics(page, start, off, count, eof, len); -} -#endif - -#ifdef CONFIG_STRAM_PROC -static int stram_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = get_stram_list(page); - return proc_calc_metrics(page, start, off, count, eof, len); -} -#endif - -#ifdef CONFIG_BLOCK -static int partitions_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &partitions_op); -} -static const struct file_operations proc_partitions_operations = { - .open = partitions_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int diskstats_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &diskstats_op); -} -static const struct file_operations proc_diskstats_operations = { - .open = diskstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - -#ifdef CONFIG_MODULES -extern const struct seq_operations modules_op; -static int modules_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &modules_op); -} -static const struct file_operations proc_modules_operations = { - .open = modules_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif - -#ifdef CONFIG_SLABINFO -static int slabinfo_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &slabinfo_op); -} -static const struct file_operations proc_slabinfo_operations = { - .open = slabinfo_open, - .read = seq_read, - .write = slabinfo_write, - .llseek = seq_lseek, - .release = seq_release, -}; - -#ifdef CONFIG_DEBUG_SLAB_LEAK -extern const struct seq_operations slabstats_op; -static int slabstats_open(struct inode *inode, struct file *file) -{ - unsigned long *n = kzalloc(PAGE_SIZE, GFP_KERNEL); - int ret = -ENOMEM; - if (n) { - ret = seq_open(file, &slabstats_op); - if (!ret) { - struct seq_file *m = file->private_data; - *n = PAGE_SIZE / (2 * sizeof(unsigned long)); - m->private = n; - n = NULL; - } - kfree(n); - } - return ret; -} - -static const struct file_operations proc_slabstats_operations = { - .open = slabstats_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; -#endif -#endif - -#ifdef CONFIG_MMU -static int vmalloc_open(struct inode *inode, struct file *file) -{ - unsigned int *ptr = NULL; - int ret; - - if (NUMA_BUILD) - ptr = kmalloc(nr_node_ids * sizeof(unsigned int), GFP_KERNEL); - ret = seq_open(file, &vmalloc_op); - if (!ret) { - struct seq_file *m = file->private_data; - m->private = ptr; - } else - kfree(ptr); - return ret; -} - -static const struct file_operations proc_vmalloc_operations = { - .open = vmalloc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; -#endif - -#ifndef arch_irq_stat_cpu -#define arch_irq_stat_cpu(cpu) 0 -#endif -#ifndef arch_irq_stat -#define arch_irq_stat() 0 -#endif - -static int show_stat(struct seq_file *p, void *v) -{ - int i, j; - unsigned long jif; - cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; - cputime64_t guest; - u64 sum = 0; - struct timespec boottime; - unsigned int per_irq_sum; - - user = nice = system = idle = iowait = - irq = softirq = steal = cputime64_zero; - guest = cputime64_zero; - getboottime(&boottime); - jif = boottime.tv_sec; - - for_each_possible_cpu(i) { - user = cputime64_add(user, kstat_cpu(i).cpustat.user); - nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); - system = cputime64_add(system, kstat_cpu(i).cpustat.system); - idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); - iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); - irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); - softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); - steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); - guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); - - for_each_irq_nr(j) - sum += kstat_irqs_cpu(j, i); - - sum += arch_irq_stat_cpu(i); - } - sum += arch_irq_stat(); - - seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", - (unsigned long long)cputime64_to_clock_t(user), - (unsigned long long)cputime64_to_clock_t(nice), - (unsigned long long)cputime64_to_clock_t(system), - (unsigned long long)cputime64_to_clock_t(idle), - (unsigned long long)cputime64_to_clock_t(iowait), - (unsigned long long)cputime64_to_clock_t(irq), - (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); - for_each_online_cpu(i) { - - /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ - user = kstat_cpu(i).cpustat.user; - nice = kstat_cpu(i).cpustat.nice; - system = kstat_cpu(i).cpustat.system; - idle = kstat_cpu(i).cpustat.idle; - iowait = kstat_cpu(i).cpustat.iowait; - irq = kstat_cpu(i).cpustat.irq; - softirq = kstat_cpu(i).cpustat.softirq; - steal = kstat_cpu(i).cpustat.steal; - guest = kstat_cpu(i).cpustat.guest; - seq_printf(p, - "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", - i, - (unsigned long long)cputime64_to_clock_t(user), - (unsigned long long)cputime64_to_clock_t(nice), - (unsigned long long)cputime64_to_clock_t(system), - (unsigned long long)cputime64_to_clock_t(idle), - (unsigned long long)cputime64_to_clock_t(iowait), - (unsigned long long)cputime64_to_clock_t(irq), - (unsigned long long)cputime64_to_clock_t(softirq), - (unsigned long long)cputime64_to_clock_t(steal), - (unsigned long long)cputime64_to_clock_t(guest)); - } - seq_printf(p, "intr %llu", (unsigned long long)sum); - - /* sum again ? it could be updated? */ - for_each_irq_nr(j) { - per_irq_sum = 0; - - for_each_possible_cpu(i) - per_irq_sum += kstat_irqs_cpu(j, i); - - seq_printf(p, " %u", per_irq_sum); - } - - seq_printf(p, - "\nctxt %llu\n" - "btime %lu\n" - "processes %lu\n" - "procs_running %lu\n" - "procs_blocked %lu\n", - nr_context_switches(), - (unsigned long)jif, - total_forks, - nr_running(), - nr_iowait()); - - return 0; -} - -static int stat_open(struct inode *inode, struct file *file) -{ - unsigned size = 4096 * (1 + num_possible_cpus() / 32); - char *buf; - struct seq_file *m; - int res; - - /* don't ask for more than the kmalloc() max size, currently 128 KB */ - if (size > 128 * 1024) - size = 128 * 1024; - buf = kmalloc(size, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - res = single_open(file, show_stat, NULL); - if (!res) { - m = file->private_data; - m->buf = buf; - m->size = size; - } else - kfree(buf); - return res; -} -static const struct file_operations proc_stat_operations = { - .open = stat_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -/* - * /proc/interrupts - */ -static void *int_seq_start(struct seq_file *f, loff_t *pos) -{ - return (*pos <= nr_irqs) ? pos : NULL; -} - - -static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) -{ - (*pos)++; - return (*pos <= nr_irqs) ? pos : NULL; -} - -static void int_seq_stop(struct seq_file *f, void *v) -{ - /* Nothing to do */ -} - -static const struct seq_operations int_seq_ops = { - .start = int_seq_start, - .next = int_seq_next, - .stop = int_seq_stop, - .show = show_interrupts -}; - -static int interrupts_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &int_seq_ops); -} - -static const struct file_operations proc_interrupts_operations = { - .open = interrupts_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -static int filesystems_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = get_filesystem_list(page); - return proc_calc_metrics(page, start, off, count, eof, len); -} - -static int cmdline_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len; - - len = sprintf(page, "%s\n", saved_command_line); - return proc_calc_metrics(page, start, off, count, eof, len); -} - -#ifdef CONFIG_FILE_LOCKING -static int locks_open(struct inode *inode, struct file *filp) -{ - return seq_open(filp, &locks_seq_operations); -} - -static const struct file_operations proc_locks_operations = { - .open = locks_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* CONFIG_FILE_LOCKING */ - -static int execdomains_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) -{ - int len = get_exec_domain_list(page); - return proc_calc_metrics(page, start, off, count, eof, len); -} - -#ifdef CONFIG_PROC_PAGE_MONITOR -#define KPMSIZE sizeof(u64) -#define KPMMASK (KPMSIZE - 1) -/* /proc/kpagecount - an array exposing page counts - * - * Each entry is a u64 representing the corresponding - * physical page count. - */ -static ssize_t kpagecount_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - u64 __user *out = (u64 __user *)buf; - struct page *ppage; - unsigned long src = *ppos; - unsigned long pfn; - ssize_t ret = 0; - u64 pcount; - - pfn = src / KPMSIZE; - count = min_t(size_t, count, (max_pfn * KPMSIZE) - src); - if (src & KPMMASK || count & KPMMASK) - return -EINVAL; - - while (count > 0) { - ppage = NULL; - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - pfn++; - if (!ppage) - pcount = 0; - else - pcount = page_mapcount(ppage); - - if (put_user(pcount, out++)) { - ret = -EFAULT; - break; - } - - count -= KPMSIZE; - } - - *ppos += (char __user *)out - buf; - if (!ret) - ret = (char __user *)out - buf; - return ret; -} - -static struct file_operations proc_kpagecount_operations = { - .llseek = mem_lseek, - .read = kpagecount_read, -}; - -/* /proc/kpageflags - an array exposing page flags - * - * Each entry is a u64 representing the corresponding - * physical page flags. - */ - -/* These macros are used to decouple internal flags from exported ones */ - -#define KPF_LOCKED 0 -#define KPF_ERROR 1 -#define KPF_REFERENCED 2 -#define KPF_UPTODATE 3 -#define KPF_DIRTY 4 -#define KPF_LRU 5 -#define KPF_ACTIVE 6 -#define KPF_SLAB 7 -#define KPF_WRITEBACK 8 -#define KPF_RECLAIM 9 -#define KPF_BUDDY 10 - -#define kpf_copy_bit(flags, srcpos, dstpos) (((flags >> srcpos) & 1) << dstpos) - -static ssize_t kpageflags_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) -{ - u64 __user *out = (u64 __user *)buf; - struct page *ppage; - unsigned long src = *ppos; - unsigned long pfn; - ssize_t ret = 0; - u64 kflags, uflags; - - pfn = src / KPMSIZE; - count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src); - if (src & KPMMASK || count & KPMMASK) - return -EINVAL; - - while (count > 0) { - ppage = NULL; - if (pfn_valid(pfn)) - ppage = pfn_to_page(pfn); - pfn++; - if (!ppage) - kflags = 0; - else - kflags = ppage->flags; - - uflags = kpf_copy_bit(KPF_LOCKED, PG_locked, kflags) | - kpf_copy_bit(kflags, KPF_ERROR, PG_error) | - kpf_copy_bit(kflags, KPF_REFERENCED, PG_referenced) | - kpf_copy_bit(kflags, KPF_UPTODATE, PG_uptodate) | - kpf_copy_bit(kflags, KPF_DIRTY, PG_dirty) | - kpf_copy_bit(kflags, KPF_LRU, PG_lru) | - kpf_copy_bit(kflags, KPF_ACTIVE, PG_active) | - kpf_copy_bit(kflags, KPF_SLAB, PG_slab) | - kpf_copy_bit(kflags, KPF_WRITEBACK, PG_writeback) | - kpf_copy_bit(kflags, KPF_RECLAIM, PG_reclaim) | - kpf_copy_bit(kflags, KPF_BUDDY, PG_buddy); - - if (put_user(uflags, out++)) { - ret = -EFAULT; - break; - } - - count -= KPMSIZE; - } - - *ppos += (char __user *)out - buf; - if (!ret) - ret = (char __user *)out - buf; - return ret; -} - -static struct file_operations proc_kpageflags_operations = { - .llseek = mem_lseek, - .read = kpageflags_read, -}; -#endif /* CONFIG_PROC_PAGE_MONITOR */ - -struct proc_dir_entry *proc_root_kcore; - -void __init proc_misc_init(void) -{ - static struct { - char *name; - int (*read_proc)(char*,char**,off_t,int,int*,void*); - } *p, simple_ones[] = { - {"loadavg", loadavg_read_proc}, - {"uptime", uptime_read_proc}, - {"meminfo", meminfo_read_proc}, - {"version", version_read_proc}, -#ifdef CONFIG_PROC_HARDWARE - {"hardware", hardware_read_proc}, -#endif -#ifdef CONFIG_STRAM_PROC - {"stram", stram_read_proc}, -#endif - {"filesystems", filesystems_read_proc}, - {"cmdline", cmdline_read_proc}, - {"execdomains", execdomains_read_proc}, - {NULL,} - }; - for (p = simple_ones; p->name; p++) - create_proc_read_entry(p->name, 0, NULL, p->read_proc, NULL); - - proc_symlink("mounts", NULL, "self/mounts"); - - /* And now for trickier ones */ -#ifdef CONFIG_PRINTK - proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); -#endif -#ifdef CONFIG_FILE_LOCKING - proc_create("locks", 0, NULL, &proc_locks_operations); -#endif - proc_create("devices", 0, NULL, &proc_devinfo_operations); - proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); -#ifdef CONFIG_BLOCK - proc_create("partitions", 0, NULL, &proc_partitions_operations); -#endif - proc_create("stat", 0, NULL, &proc_stat_operations); - proc_create("interrupts", 0, NULL, &proc_interrupts_operations); -#ifdef CONFIG_SLABINFO - proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations); -#ifdef CONFIG_DEBUG_SLAB_LEAK - proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations); -#endif -#endif -#ifdef CONFIG_MMU - proc_create("vmallocinfo", S_IRUSR, NULL, &proc_vmalloc_operations); -#endif - proc_create("buddyinfo", S_IRUGO, NULL, &fragmentation_file_operations); - proc_create("pagetypeinfo", S_IRUGO, NULL, &pagetypeinfo_file_ops); - proc_create("vmstat", S_IRUGO, NULL, &proc_vmstat_file_operations); - proc_create("zoneinfo", S_IRUGO, NULL, &proc_zoneinfo_file_operations); -#ifdef CONFIG_BLOCK - proc_create("diskstats", 0, NULL, &proc_diskstats_operations); -#endif -#ifdef CONFIG_MODULES - proc_create("modules", 0, NULL, &proc_modules_operations); -#endif -#ifdef CONFIG_SCHEDSTATS - proc_create("schedstat", 0, NULL, &proc_schedstat_operations); -#endif -#ifdef CONFIG_PROC_KCORE - proc_root_kcore = proc_create("kcore", S_IRUSR, NULL, &proc_kcore_operations); - if (proc_root_kcore) - proc_root_kcore->size = - (size_t)high_memory - PAGE_OFFSET + PAGE_SIZE; -#endif -#ifdef CONFIG_PROC_PAGE_MONITOR - proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations); - proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations); -#endif -#ifdef CONFIG_PROC_VMCORE - proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); -#endif -} diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 945a81043ba2..94fcfff6863a 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1,7 +1,7 @@ /* * /proc/sys support */ - +#include <linux/init.h> #include <linux/sysctl.h> #include <linux/proc_fs.h> #include <linux/security.h> @@ -298,13 +298,19 @@ static int proc_sys_permission(struct inode *inode, int mask) * sysctl entries that are not writeable, * are _NOT_ writeable, capabilities or not. */ - struct ctl_table_header *head = grab_header(inode); - struct ctl_table *table = PROC_I(inode)->sysctl_entry; + struct ctl_table_header *head; + struct ctl_table *table; int error; + /* Executable files are not allowed under /proc/sys/ */ + if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) + return -EACCES; + + head = grab_header(inode); if (IS_ERR(head)) return PTR_ERR(head); + table = PROC_I(inode)->sysctl_entry; if (!table) /* global root - r-xr-xr-x */ error = mask & MAY_WRITE ? -EACCES : 0; else /* Use the permissions on the sysctl table entry */ @@ -353,6 +359,7 @@ static const struct file_operations proc_sys_file_operations = { static const struct file_operations proc_sys_dir_file_operations = { .readdir = proc_sys_readdir, + .llseek = generic_file_llseek, }; static const struct inode_operations proc_sys_inode_operations = { @@ -395,7 +402,7 @@ static struct dentry_operations proc_sys_dentry_operations = { .d_compare = proc_sys_compare, }; -int proc_sys_init(void) +int __init proc_sys_init(void) { struct proc_dir_entry *proc_sys_root; diff --git a/fs/proc/root.c b/fs/proc/root.c index 95117538a4f6..7761602af9de 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -104,9 +104,9 @@ static struct file_system_type proc_fs_type = { void __init proc_root_init(void) { - int err = proc_init_inodecache(); - if (err) - return; + int err; + + proc_init_inodecache(); err = register_filesystem(&proc_fs_type); if (err) return; @@ -117,7 +117,7 @@ void __init proc_root_init(void) return; } - proc_misc_init(); + proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/stat.c b/fs/proc/stat.c new file mode 100644 index 000000000000..81904f07679d --- /dev/null +++ b/fs/proc/stat.c @@ -0,0 +1,153 @@ +#include <linux/cpumask.h> +#include <linux/fs.h> +#include <linux/gfp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/kernel_stat.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/time.h> +#include <asm/cputime.h> + +#ifndef arch_irq_stat_cpu +#define arch_irq_stat_cpu(cpu) 0 +#endif +#ifndef arch_irq_stat +#define arch_irq_stat() 0 +#endif + +static int show_stat(struct seq_file *p, void *v) +{ + int i, j; + unsigned long jif; + cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; + cputime64_t guest; + u64 sum = 0; + struct timespec boottime; + unsigned int per_irq_sum; + + user = nice = system = idle = iowait = + irq = softirq = steal = cputime64_zero; + guest = cputime64_zero; + getboottime(&boottime); + jif = boottime.tv_sec; + + for_each_possible_cpu(i) { + user = cputime64_add(user, kstat_cpu(i).cpustat.user); + nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); + system = cputime64_add(system, kstat_cpu(i).cpustat.system); + idle = cputime64_add(idle, kstat_cpu(i).cpustat.idle); + iowait = cputime64_add(iowait, kstat_cpu(i).cpustat.iowait); + irq = cputime64_add(irq, kstat_cpu(i).cpustat.irq); + softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); + steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); + guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); + + for_each_irq_nr(j) + sum += kstat_irqs_cpu(j, i); + + sum += arch_irq_stat_cpu(i); + } + sum += arch_irq_stat(); + + seq_printf(p, "cpu %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(guest)); + for_each_online_cpu(i) { + + /* Copy values here to work around gcc-2.95.3, gcc-2.96 */ + user = kstat_cpu(i).cpustat.user; + nice = kstat_cpu(i).cpustat.nice; + system = kstat_cpu(i).cpustat.system; + idle = kstat_cpu(i).cpustat.idle; + iowait = kstat_cpu(i).cpustat.iowait; + irq = kstat_cpu(i).cpustat.irq; + softirq = kstat_cpu(i).cpustat.softirq; + steal = kstat_cpu(i).cpustat.steal; + guest = kstat_cpu(i).cpustat.guest; + seq_printf(p, + "cpu%d %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", + i, + (unsigned long long)cputime64_to_clock_t(user), + (unsigned long long)cputime64_to_clock_t(nice), + (unsigned long long)cputime64_to_clock_t(system), + (unsigned long long)cputime64_to_clock_t(idle), + (unsigned long long)cputime64_to_clock_t(iowait), + (unsigned long long)cputime64_to_clock_t(irq), + (unsigned long long)cputime64_to_clock_t(softirq), + (unsigned long long)cputime64_to_clock_t(steal), + (unsigned long long)cputime64_to_clock_t(guest)); + } + seq_printf(p, "intr %llu", (unsigned long long)sum); + + /* sum again ? it could be updated? */ + for_each_irq_nr(j) { + per_irq_sum = 0; + + for_each_possible_cpu(i) + per_irq_sum += kstat_irqs_cpu(j, i); + + seq_printf(p, " %u", per_irq_sum); + } + + seq_printf(p, + "\nctxt %llu\n" + "btime %lu\n" + "processes %lu\n" + "procs_running %lu\n" + "procs_blocked %lu\n", + nr_context_switches(), + (unsigned long)jif, + total_forks, + nr_running(), + nr_iowait()); + + return 0; +} + +static int stat_open(struct inode *inode, struct file *file) +{ + unsigned size = 4096 * (1 + num_possible_cpus() / 32); + char *buf; + struct seq_file *m; + int res; + + /* don't ask for more than the kmalloc() max size, currently 128 KB */ + if (size > 128 * 1024) + size = 128 * 1024; + buf = kmalloc(size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + res = single_open(file, show_stat, NULL); + if (!res) { + m = file->private_data; + m->buf = buf; + m->size = size; + } else + kfree(buf); + return res; +} + +static const struct file_operations proc_stat_operations = { + .open = stat_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_stat_init(void) +{ + proc_create("stat", 0, NULL, &proc_stat_operations); + return 0; +} +module_init(proc_stat_init); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 4806830ea2a1..b770c095e45c 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -198,11 +198,8 @@ static int do_maps_open(struct inode *inode, struct file *file, return ret; } -static int show_map(struct seq_file *m, void *v) +static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) { - struct proc_maps_private *priv = m->private; - struct task_struct *task = priv->task; - struct vm_area_struct *vma = v; struct mm_struct *mm = vma->vm_mm; struct file *file = vma->vm_file; int flags = vma->vm_flags; @@ -254,6 +251,15 @@ static int show_map(struct seq_file *m, void *v) } } seq_putc(m, '\n'); +} + +static int show_map(struct seq_file *m, void *v) +{ + struct vm_area_struct *vma = v; + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; + + show_map_vma(m, vma); if (m->count < m->size) /* vma is copied successfully */ m->version = (vma != get_gate_vma(task))? vma->vm_start: 0; @@ -364,9 +370,10 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, static int show_smap(struct seq_file *m, void *v) { + struct proc_maps_private *priv = m->private; + struct task_struct *task = priv->task; struct vm_area_struct *vma = v; struct mem_size_stats mss; - int ret; struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range, .mm = vma->vm_mm, @@ -378,9 +385,7 @@ static int show_smap(struct seq_file *m, void *v) if (vma->vm_mm && !is_vm_hugetlb_page(vma)) walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk); - ret = show_map(m, v); - if (ret) - return ret; + show_map_vma(m, vma); seq_printf(m, "Size: %8lu kB\n" @@ -402,7 +407,9 @@ static int show_smap(struct seq_file *m, void *v) mss.referenced >> 10, mss.swap >> 10); - return ret; + if (m->count < m->size) /* vma is copied successfully */ + m->version = (vma != get_gate_vma(task)) ? vma->vm_start : 0; + return 0; } static const struct seq_operations proc_pid_smaps_op = { diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c new file mode 100644 index 000000000000..df26aa88fa47 --- /dev/null +++ b/fs/proc/uptime.c @@ -0,0 +1,45 @@ +#include <linux/init.h> +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/time.h> +#include <asm/cputime.h> + +static int proc_calc_metrics(char *page, char **start, off_t off, + int count, int *eof, int len) +{ + if (len <= off + count) + *eof = 1; + *start = page + off; + len -= off; + if (len > count) + len = count; + if (len < 0) + len = 0; + return len; +} + +static int uptime_read_proc(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct timespec uptime; + struct timespec idle; + int len; + cputime_t idletime = cputime_add(init_task.utime, init_task.stime); + + do_posix_clock_monotonic_gettime(&uptime); + monotonic_to_bootbased(&uptime); + cputime_to_timespec(idletime, &idle); + len = sprintf(page, "%lu.%02lu %lu.%02lu\n", + (unsigned long) uptime.tv_sec, + (uptime.tv_nsec / (NSEC_PER_SEC / 100)), + (unsigned long) idle.tv_sec, + (idle.tv_nsec / (NSEC_PER_SEC / 100))); + return proc_calc_metrics(page, start, off, count, eof, len); +} + +static int __init proc_uptime_init(void) +{ + create_proc_read_entry("uptime", 0, NULL, uptime_read_proc, NULL); + return 0; +} +module_init(proc_uptime_init); diff --git a/fs/proc/version.c b/fs/proc/version.c new file mode 100644 index 000000000000..76817a60678c --- /dev/null +++ b/fs/proc/version.c @@ -0,0 +1,34 @@ +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/utsname.h> + +static int version_proc_show(struct seq_file *m, void *v) +{ + seq_printf(m, linux_proc_banner, + utsname()->sysname, + utsname()->release, + utsname()->version); + return 0; +} + +static int version_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, version_proc_show, NULL); +} + +static const struct file_operations version_proc_fops = { + .open = version_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init proc_version_init(void) +{ + proc_create("version", 0, NULL, &version_proc_fops); + return 0; +} +module_init(proc_version_init); diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index cd9ca67f841b..03ec59504906 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -32,7 +32,7 @@ static size_t elfcorebuf_sz; /* Total size of vmcore file. */ static u64 vmcore_size; -struct proc_dir_entry *proc_vmcore = NULL; +static struct proc_dir_entry *proc_vmcore = NULL; /* Reads a page from the oldmem device from given offset. */ static ssize_t read_from_oldmem(char *buf, size_t count, @@ -162,7 +162,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, return acc; } -const struct file_operations proc_vmcore_operations = { +static const struct file_operations proc_vmcore_operations = { .read = read_vmcore, }; @@ -652,7 +652,7 @@ static int __init vmcore_init(void) return rc; } - /* Initialize /proc/vmcore size if proc is already up. */ + proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); if (proc_vmcore) proc_vmcore->size = vmcore_size; return 0; diff --git a/fs/read_write.c b/fs/read_write.c index 9ba495d5a29b..969a6d9c020b 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -31,39 +31,61 @@ const struct file_operations generic_ro_fops = { EXPORT_SYMBOL(generic_ro_fops); +/** + * generic_file_llseek_unlocked - lockless generic llseek implementation + * @file: file structure to seek on + * @offset: file offset to seek to + * @origin: type of seek + * + * Updates the file offset to the value specified by @offset and @origin. + * Locking must be provided by the caller. + */ loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) { - loff_t retval; struct inode *inode = file->f_mapping->host; switch (origin) { - case SEEK_END: - offset += inode->i_size; - break; - case SEEK_CUR: - offset += file->f_pos; + case SEEK_END: + offset += inode->i_size; + break; + case SEEK_CUR: + offset += file->f_pos; + break; } - retval = -EINVAL; - if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { - /* Special lock needed here? */ - if (offset != file->f_pos) { - file->f_pos = offset; - file->f_version = 0; - } - retval = offset; + + if (offset < 0 || offset > inode->i_sb->s_maxbytes) + return -EINVAL; + + /* Special lock needed here? */ + if (offset != file->f_pos) { + file->f_pos = offset; + file->f_version = 0; } - return retval; + + return offset; } EXPORT_SYMBOL(generic_file_llseek_unlocked); +/** + * generic_file_llseek - generic llseek implementation for regular files + * @file: file structure to seek on + * @offset: file offset to seek to + * @origin: type of seek + * + * This is a generic implemenation of ->llseek useable for all normal local + * filesystems. It just updates the file offset to the value specified by + * @offset and @origin under i_mutex. + */ loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) { - loff_t n; + loff_t rval; + mutex_lock(&file->f_dentry->d_inode->i_mutex); - n = generic_file_llseek_unlocked(file, offset, origin); + rval = generic_file_llseek_unlocked(file, offset, origin); mutex_unlock(&file->f_dentry->d_inode->i_mutex); - return n; + + return rval; } EXPORT_SYMBOL(generic_file_llseek); diff --git a/fs/readdir.c b/fs/readdir.c index 93a7559bbfd8..b318d9b5af2e 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -117,7 +117,7 @@ asmlinkage long old_readdir(unsigned int fd, struct old_linux_dirent __user * di buf.dirent = dirent; error = vfs_readdir(file, fillonedir, &buf); - if (error >= 0) + if (buf.result) error = buf.result; fput(file); @@ -209,9 +209,8 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren buf.error = 0; error = vfs_readdir(file, filldir, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { if (put_user(file->f_pos, &lastdirent->d_off)) @@ -219,8 +218,6 @@ asmlinkage long sys_getdents(unsigned int fd, struct linux_dirent __user * diren else error = count - buf.count; } - -out_putf: fput(file); out: return error; @@ -293,19 +290,16 @@ asmlinkage long sys_getdents64(unsigned int fd, struct linux_dirent64 __user * d buf.error = 0; error = vfs_readdir(file, filldir64, &buf); - if (error < 0) - goto out_putf; - error = buf.error; + if (error >= 0) + error = buf.error; lastdirent = buf.previous; if (lastdirent) { typeof(lastdirent->d_off) d_off = file->f_pos; - error = -EFAULT; if (__put_user(d_off, &lastdirent->d_off)) - goto out_putf; - error = count - buf.count; + error = -EFAULT; + else + error = count - buf.count; } - -out_putf: fput(file); out: return error; diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index a804903d31d1..33408417038c 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -296,6 +296,7 @@ const struct file_operations reiserfs_file_operations = { .aio_write = generic_file_aio_write, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, + .llseek = generic_file_llseek, }; const struct inode_operations reiserfs_file_inode_operations = { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 5699171212ae..6c4c2c69449f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1522,7 +1522,6 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, { struct cpu_key key; - struct dentry *result; struct inode *inode; key.on_disk_key.k_objectid = objectid; @@ -1535,16 +1534,8 @@ static struct dentry *reiserfs_get_dentry(struct super_block *sb, inode = NULL; } reiserfs_write_unlock(sb); - if (!inode) - inode = ERR_PTR(-ESTALE); - if (IS_ERR(inode)) - return ERR_CAST(inode); - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + + return d_obtain_alias(inode); } struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid, diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c21df71943a6..9643c3bbeb3b 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2575,7 +2575,7 @@ static int release_journal_dev(struct super_block *super, if (journal->j_dev_bd != NULL) { if (journal->j_dev_bd->bd_dev != super->s_dev) bd_release(journal->j_dev_bd); - result = blkdev_put(journal->j_dev_bd); + result = blkdev_put(journal->j_dev_bd, journal->j_dev_mode); journal->j_dev_bd = NULL; } @@ -2593,7 +2593,7 @@ static int journal_init_dev(struct super_block *super, { int result; dev_t jdev; - int blkdev_mode = FMODE_READ | FMODE_WRITE; + fmode_t blkdev_mode = FMODE_READ | FMODE_WRITE; char b[BDEVNAME_SIZE]; result = 0; @@ -2608,6 +2608,7 @@ static int journal_init_dev(struct super_block *super, /* there is no "jdev" option and journal is on separate device */ if ((!jdev_name || !jdev_name[0])) { journal->j_dev_bd = open_by_devnum(jdev, blkdev_mode); + journal->j_dev_mode = blkdev_mode; if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; @@ -2618,7 +2619,7 @@ static int journal_init_dev(struct super_block *super, } else if (jdev != super->s_dev) { result = bd_claim(journal->j_dev_bd, journal); if (result) { - blkdev_put(journal->j_dev_bd); + blkdev_put(journal->j_dev_bd, blkdev_mode); return result; } @@ -2628,7 +2629,9 @@ static int journal_init_dev(struct super_block *super, return 0; } - journal->j_dev_bd = open_bdev_excl(jdev_name, 0, journal); + journal->j_dev_mode = blkdev_mode; + journal->j_dev_bd = open_bdev_exclusive(jdev_name, + blkdev_mode, journal); if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index c1add28dd45e..f89ebb943f3f 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -383,7 +383,6 @@ struct dentry *reiserfs_get_parent(struct dentry *child) struct inode *inode = NULL; struct reiserfs_dir_entry de; INITIALIZE_PATH(path_to_entry); - struct dentry *parent; struct inode *dir = child->d_inode; if (dir->i_nlink == 0) { @@ -401,15 +400,7 @@ struct dentry *reiserfs_get_parent(struct dentry *child) inode = reiserfs_iget(dir->i_sb, (struct cpu_key *)&(de.de_dir_id)); reiserfs_write_unlock(dir->i_sb); - if (!inode || IS_ERR(inode)) { - return ERR_PTR(-EACCES); - } - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(inode); } /* add entry to the directory (entry can be hidden). diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index d318c7e663fa..663a91f5dce8 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2058,10 +2058,10 @@ static int reiserfs_quota_on_mount(struct super_block *sb, int type) * Standard function to be called on quota_on */ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, - char *path, int remount) + char *name, int remount) { int err; - struct nameidata nd; + struct path path; struct inode *inode; struct reiserfs_transaction_handle th; @@ -2069,16 +2069,16 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, return -EINVAL; /* No more checks needed? Path and format_id are bogus anyway... */ if (remount) - return vfs_quota_on(sb, type, format_id, path, 1); - err = path_lookup(path, LOOKUP_FOLLOW, &nd); + return vfs_quota_on(sb, type, format_id, name, 1); + err = kern_path(name, LOOKUP_FOLLOW, &path); if (err) return err; /* Quotafile not on the same filesystem? */ - if (nd.path.mnt->mnt_sb != sb) { + if (path.mnt->mnt_sb != sb) { err = -EXDEV; goto out; } - inode = nd.path.dentry->d_inode; + inode = path.dentry->d_inode; /* We must not pack tails for quota files on reiserfs for quota IO to work */ if (!(REISERFS_I(inode)->i_flags & i_nopack_mask)) { err = reiserfs_unpack(inode, NULL); @@ -2094,7 +2094,7 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, /* Journaling quota? */ if (REISERFS_SB(sb)->s_qf_names[type]) { /* Quotafile not of fs root? */ - if (nd.path.dentry->d_parent->d_inode != sb->s_root->d_inode) + if (path.dentry->d_parent != sb->s_root) reiserfs_warning(sb, "reiserfs: Quota file not on filesystem root. " "Journalled quota will not work."); @@ -2113,9 +2113,9 @@ static int reiserfs_quota_on(struct super_block *sb, int type, int format_id, if (err) goto out; } - err = vfs_quota_on_path(sb, type, format_id, &nd.path); + err = vfs_quota_on_path(sb, type, format_id, &path); out: - path_put(&nd.path); + path_put(&path); return err; } diff --git a/fs/select.c b/fs/select.c index da0e88201c3a..87df51eadcf2 100644 --- a/fs/select.c +++ b/fs/select.c @@ -24,9 +24,64 @@ #include <linux/fdtable.h> #include <linux/fs.h> #include <linux/rcupdate.h> +#include <linux/hrtimer.h> #include <asm/uaccess.h> + +/* + * Estimate expected accuracy in ns from a timeval. + * + * After quite a bit of churning around, we've settled on + * a simple thing of taking 0.1% of the timeout as the + * slack, with a cap of 100 msec. + * "nice" tasks get a 0.5% slack instead. + * + * Consider this comment an open invitation to come up with even + * better solutions.. + */ + +static long __estimate_accuracy(struct timespec *tv) +{ + long slack; + int divfactor = 1000; + + if (task_nice(current) > 0) + divfactor = divfactor / 5; + + slack = tv->tv_nsec / divfactor; + slack += tv->tv_sec * (NSEC_PER_SEC/divfactor); + + if (slack > 100 * NSEC_PER_MSEC) + slack = 100 * NSEC_PER_MSEC; + + if (slack < 0) + slack = 0; + return slack; +} + +static long estimate_accuracy(struct timespec *tv) +{ + unsigned long ret; + struct timespec now; + + /* + * Realtime tasks get a slack of 0 for obvious reasons. + */ + + if (rt_task(current)) + return 0; + + ktime_get_ts(&now); + now = timespec_sub(*tv, now); + ret = __estimate_accuracy(&now); + if (ret < current->timer_slack_ns) + return current->timer_slack_ns; + return ret; +} + + + struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; @@ -130,6 +185,79 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address, add_wait_queue(wait_address, &entry->wait); } +/** + * poll_select_set_timeout - helper function to setup the timeout value + * @to: pointer to timespec variable for the final timeout + * @sec: seconds (from user space) + * @nsec: nanoseconds (from user space) + * + * Note, we do not use a timespec for the user space value here, That + * way we can use the function for timeval and compat interfaces as well. + * + * Returns -EINVAL if sec/nsec are not normalized. Otherwise 0. + */ +int poll_select_set_timeout(struct timespec *to, long sec, long nsec) +{ + struct timespec ts = {.tv_sec = sec, .tv_nsec = nsec}; + + if (!timespec_valid(&ts)) + return -EINVAL; + + /* Optimize for the zero timeout value here */ + if (!sec && !nsec) { + to->tv_sec = to->tv_nsec = 0; + } else { + ktime_get_ts(to); + *to = timespec_add_safe(*to, ts); + } + return 0; +} + +static int poll_select_copy_remaining(struct timespec *end_time, void __user *p, + int timeval, int ret) +{ + struct timespec rts; + struct timeval rtv; + + if (!p) + return ret; + + if (current->personality & STICKY_TIMEOUTS) + goto sticky; + + /* No update for zero timeout */ + if (!end_time->tv_sec && !end_time->tv_nsec) + return ret; + + ktime_get_ts(&rts); + rts = timespec_sub(*end_time, rts); + if (rts.tv_sec < 0) + rts.tv_sec = rts.tv_nsec = 0; + + if (timeval) { + rtv.tv_sec = rts.tv_sec; + rtv.tv_usec = rts.tv_nsec / NSEC_PER_USEC; + + if (!copy_to_user(p, &rtv, sizeof(rtv))) + return ret; + + } else if (!copy_to_user(p, &rts, sizeof(rts))) + return ret; + + /* + * If an application puts its timeval in read-only memory, we + * don't want the Linux-specific update to the timeval to + * cause a fault after the select has completed + * successfully. However, because we're not updating the + * timeval, we can't restart the system call. + */ + +sticky: + if (ret == -ERESTARTNOHAND) + ret = -EINTR; + return ret; +} + #define FDS_IN(fds, n) (fds->in + n) #define FDS_OUT(fds, n) (fds->out + n) #define FDS_EX(fds, n) (fds->ex + n) @@ -182,11 +310,13 @@ get_max: #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) -int do_select(int n, fd_set_bits *fds, s64 *timeout) +int do_select(int n, fd_set_bits *fds, struct timespec *end_time) { + ktime_t expire, *to = NULL; struct poll_wqueues table; poll_table *wait; - int retval, i; + int retval, i, timed_out = 0; + unsigned long slack = 0; rcu_read_lock(); retval = max_select_fd(n, fds); @@ -198,12 +328,17 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) poll_initwait(&table); wait = &table.pt; - if (!*timeout) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { wait = NULL; + timed_out = 1; + } + + if (end_time && !timed_out) + slack = estimate_accuracy(end_time); + retval = 0; for (;;) { unsigned long *rinp, *routp, *rexp, *inp, *outp, *exp; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); @@ -259,27 +394,25 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) cond_resched(); } wait = NULL; - if (retval || !*timeout || signal_pending(current)) + if (retval || timed_out || signal_pending(current)) break; if (table.error) { retval = table.error; break; } - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT - 1)) { - /* Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in a loop */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); @@ -300,7 +433,7 @@ int do_select(int n, fd_set_bits *fds, s64 *timeout) ((unsigned long) (MAX_SCHEDULE_TIMEOUT / HZ)-1) int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, - fd_set __user *exp, s64 *timeout) + fd_set __user *exp, struct timespec *end_time) { fd_set_bits fds; void *bits; @@ -351,7 +484,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, zero_fd_set(n, fds.res_out); zero_fd_set(n, fds.res_ex); - ret = do_select(n, &fds, timeout); + ret = do_select(n, &fds, end_time); if (ret < 0) goto out; @@ -377,7 +510,7 @@ out_nofds: asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timeval __user *tvp) { - s64 timeout = -1; + struct timespec end_time, *to = NULL; struct timeval tv; int ret; @@ -385,43 +518,15 @@ asmlinkage long sys_select(int n, fd_set __user *inp, fd_set __user *outp, if (copy_from_user(&tv, tvp, sizeof(tv))) return -EFAULT; - if (tv.tv_sec < 0 || tv.tv_usec < 0) + to = &end_time; + if (poll_select_set_timeout(to, + tv.tv_sec + (tv.tv_usec / USEC_PER_SEC), + (tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)tv.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(tv.tv_usec, USEC_PER_SEC/HZ); - timeout += tv.tv_sec * HZ; - } } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tvp) { - struct timeval rtv; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rtv.tv_usec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)); - rtv.tv_sec = timeout; - if (timeval_compare(&rtv, &tv) >= 0) - rtv = tv; - if (copy_to_user(tvp, &rtv, sizeof(rtv))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, to); + ret = poll_select_copy_remaining(&end_time, tvp, 1, ret); return ret; } @@ -431,25 +536,17 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec __user *tsp, const sigset_t __user *sigmask, size_t sigsetsize) { - s64 timeout = MAX_SCHEDULE_TIMEOUT; sigset_t ksigmask, sigsaved; - struct timespec ts; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - if (ts.tv_sec < 0 || ts.tv_nsec < 0) + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) return -EINVAL; - - /* Cast to u64 to make GCC stop complaining */ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } } if (sigmask) { @@ -463,32 +560,8 @@ asmlinkage long sys_pselect7(int n, fd_set __user *inp, fd_set __user *outp, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = core_sys_select(n, inp, outp, exp, &timeout); - - if (tsp) { - struct timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { -sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND) - ret = -EINTR; - } - } + ret = core_sys_select(n, inp, outp, exp, &end_time); + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); if (ret == -ERESTARTNOHAND) { /* @@ -574,18 +647,24 @@ static inline unsigned int do_pollfd(struct pollfd *pollfd, poll_table *pwait) } static int do_poll(unsigned int nfds, struct poll_list *list, - struct poll_wqueues *wait, s64 *timeout) + struct poll_wqueues *wait, struct timespec *end_time) { - int count = 0; poll_table* pt = &wait->pt; + ktime_t expire, *to = NULL; + int timed_out = 0, count = 0; + unsigned long slack = 0; /* Optimise the no-wait case */ - if (!(*timeout)) + if (end_time && !end_time->tv_sec && !end_time->tv_nsec) { pt = NULL; + timed_out = 1; + } + + if (end_time && !timed_out) + slack = estimate_accuracy(end_time); for (;;) { struct poll_list *walk; - long __timeout; set_current_state(TASK_INTERRUPTIBLE); for (walk = list; walk != NULL; walk = walk->next) { @@ -617,27 +696,21 @@ static int do_poll(unsigned int nfds, struct poll_list *list, if (signal_pending(current)) count = -EINTR; } - if (count || !*timeout) + if (count || timed_out) break; - if (*timeout < 0) { - /* Wait indefinitely */ - __timeout = MAX_SCHEDULE_TIMEOUT; - } else if (unlikely(*timeout >= (s64)MAX_SCHEDULE_TIMEOUT-1)) { - /* - * Wait for longer than MAX_SCHEDULE_TIMEOUT. Do it in - * a loop - */ - __timeout = MAX_SCHEDULE_TIMEOUT - 1; - *timeout -= __timeout; - } else { - __timeout = *timeout; - *timeout = 0; + /* + * If this is the first loop and we have a timeout + * given, then we convert to ktime_t and set the to + * pointer to the expiry value. + */ + if (end_time && !to) { + expire = timespec_to_ktime(*end_time); + to = &expire; } - __timeout = schedule_timeout(__timeout); - if (*timeout >= 0) - *timeout += __timeout; + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) + timed_out = 1; } __set_current_state(TASK_RUNNING); return count; @@ -646,7 +719,8 @@ static int do_poll(unsigned int nfds, struct poll_list *list, #define N_STACK_PPS ((sizeof(stack_pps) - sizeof(struct poll_list)) / \ sizeof(struct pollfd)) -int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) +int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, + struct timespec *end_time) { struct poll_wqueues table; int err = -EFAULT, fdcount, len, size; @@ -686,7 +760,7 @@ int do_sys_poll(struct pollfd __user *ufds, unsigned int nfds, s64 *timeout) } poll_initwait(&table); - fdcount = do_poll(nfds, head, &table, timeout); + fdcount = do_poll(nfds, head, &table, end_time); poll_freewait(&table); for (walk = head; walk; walk = walk->next) { @@ -712,16 +786,21 @@ out_fds: static long do_restart_poll(struct restart_block *restart_block) { - struct pollfd __user *ufds = (struct pollfd __user*)restart_block->arg0; - int nfds = restart_block->arg1; - s64 timeout = ((s64)restart_block->arg3<<32) | (s64)restart_block->arg2; + struct pollfd __user *ufds = restart_block->poll.ufds; + int nfds = restart_block->poll.nfds; + struct timespec *to = NULL, end_time; int ret; - ret = do_sys_poll(ufds, nfds, &timeout); + if (restart_block->poll.has_timeout) { + end_time.tv_sec = restart_block->poll.tv_sec; + end_time.tv_nsec = restart_block->poll.tv_nsec; + to = &end_time; + } + + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { restart_block->fn = do_restart_poll; - restart_block->arg2 = timeout & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout >> 32; ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -730,31 +809,32 @@ static long do_restart_poll(struct restart_block *restart_block) asmlinkage long sys_poll(struct pollfd __user *ufds, unsigned int nfds, long timeout_msecs) { - s64 timeout_jiffies; + struct timespec end_time, *to = NULL; int ret; - if (timeout_msecs > 0) { -#if HZ > 1000 - /* We can only overflow if HZ > 1000 */ - if (timeout_msecs / 1000 > (s64)0x7fffffffffffffffULL / (s64)HZ) - timeout_jiffies = -1; - else -#endif - timeout_jiffies = msecs_to_jiffies(timeout_msecs) + 1; - } else { - /* Infinite (< 0) or no (0) timeout */ - timeout_jiffies = timeout_msecs; + if (timeout_msecs >= 0) { + to = &end_time; + poll_select_set_timeout(to, timeout_msecs / MSEC_PER_SEC, + NSEC_PER_MSEC * (timeout_msecs % MSEC_PER_SEC)); } - ret = do_sys_poll(ufds, nfds, &timeout_jiffies); + ret = do_sys_poll(ufds, nfds, to); + if (ret == -EINTR) { struct restart_block *restart_block; + restart_block = ¤t_thread_info()->restart_block; restart_block->fn = do_restart_poll; - restart_block->arg0 = (unsigned long)ufds; - restart_block->arg1 = nfds; - restart_block->arg2 = timeout_jiffies & 0xFFFFFFFF; - restart_block->arg3 = (u64)timeout_jiffies >> 32; + restart_block->poll.ufds = ufds; + restart_block->poll.nfds = nfds; + + if (timeout_msecs >= 0) { + restart_block->poll.tv_sec = end_time.tv_sec; + restart_block->poll.tv_nsec = end_time.tv_nsec; + restart_block->poll.has_timeout = 1; + } else + restart_block->poll.has_timeout = 0; + ret = -ERESTART_RESTARTBLOCK; } return ret; @@ -766,21 +846,16 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, size_t sigsetsize) { sigset_t ksigmask, sigsaved; - struct timespec ts; - s64 timeout = -1; + struct timespec ts, end_time, *to = NULL; int ret; if (tsp) { if (copy_from_user(&ts, tsp, sizeof(ts))) return -EFAULT; - /* Cast to u64 to make GCC stop complaining */ - if ((u64)ts.tv_sec >= (u64)MAX_INT64_SECONDS) - timeout = -1; /* infinite */ - else { - timeout = DIV_ROUND_UP(ts.tv_nsec, NSEC_PER_SEC/HZ); - timeout += ts.tv_sec * HZ; - } + to = &end_time; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; } if (sigmask) { @@ -794,7 +869,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); } - ret = do_sys_poll(ufds, nfds, &timeout); + ret = do_sys_poll(ufds, nfds, to); /* We can restart this syscall, usually */ if (ret == -EINTR) { @@ -812,31 +887,7 @@ asmlinkage long sys_ppoll(struct pollfd __user *ufds, unsigned int nfds, } else if (sigmask) sigprocmask(SIG_SETMASK, &sigsaved, NULL); - if (tsp && timeout >= 0) { - struct timespec rts; - - if (current->personality & STICKY_TIMEOUTS) - goto sticky; - /* Yes, we know it's actually an s64, but it's also positive. */ - rts.tv_nsec = jiffies_to_usecs(do_div((*(u64*)&timeout), HZ)) * - 1000; - rts.tv_sec = timeout; - if (timespec_compare(&rts, &ts) >= 0) - rts = ts; - if (copy_to_user(tsp, &rts, sizeof(rts))) { - sticky: - /* - * If an application puts its timeval in read-only - * memory, we don't want the Linux-specific update to - * the timeval to cause a fault after the select has - * completed successfully. However, because we're not - * updating the timeval, we can't restart the system - * call. - */ - if (ret == -ERESTARTNOHAND && timeout >= 0) - ret = -EINTR; - } - } + ret = poll_select_copy_remaining(&end_time, tsp, 0, ret); return ret; } diff --git a/fs/splice.c b/fs/splice.c index a1e701c27156..1abab5cee4ba 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -731,8 +731,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, }; /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to + * The actor worker might be calling ->write_begin and + * ->write_end. Most of the time, these expect i_mutex to * be held. Since this may result in an ABBA deadlock with * pipe->inode, we have to order lock acquiry here. */ diff --git a/fs/super.c b/fs/super.c index e931ae9511fe..400a7608f15e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -682,7 +682,7 @@ void emergency_remount(void) * filesystems which don't use real block-devices. -- jrs */ -static struct idr unnamed_dev_idr; +static DEFINE_IDA(unnamed_dev_ida); static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */ int set_anon_super(struct super_block *s, void *data) @@ -691,10 +691,10 @@ int set_anon_super(struct super_block *s, void *data) int error; retry: - if (idr_pre_get(&unnamed_dev_idr, GFP_ATOMIC) == 0) + if (ida_pre_get(&unnamed_dev_ida, GFP_ATOMIC) == 0) return -ENOMEM; spin_lock(&unnamed_dev_lock); - error = idr_get_new(&unnamed_dev_idr, NULL, &dev); + error = ida_get_new(&unnamed_dev_ida, &dev); spin_unlock(&unnamed_dev_lock); if (error == -EAGAIN) /* We raced and lost with another CPU. */ @@ -704,7 +704,7 @@ int set_anon_super(struct super_block *s, void *data) if ((dev & MAX_ID_MASK) == (1 << MINORBITS)) { spin_lock(&unnamed_dev_lock); - idr_remove(&unnamed_dev_idr, dev); + ida_remove(&unnamed_dev_ida, dev); spin_unlock(&unnamed_dev_lock); return -EMFILE; } @@ -720,17 +720,12 @@ void kill_anon_super(struct super_block *sb) generic_shutdown_super(sb); spin_lock(&unnamed_dev_lock); - idr_remove(&unnamed_dev_idr, slot); + ida_remove(&unnamed_dev_ida, slot); spin_unlock(&unnamed_dev_lock); } EXPORT_SYMBOL(kill_anon_super); -void __init unnamed_dev_init(void) -{ - idr_init(&unnamed_dev_idr); -} - void kill_litter_super(struct super_block *sb) { if (sb->s_root) @@ -760,9 +755,13 @@ int get_sb_bdev(struct file_system_type *fs_type, { struct block_device *bdev; struct super_block *s; + fmode_t mode = FMODE_READ; int error = 0; - bdev = open_bdev_excl(dev_name, flags, fs_type); + if (!(flags & MS_RDONLY)) + mode |= FMODE_WRITE; + + bdev = open_bdev_exclusive(dev_name, mode, fs_type); if (IS_ERR(bdev)) return PTR_ERR(bdev); @@ -785,11 +784,12 @@ int get_sb_bdev(struct file_system_type *fs_type, goto error_bdev; } - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } else { char b[BDEVNAME_SIZE]; s->s_flags = flags; + s->s_mode = mode; strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id)); sb_set_blocksize(s, block_size(bdev)); error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); @@ -807,7 +807,7 @@ int get_sb_bdev(struct file_system_type *fs_type, error_s: error = PTR_ERR(s); error_bdev: - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); error: return error; } @@ -817,10 +817,11 @@ EXPORT_SYMBOL(get_sb_bdev); void kill_block_super(struct super_block *sb) { struct block_device *bdev = sb->s_bdev; + fmode_t mode = sb->s_mode; generic_shutdown_super(sb); sync_blockdev(bdev); - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, mode); } EXPORT_SYMBOL(kill_block_super); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3a05a596e3b4..82d3b79d0e08 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -983,4 +983,5 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) const struct file_operations sysfs_dir_operations = { .read = generic_read_dir, .readdir = sysfs_readdir, + .llseek = generic_file_llseek, }; diff --git a/fs/timerfd.c b/fs/timerfd.c index c502c60e4f54..0862f0e49d0c 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -52,11 +52,9 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) { - ktime_t now, remaining; - - now = ctx->tmr.base->get_time(); - remaining = ktime_sub(ctx->tmr.expires, now); + ktime_t remaining; + remaining = hrtimer_expires_remaining(&ctx->tmr); return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } @@ -74,7 +72,7 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags, ctx->ticks = 0; ctx->tintv = timespec_to_ktime(ktmr->it_interval); hrtimer_init(&ctx->tmr, ctx->clockid, htmode); - ctx->tmr.expires = texp; + hrtimer_set_expires(&ctx->tmr, texp); ctx->tmr.function = timerfd_tmrproc; if (texp.tv64 != 0) hrtimer_start(&ctx->tmr, texp, htmode); diff --git a/fs/udf/namei.c b/fs/udf/namei.c index d3231947db19..082409cd4b8a 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -142,7 +142,7 @@ int udf_write_fi(struct inode *inode, struct fileIdentDesc *cfi, } static struct fileIdentDesc *udf_find_entry(struct inode *dir, - struct dentry *dentry, + struct qstr *child, struct udf_fileident_bh *fibh, struct fileIdentDesc *cfi) { @@ -159,8 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, sector_t offset; struct extent_position epos = {}; struct udf_inode_info *dinfo = UDF_I(dir); - int isdotdot = dentry->d_name.len == 2 && - dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.'; + int isdotdot = child->len == 2 && + child->name[0] == '.' && child->name[1] == '.'; size = udf_ext0_offset(dir) + dir->i_size; f_pos = udf_ext0_offset(dir); @@ -238,8 +238,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir, continue; flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); - if (flen && udf_match(flen, fname, dentry->d_name.len, - dentry->d_name.name)) + if (flen && udf_match(flen, fname, child->len, child->name)) goto out_ok; } @@ -283,7 +282,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry, } else #endif /* UDF_RECOVERY */ - if (udf_find_entry(dir, dentry, &fibh, &cfi)) { + if (udf_find_entry(dir, &dentry->d_name, &fibh, &cfi)) { if (fibh.sbh != fibh.ebh) brelse(fibh.ebh); brelse(fibh.sbh); @@ -783,7 +782,7 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry) retval = -ENOENT; lock_kernel(); - fi = udf_find_entry(dir, dentry, &fibh, &cfi); + fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) goto out; @@ -829,7 +828,7 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry) retval = -ENOENT; lock_kernel(); - fi = udf_find_entry(dir, dentry, &fibh, &cfi); + fi = udf_find_entry(dir, &dentry->d_name, &fibh, &cfi); if (!fi) goto out; @@ -1113,7 +1112,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, struct udf_inode_info *old_iinfo = UDF_I(old_inode); lock_kernel(); - ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); + ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); if (ofi) { if (ofibh.sbh != ofibh.ebh) brelse(ofibh.ebh); @@ -1124,7 +1123,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, != old_inode->i_ino) goto end_rename; - nfi = udf_find_entry(new_dir, new_dentry, &nfibh, &ncfi); + nfi = udf_find_entry(new_dir, &new_dentry->d_name, &nfibh, &ncfi); if (nfi) { if (!new_inode) { if (nfibh.sbh != nfibh.ebh) @@ -1192,7 +1191,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry, udf_write_fi(new_dir, &ncfi, nfi, &nfibh, NULL, NULL); /* The old fid may have moved - find it again */ - ofi = udf_find_entry(old_dir, old_dentry, &ofibh, &ocfi); + ofi = udf_find_entry(old_dir, &old_dentry->d_name, &ofibh, &ocfi); udf_delete_entry(old_dir, ofi, &ofibh, &ocfi); if (new_inode) { @@ -1243,15 +1242,11 @@ end_rename: static struct dentry *udf_get_parent(struct dentry *child) { - struct dentry *parent; struct inode *inode = NULL; - struct dentry dotdot; + struct qstr dotdot = {.name = "..", .len = 2}; struct fileIdentDesc cfi; struct udf_fileident_bh fibh; - dotdot.d_name.name = ".."; - dotdot.d_name.len = 2; - lock_kernel(); if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi)) goto out_unlock; @@ -1266,13 +1261,7 @@ static struct dentry *udf_get_parent(struct dentry *child) goto out_unlock; unlock_kernel(); - parent = d_alloc_anon(inode); - if (!parent) { - iput(inode); - parent = ERR_PTR(-ENOMEM); - } - - return parent; + return d_obtain_alias(inode); out_unlock: unlock_kernel(); return ERR_PTR(-EACCES); @@ -1283,7 +1272,6 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, u16 partref, __u32 generation) { struct inode *inode; - struct dentry *result; kernel_lb_addr loc; if (block == 0) @@ -1300,12 +1288,7 @@ static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block, iput(inode); return ERR_PTR(-ESTALE); } - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return d_obtain_alias(inode); } static struct dentry *udf_fh_to_dentry(struct super_block *sb, diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index df0bef18742d..dbbbc4668769 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -667,4 +667,5 @@ const struct file_operations ufs_dir_operations = { .read = generic_read_dir, .readdir = ufs_readdir, .fsync = file_fsync, + .llseek = generic_file_llseek, }; diff --git a/fs/vfat/Makefile b/fs/vfat/Makefile deleted file mode 100644 index 40f2798a4f08..000000000000 --- a/fs/vfat/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the linux vfat-filesystem routines. -# - -obj-$(CONFIG_VFAT_FS) += vfat.o - -vfat-y := namei.o diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c index 24fd598af846..7f7abec25e14 100644 --- a/fs/xfs/linux-2.6/xfs_export.c +++ b/fs/xfs/linux-2.6/xfs_export.c @@ -148,7 +148,6 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, { struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; - struct dentry *result; if (fh_len < xfs_fileid_length(fileid_type)) return NULL; @@ -164,16 +163,7 @@ xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid, break; } - if (!inode) - return NULL; - if (IS_ERR(inode)) - return ERR_CAST(inode); - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return d_obtain_alias(inode); } STATIC struct dentry * @@ -182,7 +172,6 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, { struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid; struct inode *inode = NULL; - struct dentry *result; switch (fileid_type) { case FILEID_INO32_GEN_PARENT: @@ -195,16 +184,7 @@ xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid, break; } - if (!inode) - return NULL; - if (IS_ERR(inode)) - return ERR_CAST(inode); - result = d_alloc_anon(inode); - if (!result) { - iput(inode); - return ERR_PTR(-ENOMEM); - } - return result; + return d_obtain_alias(inode); } STATIC struct dentry * @@ -213,18 +193,12 @@ xfs_fs_get_parent( { int error; struct xfs_inode *cip; - struct dentry *parent; error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL); if (unlikely(error)) return ERR_PTR(-error); - parent = d_alloc_anon(VFS_I(cip)); - if (unlikely(!parent)) { - iput(VFS_I(cip)); - return ERR_PTR(-ENOMEM); - } - return parent; + return d_obtain_alias(VFS_I(cip)); } const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5311c1acdd40..3fee790f138b 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -204,15 +204,6 @@ xfs_file_fsync( return -xfs_fsync(XFS_I(dentry->d_inode)); } -/* - * Unfortunately we can't just use the clean and simple readdir implementation - * below, because nfs might call back into ->lookup from the filldir callback - * and that will deadlock the low-level btree code. - * - * Hopefully we'll find a better workaround that allows to use the optimal - * version at least for local readdirs for 2.6.25. - */ -#if 0 STATIC int xfs_file_readdir( struct file *filp, @@ -244,125 +235,6 @@ xfs_file_readdir( return -error; return 0; } -#else - -struct hack_dirent { - u64 ino; - loff_t offset; - int namlen; - unsigned int d_type; - char name[]; -}; - -struct hack_callback { - char *dirent; - size_t len; - size_t used; -}; - -STATIC int -xfs_hack_filldir( - void *__buf, - const char *name, - int namlen, - loff_t offset, - u64 ino, - unsigned int d_type) -{ - struct hack_callback *buf = __buf; - struct hack_dirent *de = (struct hack_dirent *)(buf->dirent + buf->used); - unsigned int reclen; - - reclen = ALIGN(sizeof(struct hack_dirent) + namlen, sizeof(u64)); - if (buf->used + reclen > buf->len) - return -EINVAL; - - de->namlen = namlen; - de->offset = offset; - de->ino = ino; - de->d_type = d_type; - memcpy(de->name, name, namlen); - buf->used += reclen; - return 0; -} - -STATIC int -xfs_file_readdir( - struct file *filp, - void *dirent, - filldir_t filldir) -{ - struct inode *inode = filp->f_path.dentry->d_inode; - xfs_inode_t *ip = XFS_I(inode); - struct hack_callback buf; - struct hack_dirent *de; - int error; - loff_t size; - int eof = 0; - xfs_off_t start_offset, curr_offset, offset; - - /* - * Try fairly hard to get memory - */ - buf.len = PAGE_CACHE_SIZE; - do { - buf.dirent = kmalloc(buf.len, GFP_KERNEL); - if (buf.dirent) - break; - buf.len >>= 1; - } while (buf.len >= 1024); - - if (!buf.dirent) - return -ENOMEM; - - curr_offset = filp->f_pos; - if (curr_offset == 0x7fffffff) - offset = 0xffffffff; - else - offset = filp->f_pos; - - while (!eof) { - unsigned int reclen; - - start_offset = offset; - - buf.used = 0; - error = -xfs_readdir(ip, &buf, buf.len, &offset, - xfs_hack_filldir); - if (error || offset == start_offset) { - size = 0; - break; - } - - size = buf.used; - de = (struct hack_dirent *)buf.dirent; - while (size > 0) { - curr_offset = de->offset /* & 0x7fffffff */; - if (filldir(dirent, de->name, de->namlen, - curr_offset & 0x7fffffff, - de->ino, de->d_type)) { - goto done; - } - - reclen = ALIGN(sizeof(struct hack_dirent) + de->namlen, - sizeof(u64)); - size -= reclen; - de = (struct hack_dirent *)((char *)de + reclen); - } - } - - done: - if (!error) { - if (size == 0) - filp->f_pos = offset & 0x7fffffff; - else if (de) - filp->f_pos = curr_offset; - } - - kfree(buf.dirent); - return error; -} -#endif STATIC int xfs_file_mmap( diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 48799ba7e3e6..d3438c72dcaf 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c @@ -311,11 +311,10 @@ xfs_open_by_handle( return new_fd; } - dentry = d_alloc_anon(inode); - if (dentry == NULL) { - iput(inode); + dentry = d_obtain_alias(inode); + if (IS_ERR(dentry)) { put_unused_fd(new_fd); - return -XFS_ERROR(ENOMEM); + return PTR_ERR(dentry); } /* Ensure umount returns EBUSY on umounts while this file is open. */ diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index e39013619b26..37ebe36056eb 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -589,7 +589,7 @@ xfs_blkdev_get( { int error = 0; - *bdevp = open_bdev_excl(name, 0, mp); + *bdevp = open_bdev_exclusive(name, FMODE_READ|FMODE_WRITE, mp); if (IS_ERR(*bdevp)) { error = PTR_ERR(*bdevp); printk("XFS: Invalid device [%s], error=%d\n", name, error); @@ -603,7 +603,7 @@ xfs_blkdev_put( struct block_device *bdev) { if (bdev) - close_bdev_excl(bdev); + close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE); } /* |