summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig1
-rw-r--r--fs/Makefile3
-rw-r--r--fs/afs/internal.h1
-rw-r--r--fs/afs/rxrpc.c12
-rw-r--r--fs/anon_inodes.c34
-rw-r--r--fs/bio-integrity.c84
-rw-r--r--fs/bio.c1
-rw-r--r--fs/cifs/cifsglob.h2
-rw-r--r--fs/cifs/file.c24
-rw-r--r--fs/cifs/transport.c29
-rw-r--r--fs/compat.c127
-rw-r--r--fs/compat_binfmt_elf.c5
-rw-r--r--fs/compat_ioctl.c5
-rw-r--r--fs/dcache.c4
-rw-r--r--fs/efivarfs/file.c13
-rw-r--r--fs/exec.c6
-rw-r--r--fs/ext4/inode.c15
-rw-r--r--fs/file.c47
-rw-r--r--fs/file_table.c1
-rw-r--r--fs/hfsplus/catalog.c41
-rw-r--r--fs/hfsplus/hfsplus_fs.h1
-rw-r--r--fs/hfsplus/hfsplus_raw.h6
-rw-r--r--fs/hfsplus/inode.c9
-rw-r--r--fs/kernfs/Kconfig7
-rw-r--r--fs/kernfs/dir.c752
-rw-r--r--fs/kernfs/file.c22
-rw-r--r--fs/kernfs/kernfs-internal.h15
-rw-r--r--fs/kernfs/mount.c39
-rw-r--r--fs/kernfs/symlink.c6
-rw-r--r--fs/mount.h4
-rw-r--r--fs/namei.c31
-rw-r--r--fs/namespace.c177
-rw-r--r--fs/nfsd/vfs.c1
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/stackglue.c4
-rw-r--r--fs/open.c4
-rw-r--r--fs/pnode.c26
-rw-r--r--fs/pnode.h4
-rw-r--r--fs/proc/base.c1
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc/uptime.c2
-rw-r--r--fs/read_write.c80
-rw-r--r--fs/sysfs/Kconfig1
-rw-r--r--fs/sysfs/dir.c44
-rw-r--r--fs/sysfs/file.c23
-rw-r--r--fs/sysfs/group.c7
-rw-r--r--fs/sysfs/mount.c2
-rw-r--r--fs/timerfd.c1
48 files changed, 1148 insertions, 586 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 7385e54be4b9..312393f32948 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -96,6 +96,7 @@ endif # BLOCK
menu "Pseudo filesystems"
source "fs/proc/Kconfig"
+source "fs/kernfs/Kconfig"
source "fs/sysfs/Kconfig"
config TMPFS
diff --git a/fs/Makefile b/fs/Makefile
index 47ac07bb4acc..f9cb9876e466 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -52,7 +52,8 @@ obj-$(CONFIG_FHANDLE) += fhandle.o
obj-y += quota/
obj-$(CONFIG_PROC_FS) += proc/
-obj-$(CONFIG_SYSFS) += sysfs/ kernfs/
+obj-$(CONFIG_KERNFS) += kernfs/
+obj-$(CONFIG_SYSFS) += sysfs/
obj-$(CONFIG_CONFIGFS_FS) += configfs/
obj-y += devpts/
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6621f8008122..be75b500005d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -75,6 +75,7 @@ struct afs_call {
const struct afs_call_type *type; /* type of call */
const struct afs_wait_mode *wait_mode; /* completion wait mode */
wait_queue_head_t waitq; /* processes awaiting completion */
+ work_func_t async_workfn;
struct work_struct async_work; /* asynchronous work processor */
struct work_struct work; /* actual work processor */
struct sk_buff_head rx_queue; /* received packets */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 8ad8c2a0703a..ef943df73b8c 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -644,7 +644,7 @@ static void afs_process_async_call(struct work_struct *work)
/* we can't just delete the call because the work item may be
* queued */
- PREPARE_WORK(&call->async_work, afs_delete_async_call);
+ call->async_workfn = afs_delete_async_call;
queue_work(afs_async_calls, &call->async_work);
}
@@ -663,6 +663,13 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb)
call->reply_size += len;
}
+static void afs_async_workfn(struct work_struct *work)
+{
+ struct afs_call *call = container_of(work, struct afs_call, async_work);
+
+ call->async_workfn(work);
+}
+
/*
* accept the backlog of incoming calls
*/
@@ -685,7 +692,8 @@ static void afs_collect_incoming_call(struct work_struct *work)
return;
}
- INIT_WORK(&call->async_work, afs_process_async_call);
+ call->async_workfn = afs_process_async_call;
+ INIT_WORK(&call->async_work, afs_async_workfn);
call->wait_mode = &afs_async_incoming_call;
call->type = &afs_RXCMxxxx;
init_waitqueue_head(&call->waitq);
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 24084732b1d0..80ef38c73e5a 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = {
static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- struct dentry *root;
- root = mount_pseudo(fs_type, "anon_inode:", NULL,
+ return mount_pseudo(fs_type, "anon_inode:", NULL,
&anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC);
- if (!IS_ERR(root)) {
- struct super_block *s = root->d_sb;
- anon_inode_inode = alloc_anon_inode(s);
- if (IS_ERR(anon_inode_inode)) {
- dput(root);
- deactivate_locked_super(s);
- root = ERR_CAST(anon_inode_inode);
- }
- }
- return root;
}
static struct file_system_type anon_inode_fs_type = {
@@ -175,22 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd);
static int __init anon_inode_init(void)
{
- int error;
-
- error = register_filesystem(&anon_inode_fs_type);
- if (error)
- goto err_exit;
anon_inode_mnt = kern_mount(&anon_inode_fs_type);
- if (IS_ERR(anon_inode_mnt)) {
- error = PTR_ERR(anon_inode_mnt);
- goto err_unregister_filesystem;
- }
- return 0;
+ if (IS_ERR(anon_inode_mnt))
+ panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt));
-err_unregister_filesystem:
- unregister_filesystem(&anon_inode_fs_type);
-err_exit:
- panic(KERN_ERR "anon_inode_init() failed (%d)\n", error);
+ anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb);
+ if (IS_ERR(anon_inode_inode))
+ panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode));
+
+ return 0;
}
fs_initcall(anon_inode_init);
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 4f70f383132c..29696b78d1f4 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -301,25 +301,25 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
EXPORT_SYMBOL(bio_integrity_get_tag);
/**
- * bio_integrity_generate - Generate integrity metadata for a bio
- * @bio: bio to generate integrity metadata for
- *
- * Description: Generates integrity metadata for a bio by calling the
- * block device's generation callback function. The bio must have a
- * bip attached with enough room to accommodate the generated
- * integrity metadata.
+ * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio
+ * @bio: bio to generate/verify integrity metadata for
+ * @operate: operate number, 1 for generate, 0 for verify
*/
-static void bio_integrity_generate(struct bio *bio)
+static int bio_integrity_generate_verify(struct bio *bio, int operate)
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity_exchg bix;
struct bio_vec bv;
struct bvec_iter iter;
- sector_t sector = bio->bi_iter.bi_sector;
- unsigned int sectors, total;
+ sector_t sector;
+ unsigned int sectors, ret = 0;
void *prot_buf = bio->bi_integrity->bip_buf;
- total = 0;
+ if (operate)
+ sector = bio->bi_iter.bi_sector;
+ else
+ sector = bio->bi_integrity->bip_iter.bi_sector;
+
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
@@ -330,16 +330,37 @@ static void bio_integrity_generate(struct bio *bio)
bix.prot_buf = prot_buf;
bix.sector = sector;
- bi->generate_fn(&bix);
+ if (operate) {
+ bi->generate_fn(&bix);
+ } else {
+ ret = bi->verify_fn(&bix);
+ if (ret) {
+ kunmap_atomic(kaddr);
+ return ret;
+ }
+ }
sectors = bv.bv_len / bi->sector_size;
sector += sectors;
prot_buf += sectors * bi->tuple_size;
- total += sectors * bi->tuple_size;
- BUG_ON(total > bio->bi_integrity->bip_iter.bi_size);
kunmap_atomic(kaddr);
}
+ return ret;
+}
+
+/**
+ * bio_integrity_generate - Generate integrity metadata for a bio
+ * @bio: bio to generate integrity metadata for
+ *
+ * Description: Generates integrity metadata for a bio by calling the
+ * block device's generation callback function. The bio must have a
+ * bip attached with enough room to accommodate the generated
+ * integrity metadata.
+ */
+static void bio_integrity_generate(struct bio *bio)
+{
+ bio_integrity_generate_verify(bio, 1);
}
static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi)
@@ -454,40 +475,7 @@ EXPORT_SYMBOL(bio_integrity_prep);
*/
static int bio_integrity_verify(struct bio *bio)
{
- struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
- struct blk_integrity_exchg bix;
- struct bio_vec *bv;
- sector_t sector = bio->bi_integrity->bip_iter.bi_sector;
- unsigned int sectors, ret = 0;
- void *prot_buf = bio->bi_integrity->bip_buf;
- int i;
-
- bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
- bix.sector_size = bi->sector_size;
-
- bio_for_each_segment_all(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page);
-
- bix.data_buf = kaddr + bv->bv_offset;
- bix.data_size = bv->bv_len;
- bix.prot_buf = prot_buf;
- bix.sector = sector;
-
- ret = bi->verify_fn(&bix);
-
- if (ret) {
- kunmap_atomic(kaddr);
- return ret;
- }
-
- sectors = bv->bv_len / bi->sector_size;
- sector += sectors;
- prot_buf += sectors * bi->tuple_size;
-
- kunmap_atomic(kaddr);
- }
-
- return ret;
+ return bio_integrity_generate_verify(bio, 0);
}
/**
diff --git a/fs/bio.c b/fs/bio.c
index 8754e7b6eb49..b2dd42ed9edd 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -116,7 +116,6 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
if (!slab)
goto out_unlock;
- printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry);
bslab->slab = slab;
bslab->slab_ref = 1;
bslab->slab_size = sz;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cf32f0393369..c0f3718b77a8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -513,7 +513,7 @@ struct cifs_mnt_data {
static inline unsigned int
get_rfc1002_length(void *buf)
{
- return be32_to_cpu(*((__be32 *)buf));
+ return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}
static inline void
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 53c15074bb36..834fce759d80 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
struct cifsInodeInfo *cinode = CIFS_I(inode);
struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
ssize_t rc = -EACCES;
+ loff_t lock_pos = pos;
- BUG_ON(iocb->ki_pos != pos);
-
+ if (file->f_flags & O_APPEND)
+ lock_pos = i_size_read(inode);
/*
* We need to hold the sem to be sure nobody modifies lock list
* with a brlock that prevents writing.
*/
down_read(&cinode->lock_sem);
- if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
+ if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs),
server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP)) {
- mutex_lock(&inode->i_mutex);
- rc = __generic_file_aio_write(iocb, iov, nr_segs,
- &iocb->ki_pos);
- mutex_unlock(&inode->i_mutex);
- }
-
- if (rc > 0) {
- ssize_t err;
-
- err = generic_write_sync(file, iocb->ki_pos - rc, rc);
- if (err < 0)
- rc = err;
- }
-
+ CIFS_WRITE_OP))
+ rc = generic_file_aio_write(iocb, iov, nr_segs, pos);
up_read(&cinode->lock_sem);
return rc;
}
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index b37570952846..18cd5650a5fc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx,
iov->iov_len = rqst->rq_pagesz;
}
+static unsigned long
+rqst_len(struct smb_rqst *rqst)
+{
+ unsigned int i;
+ struct kvec *iov = rqst->rq_iov;
+ unsigned long buflen = 0;
+
+ /* total up iov array first */
+ for (i = 0; i < rqst->rq_nvec; i++)
+ buflen += iov[i].iov_len;
+
+ /* add in the page array if there is one */
+ if (rqst->rq_npages) {
+ buflen += rqst->rq_pagesz * (rqst->rq_npages - 1);
+ buflen += rqst->rq_tailsz;
+ }
+
+ return buflen;
+}
+
static int
smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
@@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
struct kvec *iov = rqst->rq_iov;
int n_vec = rqst->rq_nvec;
unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base);
+ unsigned long send_length;
unsigned int i;
size_t total_len = 0, sent;
struct socket *ssocket = server->ssocket;
@@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst)
if (ssocket == NULL)
return -ENOTSOCK;
+ /* sanity check send length */
+ send_length = rqst_len(rqst);
+ if (send_length != smb_buf_length + 4) {
+ WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n",
+ send_length, smb_buf_length);
+ return -EIO;
+ }
+
cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length);
dump_smb(iov[0].iov_base, iov[0].iov_len);
diff --git a/fs/compat.c b/fs/compat.c
index 6af20de2c1a3..f86df85dff61 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...)
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
*/
-asmlinkage long compat_sys_utime(const char __user *filename,
- struct compat_utimbuf __user *t)
+COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename,
+ struct compat_utimbuf __user *, t)
{
struct timespec tv[2];
@@ -87,13 +87,13 @@ asmlinkage long compat_sys_utime(const char __user *filename,
return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
}
-asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags)
+COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags)
{
struct timespec tv[2];
if (t) {
- if (get_compat_timespec(&tv[0], &t[0]) ||
- get_compat_timespec(&tv[1], &t[1]))
+ if (compat_get_timespec(&tv[0], &t[0]) ||
+ compat_get_timespec(&tv[1], &t[1]))
return -EFAULT;
if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT)
@@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena
return do_utimes(dfd, filename, t ? tv : NULL, flags);
}
-asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t)
+COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t)
{
struct timespec tv[2];
@@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena
return do_utimes(dfd, filename, t ? tv : NULL, 0);
}
-asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t)
+COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t)
{
return compat_sys_futimesat(AT_FDCWD, filename, t);
}
@@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0;
}
-asmlinkage long compat_sys_newstat(const char __user * filename,
- struct compat_stat __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename,
+ struct compat_stat __user *, statbuf)
{
struct kstat stat;
int error;
@@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename,
return cp_compat_stat(&stat, statbuf);
}
-asmlinkage long compat_sys_newlstat(const char __user * filename,
- struct compat_stat __user *statbuf)
+COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename,
+ struct compat_stat __user *, statbuf)
{
struct kstat stat;
int error;
@@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename,
}
#ifndef __ARCH_WANT_STAT64
-asmlinkage long compat_sys_newfstatat(unsigned int dfd,
- const char __user *filename,
- struct compat_stat __user *statbuf, int flag)
+COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd,
+ const char __user *, filename,
+ struct compat_stat __user *, statbuf, int, flag)
{
struct kstat stat;
int error;
@@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd,
}
#endif
-asmlinkage long compat_sys_newfstat(unsigned int fd,
- struct compat_stat __user * statbuf)
+COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd,
+ struct compat_stat __user *, statbuf)
{
struct kstat stat;
int error = vfs_fstat(fd, &stat);
@@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs *
* The following statfs calls are copies of code from fs/statfs.c and
* should be checked against those from time to time
*/
-asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf)
+COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf)
{
struct kstatfs tmp;
int error = user_statfs(pathname, &tmp);
@@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
return error;
}
-asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf)
+COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf)
{
struct kstatfs tmp;
int error = fd_statfs(fd, &tmp);
@@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat
return 0;
}
-asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf)
+COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf)
{
struct kstatfs tmp;
int error;
@@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
return error;
}
-asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf)
+COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf)
{
struct kstatfs tmp;
int error;
@@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
* Given how simple this syscall is that apporach is more maintainable
* than the various conversion hacks.
*/
-asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u)
+COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u)
{
struct compat_ustat tmp;
struct kstatfs sbuf;
@@ -399,8 +399,8 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u
}
#endif
-asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
- unsigned long arg)
+COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg)
{
mm_segment_t old_fs;
struct flock f;
@@ -468,16 +468,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd,
return ret;
}
-asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd,
- unsigned long arg)
+COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg)
{
if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64))
return -EINVAL;
return compat_sys_fcntl64(fd, cmd, arg);
}
-asmlinkage long
-compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p)
+COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p)
{
long ret;
aio_context_t ctx64;
@@ -496,32 +495,24 @@ compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p)
return ret;
}
-asmlinkage long
-compat_sys_io_getevents(aio_context_t ctx_id,
- unsigned long min_nr,
- unsigned long nr,
- struct io_event __user *events,
- struct compat_timespec __user *timeout)
+COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct compat_timespec __user *, timeout)
{
- long ret;
struct timespec t;
struct timespec __user *ut = NULL;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_WRITE, events,
- nr * sizeof(struct io_event))))
- goto out;
if (timeout) {
- if (get_compat_timespec(&t, timeout))
- goto out;
+ if (compat_get_timespec(&t, timeout))
+ return -EFAULT;
ut = compat_alloc_user_space(sizeof(*ut));
if (copy_to_user(ut, &t, sizeof(t)) )
- goto out;
+ return -EFAULT;
}
- ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut);
-out:
- return ret;
+ return sys_io_getevents(ctx_id, min_nr, nr, events, ut);
}
/* A write operation does a read from user space and vice versa */
@@ -617,8 +608,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
-asmlinkage long
-compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb)
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+ int, nr, u32 __user *, iocb)
{
struct iocb __user * __user *iocb64;
long ret;
@@ -770,10 +761,10 @@ static int do_nfs4_super_data_conv(void *raw_data)
#define NCPFS_NAME "ncpfs"
#define NFS4_NAME "nfs4"
-asmlinkage long compat_sys_mount(const char __user * dev_name,
- const char __user * dir_name,
- const char __user * type, unsigned long flags,
- const void __user * data)
+COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
+ const char __user *, dir_name,
+ const char __user *, type, compat_ulong_t, flags,
+ const void __user *, data)
{
char *kernel_type;
unsigned long data_page;
@@ -869,8 +860,8 @@ efault:
return -EFAULT;
}
-asmlinkage long compat_sys_old_readdir(unsigned int fd,
- struct compat_old_linux_dirent __user *dirent, unsigned int count)
+COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd,
+ struct compat_old_linux_dirent __user *, dirent, unsigned int, count)
{
int error;
struct fd f = fdget(fd);
@@ -948,8 +939,8 @@ efault:
return -EFAULT;
}
-asmlinkage long compat_sys_getdents(unsigned int fd,
- struct compat_linux_dirent __user *dirent, unsigned int count)
+COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd,
+ struct compat_linux_dirent __user *, dirent, unsigned int, count)
{
struct fd f;
struct compat_linux_dirent __user * lastdirent;
@@ -981,7 +972,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd,
return error;
}
-#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64
+#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64
struct compat_getdents_callback64 {
struct dir_context ctx;
@@ -1033,8 +1024,8 @@ efault:
return -EFAULT;
}
-asmlinkage long compat_sys_getdents64(unsigned int fd,
- struct linux_dirent64 __user * dirent, unsigned int count)
+COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd,
+ struct linux_dirent64 __user *, dirent, unsigned int, count)
{
struct fd f;
struct linux_dirent64 __user * lastdirent;
@@ -1066,7 +1057,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd,
fdput(f);
return error;
}
-#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */
+#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */
/*
* Exactly like fs/open.c:sys_open(), except that it doesn't set the
@@ -1287,9 +1278,9 @@ out_nofds:
return ret;
}
-asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp,
- compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct compat_timeval __user *tvp)
+COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct compat_timeval __user *, tvp)
{
struct timespec end_time, *to = NULL;
struct compat_timeval tv;
@@ -1320,7 +1311,7 @@ struct compat_sel_arg_struct {
compat_uptr_t tvp;
};
-asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg)
+COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg)
{
struct compat_sel_arg_struct a;
@@ -1381,9 +1372,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp,
return ret;
}
-asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
- compat_ulong_t __user *outp, compat_ulong_t __user *exp,
- struct compat_timespec __user *tsp, void __user *sig)
+COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp,
+ compat_ulong_t __user *, outp, compat_ulong_t __user *, exp,
+ struct compat_timespec __user *, tsp, void __user *, sig)
{
compat_size_t sigsetsize = 0;
compat_uptr_t up = 0;
@@ -1400,9 +1391,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp,
sigsetsize);
}
-asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
- unsigned int nfds, struct compat_timespec __user *tsp,
- const compat_sigset_t __user *sigmask, compat_size_t sigsetsize)
+COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds,
+ unsigned int, nfds, struct compat_timespec __user *, tsp,
+ const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize)
{
compat_sigset_t ss32;
sigset_t ksigmask, sigsaved;
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e2e4ef..4d24d17bcfc1 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
#define ELF_HWCAP COMPAT_ELF_HWCAP
#endif
+#ifdef COMPAT_ELF_HWCAP2
+#undef ELF_HWCAP2
+#define ELF_HWCAP2 COMPAT_ELF_HWCAP2
+#endif
+
#ifdef COMPAT_ARCH_DLINFO
#undef ARCH_DLINFO
#define ARCH_DLINFO COMPAT_ARCH_DLINFO
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 3881610b6438..e82289047272 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd)
return ioctl_pointer[i] == xcmd;
}
-asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd,
- unsigned long arg)
+COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd,
+ compat_ulong_t, arg32)
{
+ unsigned long arg = arg32;
struct fd f = fdget(fd);
int error = -EBADF;
if (!f.file)
diff --git a/fs/dcache.c b/fs/dcache.c
index 265e0ce9769c..ca02c13a84aa 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
u32 dlen = ACCESS_ONCE(name->len);
char *p;
- if (*buflen < dlen + 1)
- return -ENAMETOOLONG;
*buflen -= dlen + 1;
+ if (*buflen < 0)
+ return -ENAMETOOLONG;
p = *buffer -= dlen + 1;
*p++ = '/';
while (dlen--) {
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index 8dd524f32284..cdb2971192a5 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file,
u32 attributes;
struct inode *inode = file->f_mapping->host;
unsigned long datasize = count - sizeof(attributes);
- ssize_t bytes = 0;
+ ssize_t bytes;
bool set = false;
if (count < sizeof(attributes))
@@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file,
if (attributes & ~(EFI_VARIABLE_MASK))
return -EINVAL;
- data = kmalloc(datasize, GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) {
- bytes = -EFAULT;
- goto out;
- }
+ data = memdup_user(userbuf + sizeof(attributes), datasize);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
bytes = efivar_entry_set_get_size(var, attributes, &datasize,
data, &set);
diff --git a/fs/exec.c b/fs/exec.c
index 3d78fccdd723..4f59402fdda5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1619,9 +1619,9 @@ SYSCALL_DEFINE3(execve,
return do_execve(getname(filename), argv, envp);
}
#ifdef CONFIG_COMPAT
-asmlinkage long compat_sys_execve(const char __user * filename,
- const compat_uptr_t __user * argv,
- const compat_uptr_t __user * envp)
+COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename,
+ const compat_uptr_t __user *, argv,
+ const compat_uptr_t __user *, envp)
{
return compat_do_execve(getname(filename), argv, envp);
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 6e39895a91b8..24bfd7ff3049 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/aio.h>
+#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -3921,18 +3922,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
diff --git a/fs/file.c b/fs/file.c
index db25c2bdfe46..b61293badfb1 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -497,7 +497,7 @@ repeat:
error = fd;
#if 1
/* Sanity check */
- if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
+ if (rcu_access_pointer(fdt->fd[fd]) != NULL) {
printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
rcu_assign_pointer(fdt->fd[fd], NULL);
}
@@ -683,35 +683,54 @@ EXPORT_SYMBOL(fget_raw);
* The fput_needed flag returned by fget_light should be passed to the
* corresponding fput_light.
*/
-struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
+static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{
struct files_struct *files = current->files;
struct file *file;
- *fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
- if (file && (file->f_mode & mask))
- file = NULL;
+ if (!file || unlikely(file->f_mode & mask))
+ return 0;
+ return (unsigned long)file;
} else {
file = __fget(fd, mask);
- if (file)
- *fput_needed = 1;
+ if (!file)
+ return 0;
+ return FDPUT_FPUT | (unsigned long)file;
}
-
- return file;
}
-struct file *fget_light(unsigned int fd, int *fput_needed)
+unsigned long __fdget(unsigned int fd)
{
- return __fget_light(fd, FMODE_PATH, fput_needed);
+ return __fget_light(fd, FMODE_PATH);
}
-EXPORT_SYMBOL(fget_light);
+EXPORT_SYMBOL(__fdget);
-struct file *fget_raw_light(unsigned int fd, int *fput_needed)
+unsigned long __fdget_raw(unsigned int fd)
{
- return __fget_light(fd, 0, fput_needed);
+ return __fget_light(fd, 0);
}
+unsigned long __fdget_pos(unsigned int fd)
+{
+ unsigned long v = __fdget(fd);
+ struct file *file = (struct file *)(v & ~3);
+
+ if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
+ if (file_count(file) > 1) {
+ v |= FDPUT_POS_UNLOCK;
+ mutex_lock(&file->f_pos_lock);
+ }
+ }
+ return v;
+}
+
+/*
+ * We only lock f_pos if we have threads or if the file might be
+ * shared with another process. In both cases we'll have an elevated
+ * file count (done either by fdget() or by fork()).
+ */
+
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
diff --git a/fs/file_table.c b/fs/file_table.c
index 5fff9030be34..5b24008ea4f6 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -135,6 +135,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
+ mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 968ce411db53..32602c667b4a 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry,
folder = &entry->folder;
memset(folder, 0, sizeof(*folder));
folder->type = cpu_to_be16(HFSPLUS_FOLDER);
+ if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags))
+ folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT);
folder->id = cpu_to_be32(inode->i_ino);
HFSPLUS_I(inode)->create_date =
folder->create_date =
@@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
return hfs_brec_find(fd, hfs_find_rec_by_key);
}
+static void hfsplus_subfolders_inc(struct inode *dir)
+{
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
+
+ if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) {
+ /*
+ * Increment subfolder count. Note, the value is only meaningful
+ * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set.
+ */
+ HFSPLUS_I(dir)->subfolders++;
+ }
+}
+
+static void hfsplus_subfolders_dec(struct inode *dir)
+{
+ struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
+
+ if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) {
+ /*
+ * Decrement subfolder count. Note, the value is only meaningful
+ * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set.
+ *
+ * Check for zero. Some subfolders may have been created
+ * by an implementation ignorant of this counter.
+ */
+ if (HFSPLUS_I(dir)->subfolders)
+ HFSPLUS_I(dir)->subfolders--;
+ }
+}
+
int hfsplus_create_cat(u32 cnid, struct inode *dir,
struct qstr *str, struct inode *inode)
{
@@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
goto err1;
dir->i_size++;
+ if (S_ISDIR(inode->i_mode))
+ hfsplus_subfolders_inc(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
@@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
goto out;
dir->i_size--;
+ if (type == HFSPLUS_FOLDER)
+ hfsplus_subfolders_dec(dir);
dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
@@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid,
hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
src_fd.entrylength);
+ type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
@@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid,
if (err)
goto out;
dst_dir->i_size++;
+ if (type == HFSPLUS_FOLDER)
+ hfsplus_subfolders_inc(dst_dir);
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
@@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid,
if (err)
goto out;
src_dir->i_size--;
+ if (type == HFSPLUS_FOLDER)
+ hfsplus_subfolders_dec(src_dir);
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index 08846425b67f..62d571eb69ba 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -242,6 +242,7 @@ struct hfsplus_inode_info {
*/
sector_t fs_blocks;
u8 userflags; /* BSD user file flags */
+ u32 subfolders; /* Subfolder count (HFSX only) */
struct list_head open_dir_list;
loff_t phys_size;
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h
index 8ffb3a8ffe75..5a126828d85e 100644
--- a/fs/hfsplus/hfsplus_raw.h
+++ b/fs/hfsplus/hfsplus_raw.h
@@ -261,7 +261,7 @@ struct hfsplus_cat_folder {
struct DInfo user_info;
struct DXInfo finder_info;
__be32 text_encoding;
- u32 reserved;
+ __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */
} __packed;
/* HFS file info (stolen from hfs.h) */
@@ -301,11 +301,13 @@ struct hfsplus_cat_file {
struct hfsplus_fork_raw rsrc_fork;
} __packed;
-/* File attribute bits */
+/* File and folder flag bits */
#define HFSPLUS_FILE_LOCKED 0x0001
#define HFSPLUS_FILE_THREAD_EXISTS 0x0002
#define HFSPLUS_XATTR_EXISTS 0x0004
#define HFSPLUS_ACL_EXISTS 0x0008
+#define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count
+ * (HFSX only) */
/* HFS+ catalog thread (part of a cat_entry) */
struct hfsplus_cat_thread {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index fa929f325f87..a4f45bd88a63 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode)
hip->extent_state = 0;
hip->flags = 0;
hip->userflags = 0;
+ hip->subfolders = 0;
memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec));
memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec));
hip->alloc_blocks = 0;
@@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
HFSPLUS_I(inode)->create_date = folder->create_date;
HFSPLUS_I(inode)->fs_blocks = 0;
+ if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
+ HFSPLUS_I(inode)->subfolders =
+ be32_to_cpu(folder->subfolders);
+ }
inode->i_op = &hfsplus_dir_inode_operations;
inode->i_fop = &hfsplus_dir_operations;
} else if (type == HFSPLUS_FILE) {
@@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode)
folder->content_mod_date = hfsp_ut2mt(inode->i_mtime);
folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
folder->valence = cpu_to_be32(inode->i_size - 2);
+ if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
+ folder->subfolders =
+ cpu_to_be32(HFSPLUS_I(inode)->subfolders);
+ }
hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_folder));
} else if (HFSPLUS_IS_RSRC(inode)) {
diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig
new file mode 100644
index 000000000000..397b5f7a7a16
--- /dev/null
+++ b/fs/kernfs/Kconfig
@@ -0,0 +1,7 @@
+#
+# KERNFS should be selected by its users
+#
+
+config KERNFS
+ bool
+ default n
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index bd6e18be6e1a..0bd05ab26003 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -8,6 +8,7 @@
* This file is released under the GPLv2.
*/
+#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
@@ -18,9 +19,161 @@
#include "kernfs-internal.h"
DEFINE_MUTEX(kernfs_mutex);
+static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */
+static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
+static bool kernfs_active(struct kernfs_node *kn)
+{
+ lockdep_assert_held(&kernfs_mutex);
+ return atomic_read(&kn->active) >= 0;
+}
+
+static bool kernfs_lockdep(struct kernfs_node *kn)
+{
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ return kn->flags & KERNFS_LOCKDEP;
+#else
+ return false;
+#endif
+}
+
+static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+ return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
+}
+
+static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
+ size_t buflen)
+{
+ char *p = buf + buflen;
+ int len;
+
+ *--p = '\0';
+
+ do {
+ len = strlen(kn->name);
+ if (p - buf < len + 1) {
+ buf[0] = '\0';
+ p = NULL;
+ break;
+ }
+ p -= len;
+ memcpy(p, kn->name, len);
+ *--p = '/';
+ kn = kn->parent;
+ } while (kn && kn->parent);
+
+ return p;
+}
+
+/**
+ * kernfs_name - obtain the name of a given node
+ * @kn: kernfs_node of interest
+ * @buf: buffer to copy @kn's name into
+ * @buflen: size of @buf
+ *
+ * Copies the name of @kn into @buf of @buflen bytes. The behavior is
+ * similar to strlcpy(). It returns the length of @kn's name and if @buf
+ * isn't long enough, it's filled upto @buflen-1 and nul terminated.
+ *
+ * This function can be called from any context.
+ */
+int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+ ret = kernfs_name_locked(kn, buf, buflen);
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ return ret;
+}
+
+/**
+ * kernfs_path - build full path of a given node
+ * @kn: kernfs_node of interest
+ * @buf: buffer to copy @kn's name into
+ * @buflen: size of @buf
+ *
+ * Builds and returns the full path of @kn in @buf of @buflen bytes. The
+ * path is built from the end of @buf so the returned pointer usually
+ * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated
+ * and %NULL is returned.
+ */
+char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
+{
+ unsigned long flags;
+ char *p;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+ p = kernfs_path_locked(kn, buf, buflen);
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ return p;
+}
+
+/**
+ * pr_cont_kernfs_name - pr_cont name of a kernfs_node
+ * @kn: kernfs_node of interest
+ *
+ * This function can be called from any context.
+ */
+void pr_cont_kernfs_name(struct kernfs_node *kn)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+
+ kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf));
+ pr_cont("%s", kernfs_pr_cont_buf);
+
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+}
+
+/**
+ * pr_cont_kernfs_path - pr_cont path of a kernfs_node
+ * @kn: kernfs_node of interest
+ *
+ * This function can be called from any context.
+ */
+void pr_cont_kernfs_path(struct kernfs_node *kn)
+{
+ unsigned long flags;
+ char *p;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+
+ p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
+ sizeof(kernfs_pr_cont_buf));
+ if (p)
+ pr_cont("%s", p);
+ else
+ pr_cont("<name too long>");
+
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+}
+
+/**
+ * kernfs_get_parent - determine the parent node and pin it
+ * @kn: kernfs_node of interest
+ *
+ * Determines @kn's parent, pins and returns it. This function can be
+ * called from any context.
+ */
+struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
+{
+ struct kernfs_node *parent;
+ unsigned long flags;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+ parent = kn->parent;
+ kernfs_get(parent);
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+
+ return parent;
+}
+
/**
* kernfs_name_hash
* @name: Null terminated string to hash
@@ -37,7 +190,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns)
hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
hash &= 0x7fffffffU;
/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
- if (hash < 1)
+ if (hash < 2)
hash += 2;
if (hash >= INT_MAX)
hash = INT_MAX - 1;
@@ -105,18 +258,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
* kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
* @kn: kernfs_node of interest
*
- * Unlink @kn from its sibling rbtree which starts from
- * kn->parent->dir.children.
+ * Try to unlink @kn from its sibling rbtree which starts from
+ * kn->parent->dir.children. Returns %true if @kn was actually
+ * removed, %false if @kn wasn't on the rbtree.
*
* Locking:
* mutex_lock(kernfs_mutex)
*/
-static void kernfs_unlink_sibling(struct kernfs_node *kn)
+static bool kernfs_unlink_sibling(struct kernfs_node *kn)
{
+ if (RB_EMPTY_NODE(&kn->rb))
+ return false;
+
if (kernfs_type(kn) == KERNFS_DIR)
kn->parent->dir.subdirs--;
rb_erase(&kn->rb, &kn->parent->dir.children);
+ RB_CLEAR_NODE(&kn->rb);
+ return true;
}
/**
@@ -137,7 +296,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
if (!atomic_inc_unless_negative(&kn->active))
return NULL;
- if (kn->flags & KERNFS_LOCKDEP)
+ if (kernfs_lockdep(kn))
rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
return kn;
}
@@ -151,59 +310,57 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
*/
void kernfs_put_active(struct kernfs_node *kn)
{
+ struct kernfs_root *root = kernfs_root(kn);
int v;
if (unlikely(!kn))
return;
- if (kn->flags & KERNFS_LOCKDEP)
+ if (kernfs_lockdep(kn))
rwsem_release(&kn->dep_map, 1, _RET_IP_);
v = atomic_dec_return(&kn->active);
if (likely(v != KN_DEACTIVATED_BIAS))
return;
- /*
- * atomic_dec_return() is a mb(), we'll always see the updated
- * kn->u.completion.
- */
- complete(kn->u.completion);
+ wake_up_all(&root->deactivate_waitq);
}
/**
- * kernfs_deactivate - deactivate kernfs_node
- * @kn: kernfs_node to deactivate
+ * kernfs_drain - drain kernfs_node
+ * @kn: kernfs_node to drain
*
- * Deny new active references and drain existing ones.
+ * Drain existing usages and nuke all existing mmaps of @kn. Mutiple
+ * removers may invoke this function concurrently on @kn and all will
+ * return after draining is complete.
*/
-static void kernfs_deactivate(struct kernfs_node *kn)
+static void kernfs_drain(struct kernfs_node *kn)
+ __releases(&kernfs_mutex) __acquires(&kernfs_mutex)
{
- DECLARE_COMPLETION_ONSTACK(wait);
- int v;
+ struct kernfs_root *root = kernfs_root(kn);
- BUG_ON(!(kn->flags & KERNFS_REMOVED));
-
- if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
- return;
+ lockdep_assert_held(&kernfs_mutex);
+ WARN_ON_ONCE(kernfs_active(kn));
- kn->u.completion = (void *)&wait;
+ mutex_unlock(&kernfs_mutex);
- if (kn->flags & KERNFS_LOCKDEP)
+ if (kernfs_lockdep(kn)) {
rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
- /* atomic_add_return() is a mb(), put_active() will always see
- * the updated kn->u.completion.
- */
- v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
-
- if (v != KN_DEACTIVATED_BIAS) {
- if (kn->flags & KERNFS_LOCKDEP)
+ if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
lock_contended(&kn->dep_map, _RET_IP_);
- wait_for_completion(&wait);
}
- if (kn->flags & KERNFS_LOCKDEP) {
+ /* but everyone should wait for draining */
+ wait_event(root->deactivate_waitq,
+ atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
+
+ if (kernfs_lockdep(kn)) {
lock_acquired(&kn->dep_map, _RET_IP_);
rwsem_release(&kn->dep_map, 1, _RET_IP_);
}
+
+ kernfs_unmap_bin_file(kn);
+
+ mutex_lock(&kernfs_mutex);
}
/**
@@ -234,13 +391,15 @@ void kernfs_put(struct kernfs_node *kn)
return;
root = kernfs_root(kn);
repeat:
- /* Moving/renaming is always done while holding reference.
+ /*
+ * Moving/renaming is always done while holding reference.
* kn->parent won't change beneath us.
*/
parent = kn->parent;
- WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n",
- parent ? parent->name : "", kn->name);
+ WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
+ "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
+ parent ? parent->name : "", kn->name, atomic_read(&kn->active));
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
@@ -282,8 +441,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
kn = dentry->d_fsdata;
mutex_lock(&kernfs_mutex);
- /* The kernfs node has been deleted */
- if (kn->flags & KERNFS_REMOVED)
+ /* The kernfs node has been deactivated */
+ if (!kernfs_active(kn))
goto out_bad;
/* The kernfs node has been moved? */
@@ -328,6 +487,24 @@ const struct dentry_operations kernfs_dops = {
.d_release = kernfs_dop_release,
};
+/**
+ * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
+ * @dentry: the dentry in question
+ *
+ * Return the kernfs_node associated with @dentry. If @dentry is not a
+ * kernfs one, %NULL is returned.
+ *
+ * While the returned kernfs_node will stay accessible as long as @dentry
+ * is accessible, the returned node can be in any state and the caller is
+ * fully responsible for determining what's accessible.
+ */
+struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry)
+{
+ if (dentry->d_sb->s_op == &kernfs_sops)
+ return dentry->d_fsdata;
+ return NULL;
+}
+
static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
const char *name, umode_t mode,
unsigned flags)
@@ -352,11 +529,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
kn->ino = ret;
atomic_set(&kn->count, 1);
- atomic_set(&kn->active, 0);
+ atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
+ RB_CLEAR_NODE(&kn->rb);
kn->name = name;
kn->mode = mode;
- kn->flags = flags | KERNFS_REMOVED;
+ kn->flags = flags;
return kn;
@@ -382,69 +560,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
}
/**
- * kernfs_addrm_start - prepare for kernfs_node add/remove
- * @acxt: pointer to kernfs_addrm_cxt to be used
- *
- * This function is called when the caller is about to add or remove
- * kernfs_node. This function acquires kernfs_mutex. @acxt is used
- * to keep and pass context to other addrm functions.
- *
- * LOCKING:
- * Kernel thread context (may sleep). kernfs_mutex is locked on
- * return.
- */
-void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
- __acquires(kernfs_mutex)
-{
- memset(acxt, 0, sizeof(*acxt));
-
- mutex_lock(&kernfs_mutex);
-}
-
-/**
* kernfs_add_one - add kernfs_node to parent without warning
- * @acxt: addrm context to use
* @kn: kernfs_node to be added
*
* The caller must already have initialized @kn->parent. This
* function increments nlink of the parent's inode if @kn is a
* directory and link into the children list of the parent.
*
- * This function should be called between calls to
- * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
- * the same @acxt as passed to kernfs_addrm_start().
- *
- * LOCKING:
- * Determined by kernfs_addrm_start().
- *
* RETURNS:
* 0 on success, -EEXIST if entry with the given name already
* exists.
*/
-int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
+int kernfs_add_one(struct kernfs_node *kn)
{
struct kernfs_node *parent = kn->parent;
- bool has_ns = kernfs_ns_enabled(parent);
struct kernfs_iattrs *ps_iattr;
+ bool has_ns;
int ret;
- if (has_ns != (bool)kn->ns) {
- WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
- has_ns ? "required" : "invalid", parent->name, kn->name);
- return -EINVAL;
- }
+ mutex_lock(&kernfs_mutex);
+
+ ret = -EINVAL;
+ has_ns = kernfs_ns_enabled(parent);
+ if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
+ has_ns ? "required" : "invalid", parent->name, kn->name))
+ goto out_unlock;
if (kernfs_type(parent) != KERNFS_DIR)
- return -EINVAL;
+ goto out_unlock;
- if (parent->flags & KERNFS_REMOVED)
- return -ENOENT;
+ ret = -ENOENT;
+ if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent))
+ goto out_unlock;
kn->hash = kernfs_name_hash(kn->name, kn->ns);
ret = kernfs_link_sibling(kn);
if (ret)
- return ret;
+ goto out_unlock;
/* Update timestamps on the parent */
ps_iattr = parent->iattr;
@@ -453,82 +606,22 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
}
- /* Mark the entry added into directory tree */
- kn->flags &= ~KERNFS_REMOVED;
-
- return 0;
-}
-
-/**
- * kernfs_remove_one - remove kernfs_node from parent
- * @acxt: addrm context to use
- * @kn: kernfs_node to be removed
- *
- * Mark @kn removed and drop nlink of parent inode if @kn is a
- * directory. @kn is unlinked from the children list.
- *
- * This function should be called between calls to
- * kernfs_addrm_start() and kernfs_addrm_finish() and should be
- * passed the same @acxt as passed to kernfs_addrm_start().
- *
- * LOCKING:
- * Determined by kernfs_addrm_start().
- */
-static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
- struct kernfs_node *kn)
-{
- struct kernfs_iattrs *ps_iattr;
+ mutex_unlock(&kernfs_mutex);
/*
- * Removal can be called multiple times on the same node. Only the
- * first invocation is effective and puts the base ref.
+ * Activate the new node unless CREATE_DEACTIVATED is requested.
+ * If not activated here, the kernfs user is responsible for
+ * activating the node with kernfs_activate(). A node which hasn't
+ * been activated is not visible to userland and its removal won't
+ * trigger deactivation.
*/
- if (kn->flags & KERNFS_REMOVED)
- return;
-
- if (kn->parent) {
- kernfs_unlink_sibling(kn);
-
- /* Update timestamps on the parent */
- ps_iattr = kn->parent->iattr;
- if (ps_iattr) {
- ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
- ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
- }
- }
-
- kn->flags |= KERNFS_REMOVED;
- kn->u.removed_list = acxt->removed;
- acxt->removed = kn;
-}
+ if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
+ kernfs_activate(kn);
+ return 0;
-/**
- * kernfs_addrm_finish - finish up kernfs_node add/remove
- * @acxt: addrm context to finish up
- *
- * Finish up kernfs_node add/remove. Resources acquired by
- * kernfs_addrm_start() are released and removed kernfs_nodes are
- * cleaned up.
- *
- * LOCKING:
- * kernfs_mutex is released.
- */
-void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
- __releases(kernfs_mutex)
-{
- /* release resources acquired by kernfs_addrm_start() */
+out_unlock:
mutex_unlock(&kernfs_mutex);
-
- /* kill removed kernfs_nodes */
- while (acxt->removed) {
- struct kernfs_node *kn = acxt->removed;
-
- acxt->removed = kn->u.removed_list;
-
- kernfs_deactivate(kn);
- kernfs_unmap_bin_file(kn);
- kernfs_put(kn);
- }
+ return ret;
}
/**
@@ -599,13 +692,15 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
/**
* kernfs_create_root - create a new kernfs hierarchy
- * @kdops: optional directory syscall operations for the hierarchy
+ * @scops: optional syscall operations for the hierarchy
+ * @flags: KERNFS_ROOT_* flags
* @priv: opaque data associated with the new directory
*
* Returns the root of the new hierarchy on success, ERR_PTR() value on
* failure.
*/
-struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
+struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
+ unsigned int flags, void *priv)
{
struct kernfs_root *root;
struct kernfs_node *kn;
@@ -624,12 +719,16 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
return ERR_PTR(-ENOMEM);
}
- kn->flags &= ~KERNFS_REMOVED;
kn->priv = priv;
kn->dir.root = root;
- root->dir_ops = kdops;
+ root->syscall_ops = scops;
+ root->flags = flags;
root->kn = kn;
+ init_waitqueue_head(&root->deactivate_waitq);
+
+ if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED))
+ kernfs_activate(kn);
return root;
}
@@ -660,7 +759,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
const char *name, umode_t mode,
void *priv, const void *ns)
{
- struct kernfs_addrm_cxt acxt;
struct kernfs_node *kn;
int rc;
@@ -674,10 +772,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
kn->priv = priv;
/* link in */
- kernfs_addrm_start(&acxt);
- rc = kernfs_add_one(&acxt, kn);
- kernfs_addrm_finish(&acxt);
-
+ rc = kernfs_add_one(kn);
if (!rc)
return kn;
@@ -703,7 +798,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
/* no such entry */
- if (!kn) {
+ if (!kn || !kernfs_active(kn)) {
ret = NULL;
goto out_unlock;
}
@@ -728,23 +823,37 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
umode_t mode)
{
struct kernfs_node *parent = dir->i_private;
- struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;
+ struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops;
+ int ret;
- if (!kdops || !kdops->mkdir)
+ if (!scops || !scops->mkdir)
return -EPERM;
- return kdops->mkdir(parent, dentry->d_name.name, mode);
+ if (!kernfs_get_active(parent))
+ return -ENODEV;
+
+ ret = scops->mkdir(parent, dentry->d_name.name, mode);
+
+ kernfs_put_active(parent);
+ return ret;
}
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
struct kernfs_node *kn = dentry->d_fsdata;
- struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
+ struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
+ int ret;
- if (!kdops || !kdops->rmdir)
+ if (!scops || !scops->rmdir)
return -EPERM;
- return kdops->rmdir(kn);
+ if (!kernfs_get_active(kn))
+ return -ENODEV;
+
+ ret = scops->rmdir(kn);
+
+ kernfs_put_active(kn);
+ return ret;
}
static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
@@ -752,12 +861,25 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
{
struct kernfs_node *kn = old_dentry->d_fsdata;
struct kernfs_node *new_parent = new_dir->i_private;
- struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;
+ struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops;
+ int ret;
- if (!kdops || !kdops->rename)
+ if (!scops || !scops->rename)
return -EPERM;
- return kdops->rename(kn, new_parent, new_dentry->d_name.name);
+ if (!kernfs_get_active(kn))
+ return -ENODEV;
+
+ if (!kernfs_get_active(new_parent)) {
+ kernfs_put_active(kn);
+ return -ENODEV;
+ }
+
+ ret = scops->rename(kn, new_parent, new_dentry->d_name.name);
+
+ kernfs_put_active(new_parent);
+ kernfs_put_active(kn);
+ return ret;
}
const struct inode_operations kernfs_dir_iops = {
@@ -830,23 +952,104 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return pos->parent;
}
-static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
- struct kernfs_node *kn)
+/**
+ * kernfs_activate - activate a node which started deactivated
+ * @kn: kernfs_node whose subtree is to be activated
+ *
+ * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node
+ * needs to be explicitly activated. A node which hasn't been activated
+ * isn't visible to userland and deactivation is skipped during its
+ * removal. This is useful to construct atomic init sequences where
+ * creation of multiple nodes should either succeed or fail atomically.
+ *
+ * The caller is responsible for ensuring that this function is not called
+ * after kernfs_remove*() is invoked on @kn.
+ */
+void kernfs_activate(struct kernfs_node *kn)
{
- struct kernfs_node *pos, *next;
+ struct kernfs_node *pos;
- if (!kn)
+ mutex_lock(&kernfs_mutex);
+
+ pos = NULL;
+ while ((pos = kernfs_next_descendant_post(pos, kn))) {
+ if (!pos || (pos->flags & KERNFS_ACTIVATED))
+ continue;
+
+ WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb));
+ WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS);
+
+ atomic_sub(KN_DEACTIVATED_BIAS, &pos->active);
+ pos->flags |= KERNFS_ACTIVATED;
+ }
+
+ mutex_unlock(&kernfs_mutex);
+}
+
+static void __kernfs_remove(struct kernfs_node *kn)
+{
+ struct kernfs_node *pos;
+
+ lockdep_assert_held(&kernfs_mutex);
+
+ /*
+ * Short-circuit if non-root @kn has already finished removal.
+ * This is for kernfs_remove_self() which plays with active ref
+ * after removal.
+ */
+ if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb)))
return;
pr_debug("kernfs %s: removing\n", kn->name);
- next = NULL;
+ /* prevent any new usage under @kn by deactivating all nodes */
+ pos = NULL;
+ while ((pos = kernfs_next_descendant_post(pos, kn)))
+ if (kernfs_active(pos))
+ atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
+
+ /* deactivate and unlink the subtree node-by-node */
do {
- pos = next;
- next = kernfs_next_descendant_post(pos, kn);
- if (pos)
- kernfs_remove_one(acxt, pos);
- } while (next);
+ pos = kernfs_leftmost_descendant(kn);
+
+ /*
+ * kernfs_drain() drops kernfs_mutex temporarily and @pos's
+ * base ref could have been put by someone else by the time
+ * the function returns. Make sure it doesn't go away
+ * underneath us.
+ */
+ kernfs_get(pos);
+
+ /*
+ * Drain iff @kn was activated. This avoids draining and
+ * its lockdep annotations for nodes which have never been
+ * activated and allows embedding kernfs_remove() in create
+ * error paths without worrying about draining.
+ */
+ if (kn->flags & KERNFS_ACTIVATED)
+ kernfs_drain(pos);
+ else
+ WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
+
+ /*
+ * kernfs_unlink_sibling() succeeds once per node. Use it
+ * to decide who's responsible for cleanups.
+ */
+ if (!pos->parent || kernfs_unlink_sibling(pos)) {
+ struct kernfs_iattrs *ps_iattr =
+ pos->parent ? pos->parent->iattr : NULL;
+
+ /* update timestamps on the parent */
+ if (ps_iattr) {
+ ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
+ ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
+ }
+
+ kernfs_put(pos);
+ }
+
+ kernfs_put(pos);
+ } while (pos != kn);
}
/**
@@ -857,11 +1060,140 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
*/
void kernfs_remove(struct kernfs_node *kn)
{
- struct kernfs_addrm_cxt acxt;
+ mutex_lock(&kernfs_mutex);
+ __kernfs_remove(kn);
+ mutex_unlock(&kernfs_mutex);
+}
- kernfs_addrm_start(&acxt);
- __kernfs_remove(&acxt, kn);
- kernfs_addrm_finish(&acxt);
+/**
+ * kernfs_break_active_protection - break out of active protection
+ * @kn: the self kernfs_node
+ *
+ * The caller must be running off of a kernfs operation which is invoked
+ * with an active reference - e.g. one of kernfs_ops. Each invocation of
+ * this function must also be matched with an invocation of
+ * kernfs_unbreak_active_protection().
+ *
+ * This function releases the active reference of @kn the caller is
+ * holding. Once this function is called, @kn may be removed at any point
+ * and the caller is solely responsible for ensuring that the objects it
+ * dereferences are accessible.
+ */
+void kernfs_break_active_protection(struct kernfs_node *kn)
+{
+ /*
+ * Take out ourself out of the active ref dependency chain. If
+ * we're called without an active ref, lockdep will complain.
+ */
+ kernfs_put_active(kn);
+}
+
+/**
+ * kernfs_unbreak_active_protection - undo kernfs_break_active_protection()
+ * @kn: the self kernfs_node
+ *
+ * If kernfs_break_active_protection() was called, this function must be
+ * invoked before finishing the kernfs operation. Note that while this
+ * function restores the active reference, it doesn't and can't actually
+ * restore the active protection - @kn may already or be in the process of
+ * being removed. Once kernfs_break_active_protection() is invoked, that
+ * protection is irreversibly gone for the kernfs operation instance.
+ *
+ * While this function may be called at any point after
+ * kernfs_break_active_protection() is invoked, its most useful location
+ * would be right before the enclosing kernfs operation returns.
+ */
+void kernfs_unbreak_active_protection(struct kernfs_node *kn)
+{
+ /*
+ * @kn->active could be in any state; however, the increment we do
+ * here will be undone as soon as the enclosing kernfs operation
+ * finishes and this temporary bump can't break anything. If @kn
+ * is alive, nothing changes. If @kn is being deactivated, the
+ * soon-to-follow put will either finish deactivation or restore
+ * deactivated state. If @kn is already removed, the temporary
+ * bump is guaranteed to be gone before @kn is released.
+ */
+ atomic_inc(&kn->active);
+ if (kernfs_lockdep(kn))
+ rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_);
+}
+
+/**
+ * kernfs_remove_self - remove a kernfs_node from its own method
+ * @kn: the self kernfs_node to remove
+ *
+ * The caller must be running off of a kernfs operation which is invoked
+ * with an active reference - e.g. one of kernfs_ops. This can be used to
+ * implement a file operation which deletes itself.
+ *
+ * For example, the "delete" file for a sysfs device directory can be
+ * implemented by invoking kernfs_remove_self() on the "delete" file
+ * itself. This function breaks the circular dependency of trying to
+ * deactivate self while holding an active ref itself. It isn't necessary
+ * to modify the usual removal path to use kernfs_remove_self(). The
+ * "delete" implementation can simply invoke kernfs_remove_self() on self
+ * before proceeding with the usual removal path. kernfs will ignore later
+ * kernfs_remove() on self.
+ *
+ * kernfs_remove_self() can be called multiple times concurrently on the
+ * same kernfs_node. Only the first one actually performs removal and
+ * returns %true. All others will wait until the kernfs operation which
+ * won self-removal finishes and return %false. Note that the losers wait
+ * for the completion of not only the winning kernfs_remove_self() but also
+ * the whole kernfs_ops which won the arbitration. This can be used to
+ * guarantee, for example, all concurrent writes to a "delete" file to
+ * finish only after the whole operation is complete.
+ */
+bool kernfs_remove_self(struct kernfs_node *kn)
+{
+ bool ret;
+
+ mutex_lock(&kernfs_mutex);
+ kernfs_break_active_protection(kn);
+
+ /*
+ * SUICIDAL is used to arbitrate among competing invocations. Only
+ * the first one will actually perform removal. When the removal
+ * is complete, SUICIDED is set and the active ref is restored
+ * while holding kernfs_mutex. The ones which lost arbitration
+ * waits for SUICDED && drained which can happen only after the
+ * enclosing kernfs operation which executed the winning instance
+ * of kernfs_remove_self() finished.
+ */
+ if (!(kn->flags & KERNFS_SUICIDAL)) {
+ kn->flags |= KERNFS_SUICIDAL;
+ __kernfs_remove(kn);
+ kn->flags |= KERNFS_SUICIDED;
+ ret = true;
+ } else {
+ wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq;
+ DEFINE_WAIT(wait);
+
+ while (true) {
+ prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE);
+
+ if ((kn->flags & KERNFS_SUICIDED) &&
+ atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
+ break;
+
+ mutex_unlock(&kernfs_mutex);
+ schedule();
+ mutex_lock(&kernfs_mutex);
+ }
+ finish_wait(waitq, &wait);
+ WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb));
+ ret = false;
+ }
+
+ /*
+ * This must be done while holding kernfs_mutex; otherwise, waiting
+ * for SUICIDED && deactivated could finish prematurely.
+ */
+ kernfs_unbreak_active_protection(kn);
+
+ mutex_unlock(&kernfs_mutex);
+ return ret;
}
/**
@@ -876,7 +1208,6 @@ void kernfs_remove(struct kernfs_node *kn)
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
const void *ns)
{
- struct kernfs_addrm_cxt acxt;
struct kernfs_node *kn;
if (!parent) {
@@ -885,13 +1216,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
return -ENOENT;
}
- kernfs_addrm_start(&acxt);
+ mutex_lock(&kernfs_mutex);
kn = kernfs_find_ns(parent, name, ns);
if (kn)
- __kernfs_remove(&acxt, kn);
+ __kernfs_remove(kn);
- kernfs_addrm_finish(&acxt);
+ mutex_unlock(&kernfs_mutex);
if (kn)
return 0;
@@ -909,12 +1240,18 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
const char *new_name, const void *new_ns)
{
+ struct kernfs_node *old_parent;
+ const char *old_name = NULL;
int error;
+ /* can't move or rename root */
+ if (!kn->parent)
+ return -EINVAL;
+
mutex_lock(&kernfs_mutex);
error = -ENOENT;
- if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
+ if (!kernfs_active(kn) || !kernfs_active(new_parent))
goto out;
error = 0;
@@ -932,13 +1269,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
new_name = kstrdup(new_name, GFP_KERNEL);
if (!new_name)
goto out;
-
- if (kn->flags & KERNFS_STATIC_NAME)
- kn->flags &= ~KERNFS_STATIC_NAME;
- else
- kfree(kn->name);
-
- kn->name = new_name;
+ } else {
+ new_name = NULL;
}
/*
@@ -946,12 +1278,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
*/
kernfs_unlink_sibling(kn);
kernfs_get(new_parent);
- kernfs_put(kn->parent);
+
+ /* rename_lock protects ->parent and ->name accessors */
+ spin_lock_irq(&kernfs_rename_lock);
+
+ old_parent = kn->parent;
+ kn->parent = new_parent;
+
kn->ns = new_ns;
+ if (new_name) {
+ if (!(kn->flags & KERNFS_STATIC_NAME))
+ old_name = kn->name;
+ kn->flags &= ~KERNFS_STATIC_NAME;
+ kn->name = new_name;
+ }
+
+ spin_unlock_irq(&kernfs_rename_lock);
+
kn->hash = kernfs_name_hash(kn->name, kn->ns);
- kn->parent = new_parent;
kernfs_link_sibling(kn);
+ kernfs_put(old_parent);
+ kfree(old_name);
+
error = 0;
out:
mutex_unlock(&kernfs_mutex);
@@ -974,7 +1323,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
if (pos) {
- int valid = !(pos->flags & KERNFS_REMOVED) &&
+ int valid = kernfs_active(pos) &&
pos->parent == parent && hash == pos->hash;
kernfs_put(pos);
if (!valid)
@@ -993,8 +1342,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
break;
}
}
- /* Skip over entries in the wrong namespace */
- while (pos && pos->ns != ns) {
+ /* Skip over entries which are dying/dead or in the wrong namespace */
+ while (pos && (!kernfs_active(pos) || pos->ns != ns)) {
struct rb_node *node = rb_next(&pos->rb);
if (!node)
pos = NULL;
@@ -1008,14 +1357,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
pos = kernfs_dir_pos(ns, parent, ino, pos);
- if (pos)
+ if (pos) {
do {
struct rb_node *node = rb_next(&pos->rb);
if (!node)
pos = NULL;
else
pos = rb_to_kn(node);
- } while (pos && pos->ns != ns);
+ } while (pos && (!kernfs_active(pos) || pos->ns != ns));
+ }
return pos;
}
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index dbf397bfdff2..8034706a7af8 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -252,10 +252,18 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
struct kernfs_open_file *of = kernfs_of(file);
- ssize_t len = min_t(size_t, count, PAGE_SIZE);
const struct kernfs_ops *ops;
+ size_t len;
char *buf;
+ if (of->atomic_write_len) {
+ len = count;
+ if (len > of->atomic_write_len)
+ return -E2BIG;
+ } else {
+ len = min_t(size_t, count, PAGE_SIZE);
+ }
+
buf = kmalloc(len + 1, GFP_KERNEL);
if (!buf)
return -ENOMEM;
@@ -653,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file)
of->file = file;
/*
+ * Write path needs to atomic_write_len outside active reference.
+ * Cache it in open_file. See kernfs_fop_write() for details.
+ */
+ of->atomic_write_len = ops->atomic_write_len;
+
+ /*
* Always instantiate seq_file even if read access doesn't use
* seq_file or is not requested. This unifies private data access
* and readable regular files are the vast majority anyway.
@@ -820,7 +834,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
bool name_is_static,
struct lock_class_key *key)
{
- struct kernfs_addrm_cxt acxt;
struct kernfs_node *kn;
unsigned flags;
int rc;
@@ -855,10 +868,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
if (ops->mmap)
kn->flags |= KERNFS_HAS_MMAP;
- kernfs_addrm_start(&acxt);
- rc = kernfs_add_one(&acxt, kn);
- kernfs_addrm_finish(&acxt);
-
+ rc = kernfs_add_one(kn);
if (rc) {
kernfs_put(kn);
return ERR_PTR(rc);
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index eb536b76374a..8be13b2a079b 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -26,7 +26,8 @@ struct kernfs_iattrs {
struct simple_xattrs xattrs;
};
-#define KN_DEACTIVATED_BIAS INT_MIN
+/* +1 to avoid triggering overflow warning when negating it */
+#define KN_DEACTIVATED_BIAS (INT_MIN + 1)
/* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */
@@ -45,13 +46,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
}
/*
- * Context structure to be used while adding/removing nodes.
- */
-struct kernfs_addrm_cxt {
- struct kernfs_node *removed;
-};
-
-/*
* mount.c
*/
struct kernfs_super_info {
@@ -71,6 +65,7 @@ struct kernfs_super_info {
};
#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
+extern const struct super_operations kernfs_sops;
extern struct kmem_cache *kernfs_node_cache;
/*
@@ -100,9 +95,7 @@ extern const struct inode_operations kernfs_dir_iops;
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn);
void kernfs_put_active(struct kernfs_node *kn);
-void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt);
-int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn);
-void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt);
+int kernfs_add_one(struct kernfs_node *kn);
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
const char *name, umode_t mode,
unsigned flags);
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 0f4152defe7b..6a5f04ac8704 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -19,12 +19,49 @@
struct kmem_cache *kernfs_node_cache;
-static const struct super_operations kernfs_sops = {
+static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ struct kernfs_root *root = kernfs_info(sb)->root;
+ struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+ if (scops && scops->remount_fs)
+ return scops->remount_fs(root, flags, data);
+ return 0;
+}
+
+static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
+{
+ struct kernfs_root *root = kernfs_root(dentry->d_fsdata);
+ struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+ if (scops && scops->show_options)
+ return scops->show_options(sf, root);
+ return 0;
+}
+
+const struct super_operations kernfs_sops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
.evict_inode = kernfs_evict_inode,
+
+ .remount_fs = kernfs_sop_remount_fs,
+ .show_options = kernfs_sop_show_options,
};
+/**
+ * kernfs_root_from_sb - determine kernfs_root associated with a super_block
+ * @sb: the super_block in question
+ *
+ * Return the kernfs_root associated with @sb. If @sb is not a kernfs one,
+ * %NULL is returned.
+ */
+struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
+{
+ if (sb->s_op == &kernfs_sops)
+ return kernfs_info(sb)->root;
+ return NULL;
+}
+
static int kernfs_fill_super(struct super_block *sb)
{
struct kernfs_super_info *info = kernfs_info(sb);
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 4d457055acb9..8a198898e39a 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
struct kernfs_node *target)
{
struct kernfs_node *kn;
- struct kernfs_addrm_cxt acxt;
int error;
kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK);
@@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
kn->symlink.target_kn = target;
kernfs_get(target); /* ref owned by symlink */
- kernfs_addrm_start(&acxt);
- error = kernfs_add_one(&acxt, kn);
- kernfs_addrm_finish(&acxt);
-
+ error = kernfs_add_one(kn);
if (!error)
return kn;
diff --git a/fs/mount.h b/fs/mount.h
index a17458ca6f29..b29e42f05f34 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -19,13 +19,13 @@ struct mnt_pcp {
};
struct mountpoint {
- struct list_head m_hash;
+ struct hlist_node m_hash;
struct dentry *m_dentry;
int m_count;
};
struct mount {
- struct list_head mnt_hash;
+ struct hlist_node mnt_hash;
struct mount *mnt_parent;
struct dentry *mnt_mountpoint;
struct vfsmount mnt;
diff --git a/fs/namei.c b/fs/namei.c
index 385f7817bfcc..4b491b431990 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
return false;
if (!d_mountpoint(path->dentry))
- break;
+ return true;
mounted = __lookup_mnt(path->mnt, path->dentry);
if (!mounted)
@@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
*/
*inode = path->dentry->d_inode;
}
- return true;
-}
-
-static void follow_mount_rcu(struct nameidata *nd)
-{
- while (d_mountpoint(nd->path.dentry)) {
- struct mount *mounted;
- mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
- if (!mounted)
- break;
- nd->path.mnt = &mounted->mnt;
- nd->path.dentry = mounted->mnt.mnt_root;
- nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
- }
+ return read_seqretry(&mount_lock, nd->m_seq);
}
static int follow_dotdot_rcu(struct nameidata *nd)
@@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd)
break;
nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
}
- follow_mount_rcu(nd);
+ while (d_mountpoint(nd->path.dentry)) {
+ struct mount *mounted;
+ mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+ if (!mounted)
+ break;
+ nd->path.mnt = &mounted->mnt;
+ nd->path.dentry = mounted->mnt.mnt_root;
+ nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
+ if (!read_seqretry(&mount_lock, nd->m_seq))
+ goto failed;
+ }
nd->inode = nd->path.dentry->d_inode;
return 0;
@@ -1884,7 +1881,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
- if (f.need_put)
+ if (f.flags & FDPUT_FPUT)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
diff --git a/fs/namespace.c b/fs/namespace.c
index 22e536705c45..2ffc5a2905d4 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -23,11 +23,34 @@
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
+#include <linux/bootmem.h>
#include "pnode.h"
#include "internal.h"
-#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
-#define HASH_SIZE (1UL << HASH_SHIFT)
+static unsigned int m_hash_mask __read_mostly;
+static unsigned int m_hash_shift __read_mostly;
+static unsigned int mp_hash_mask __read_mostly;
+static unsigned int mp_hash_shift __read_mostly;
+
+static __initdata unsigned long mhash_entries;
+static int __init set_mhash_entries(char *str)
+{
+ if (!str)
+ return 0;
+ mhash_entries = simple_strtoul(str, &str, 0);
+ return 1;
+}
+__setup("mhash_entries=", set_mhash_entries);
+
+static __initdata unsigned long mphash_entries;
+static int __init set_mphash_entries(char *str)
+{
+ if (!str)
+ return 0;
+ mphash_entries = simple_strtoul(str, &str, 0);
+ return 1;
+}
+__setup("mphash_entries=", set_mphash_entries);
static int event;
static DEFINE_IDA(mnt_id_ida);
@@ -36,8 +59,8 @@ static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;
-static struct list_head *mount_hashtable __read_mostly;
-static struct list_head *mountpoint_hashtable __read_mostly;
+static struct hlist_head *mount_hashtable __read_mostly;
+static struct hlist_head *mountpoint_hashtable __read_mostly;
static struct kmem_cache *mnt_cache __read_mostly;
static DECLARE_RWSEM(namespace_sem);
@@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
-static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
+static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
tmp += ((unsigned long)dentry / L1_CACHE_BYTES);
- tmp = tmp + (tmp >> HASH_SHIFT);
- return tmp & (HASH_SIZE - 1);
+ tmp = tmp + (tmp >> m_hash_shift);
+ return &mount_hashtable[tmp & m_hash_mask];
+}
+
+static inline struct hlist_head *mp_hash(struct dentry *dentry)
+{
+ unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES);
+ tmp = tmp + (tmp >> mp_hash_shift);
+ return &mountpoint_hashtable[tmp & mp_hash_mask];
}
/*
@@ -187,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name)
mnt->mnt_writers = 0;
#endif
- INIT_LIST_HEAD(&mnt->mnt_hash);
+ INIT_HLIST_NODE(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
INIT_LIST_HEAD(&mnt->mnt_mounts);
INIT_LIST_HEAD(&mnt->mnt_list);
@@ -575,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq)
*/
struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
{
- struct list_head *head = mount_hashtable + hash(mnt, dentry);
+ struct hlist_head *head = m_hash(mnt, dentry);
struct mount *p;
- list_for_each_entry_rcu(p, head, mnt_hash)
+ hlist_for_each_entry_rcu(p, head, mnt_hash)
if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
return p;
return NULL;
@@ -590,13 +620,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
*/
struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
{
- struct list_head *head = mount_hashtable + hash(mnt, dentry);
- struct mount *p;
-
- list_for_each_entry_reverse(p, head, mnt_hash)
- if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry)
- return p;
- return NULL;
+ struct mount *p, *res;
+ res = p = __lookup_mnt(mnt, dentry);
+ if (!p)
+ goto out;
+ hlist_for_each_entry_continue(p, mnt_hash) {
+ if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
+ break;
+ res = p;
+ }
+out:
+ return res;
}
/*
@@ -633,11 +667,11 @@ struct vfsmount *lookup_mnt(struct path *path)
static struct mountpoint *new_mountpoint(struct dentry *dentry)
{
- struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry);
+ struct hlist_head *chain = mp_hash(dentry);
struct mountpoint *mp;
int ret;
- list_for_each_entry(mp, chain, m_hash) {
+ hlist_for_each_entry(mp, chain, m_hash) {
if (mp->m_dentry == dentry) {
/* might be worth a WARN_ON() */
if (d_unlinked(dentry))
@@ -659,7 +693,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry)
mp->m_dentry = dentry;
mp->m_count = 1;
- list_add(&mp->m_hash, chain);
+ hlist_add_head(&mp->m_hash, chain);
return mp;
}
@@ -670,7 +704,7 @@ static void put_mountpoint(struct mountpoint *mp)
spin_lock(&dentry->d_lock);
dentry->d_flags &= ~DCACHE_MOUNTED;
spin_unlock(&dentry->d_lock);
- list_del(&mp->m_hash);
+ hlist_del(&mp->m_hash);
kfree(mp);
}
}
@@ -712,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path)
mnt->mnt_parent = mnt;
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
list_del_init(&mnt->mnt_child);
- list_del_init(&mnt->mnt_hash);
+ hlist_del_init_rcu(&mnt->mnt_hash);
put_mountpoint(mnt->mnt_mp);
mnt->mnt_mp = NULL;
}
@@ -739,15 +773,14 @@ static void attach_mnt(struct mount *mnt,
struct mountpoint *mp)
{
mnt_set_mountpoint(parent, mp, mnt);
- list_add_tail(&mnt->mnt_hash, mount_hashtable +
- hash(&parent->mnt, mp->m_dentry));
+ hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
}
/*
* vfsmount lock must be held for write
*/
-static void commit_tree(struct mount *mnt)
+static void commit_tree(struct mount *mnt, struct mount *shadows)
{
struct mount *parent = mnt->mnt_parent;
struct mount *m;
@@ -762,8 +795,11 @@ static void commit_tree(struct mount *mnt)
list_splice(&head, n->list.prev);
- list_add_tail(&mnt->mnt_hash, mount_hashtable +
- hash(&parent->mnt, mnt->mnt_mountpoint));
+ if (shadows)
+ hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash);
+ else
+ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
touch_mnt_namespace(n);
}
@@ -1153,26 +1189,28 @@ int may_umount(struct vfsmount *mnt)
EXPORT_SYMBOL(may_umount);
-static LIST_HEAD(unmounted); /* protected by namespace_sem */
+static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static void namespace_unlock(void)
{
struct mount *mnt;
- LIST_HEAD(head);
+ struct hlist_head head = unmounted;
- if (likely(list_empty(&unmounted))) {
+ if (likely(hlist_empty(&head))) {
up_write(&namespace_sem);
return;
}
- list_splice_init(&unmounted, &head);
+ head.first->pprev = &head.first;
+ INIT_HLIST_HEAD(&unmounted);
+
up_write(&namespace_sem);
synchronize_rcu();
- while (!list_empty(&head)) {
- mnt = list_first_entry(&head, struct mount, mnt_hash);
- list_del_init(&mnt->mnt_hash);
+ while (!hlist_empty(&head)) {
+ mnt = hlist_entry(head.first, struct mount, mnt_hash);
+ hlist_del_init(&mnt->mnt_hash);
if (mnt->mnt_ex_mountpoint.mnt)
path_put(&mnt->mnt_ex_mountpoint);
mntput(&mnt->mnt);
@@ -1193,16 +1231,19 @@ static inline void namespace_lock(void)
*/
void umount_tree(struct mount *mnt, int how)
{
- LIST_HEAD(tmp_list);
+ HLIST_HEAD(tmp_list);
struct mount *p;
+ struct mount *last = NULL;
- for (p = mnt; p; p = next_mnt(p, mnt))
- list_move(&p->mnt_hash, &tmp_list);
+ for (p = mnt; p; p = next_mnt(p, mnt)) {
+ hlist_del_init_rcu(&p->mnt_hash);
+ hlist_add_head(&p->mnt_hash, &tmp_list);
+ }
if (how)
propagate_umount(&tmp_list);
- list_for_each_entry(p, &tmp_list, mnt_hash) {
+ hlist_for_each_entry(p, &tmp_list, mnt_hash) {
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
__touch_mnt_namespace(p->mnt_ns);
@@ -1220,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how)
p->mnt_mp = NULL;
}
change_mnt_propagation(p, MS_PRIVATE);
+ last = p;
+ }
+ if (last) {
+ last->mnt_hash.next = unmounted.first;
+ unmounted.first = tmp_list.first;
+ unmounted.first->pprev = &unmounted.first;
}
- list_splice(&tmp_list, &unmounted);
}
static void shrink_submounts(struct mount *mnt);
@@ -1605,24 +1651,23 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct mountpoint *dest_mp,
struct path *parent_path)
{
- LIST_HEAD(tree_list);
+ HLIST_HEAD(tree_list);
struct mount *child, *p;
+ struct hlist_node *n;
int err;
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
goto out;
- }
- err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
- if (err)
- goto out_cleanup_ids;
-
- lock_mount_hash();
-
- if (IS_MNT_SHARED(dest_mnt)) {
+ err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list);
+ if (err)
+ goto out_cleanup_ids;
+ lock_mount_hash();
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
+ } else {
+ lock_mount_hash();
}
if (parent_path) {
detach_mnt(source_mnt, parent_path);
@@ -1630,20 +1675,22 @@ static int attach_recursive_mnt(struct mount *source_mnt,
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
- commit_tree(source_mnt);
+ commit_tree(source_mnt, NULL);
}
- list_for_each_entry_safe(child, p, &tree_list, mnt_hash) {
- list_del_init(&child->mnt_hash);
- commit_tree(child);
+ hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
+ struct mount *q;
+ hlist_del_init(&child->mnt_hash);
+ q = __lookup_mnt_last(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ commit_tree(child, q);
}
unlock_mount_hash();
return 0;
out_cleanup_ids:
- if (IS_MNT_SHARED(dest_mnt))
- cleanup_group_ids(source_mnt, NULL);
+ cleanup_group_ids(source_mnt, NULL);
out:
return err;
}
@@ -2777,18 +2824,24 @@ void __init mnt_init(void)
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
- mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
- mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
+ mount_hashtable = alloc_large_system_hash("Mount-cache",
+ sizeof(struct hlist_head),
+ mhash_entries, 19,
+ 0,
+ &m_hash_shift, &m_hash_mask, 0, 0);
+ mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache",
+ sizeof(struct hlist_head),
+ mphash_entries, 19,
+ 0,
+ &mp_hash_shift, &mp_hash_mask, 0, 0);
if (!mount_hashtable || !mountpoint_hashtable)
panic("Failed to allocate mount hash table\n");
- printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE);
-
- for (u = 0; u < HASH_SIZE; u++)
- INIT_LIST_HEAD(&mount_hashtable[u]);
- for (u = 0; u < HASH_SIZE; u++)
- INIT_LIST_HEAD(&mountpoint_hashtable[u]);
+ for (u = 0; u <= m_hash_mask; u++)
+ INIT_HLIST_HEAD(&mount_hashtable[u]);
+ for (u = 0; u <= mp_hash_mask; u++)
+ INIT_HLIST_HEAD(&mountpoint_hashtable[u]);
kernfs_init();
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 017d3cb5e99b..6d7be3f80356 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
fh_lock(fhp);
host_err = notify_change(dentry, iap, NULL);
fh_unlock(fhp);
+ err = nfserrno(host_err);
out_put_write_access:
if (size_change)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 8450262bcf2a..51632c40e896 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2393,8 +2393,8 @@ out_dio:
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
if (ret < 0)
written = ret;
@@ -2407,8 +2407,8 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
/*
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 1324e6600e57..ca5ce14cbddc 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name,
strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
new_conn->cc_namelen = grouplen;
- strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1);
+ if (cluster_name_len)
+ strlcpy(new_conn->cc_cluster_name, cluster_name,
+ CLUSTER_NAME_MAX + 1);
new_conn->cc_cluster_name_len = cluster_name_len;
new_conn->cc_recovery_handler = recovery_handler;
new_conn->cc_recovery_data = recovery_data;
diff --git a/fs/open.c b/fs/open.c
index 4b3e1edf2fe4..b9ed8b25c108 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f,
return 0;
}
+ /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
+ if (S_ISREG(inode->i_mode))
+ f->f_mode |= FMODE_ATOMIC_POS;
+
f->f_op = fops_get(inode->i_fop);
if (unlikely(WARN_ON(!f->f_op))) {
error = -ENODEV;
diff --git a/fs/pnode.c b/fs/pnode.c
index c7221bb19801..88396df725b4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest,
* @tree_list : list of heads of trees to be attached.
*/
int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
- struct mount *source_mnt, struct list_head *tree_list)
+ struct mount *source_mnt, struct hlist_head *tree_list)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct mount *m, *child;
int ret = 0;
struct mount *prev_dest_mnt = dest_mnt;
struct mount *prev_src_mnt = source_mnt;
- LIST_HEAD(tmp_list);
+ HLIST_HEAD(tmp_list);
for (m = propagation_next(dest_mnt, dest_mnt); m;
m = propagation_next(m, dest_mnt)) {
@@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
child = copy_tree(source, source->mnt.mnt_root, type);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
- list_splice(tree_list, tmp_list.prev);
+ tmp_list = *tree_list;
+ tmp_list.first->pprev = &tmp_list.first;
+ INIT_HLIST_HEAD(tree_list);
goto out;
}
if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) {
mnt_set_mountpoint(m, dest_mp, child);
- list_add_tail(&child->mnt_hash, tree_list);
+ hlist_add_head(&child->mnt_hash, tree_list);
} else {
/*
* This can happen if the parent mount was bind mounted
* on some subdirectory of a shared/slave mount.
*/
- list_add_tail(&child->mnt_hash, &tmp_list);
+ hlist_add_head(&child->mnt_hash, &tmp_list);
}
prev_dest_mnt = m;
prev_src_mnt = child;
}
out:
lock_mount_hash();
- while (!list_empty(&tmp_list)) {
- child = list_first_entry(&tmp_list, struct mount, mnt_hash);
+ while (!hlist_empty(&tmp_list)) {
+ child = hlist_entry(tmp_list.first, struct mount, mnt_hash);
umount_tree(child, 0);
}
unlock_mount_hash();
@@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt)
* umount the child only if the child has no
* other children
*/
- if (child && list_empty(&child->mnt_mounts))
- list_move_tail(&child->mnt_hash, &mnt->mnt_hash);
+ if (child && list_empty(&child->mnt_mounts)) {
+ hlist_del_init_rcu(&child->mnt_hash);
+ hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash);
+ }
}
}
@@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt)
*
* vfsmount lock must be held for write
*/
-int propagate_umount(struct list_head *list)
+int propagate_umount(struct hlist_head *list)
{
struct mount *mnt;
- list_for_each_entry(mnt, list, mnt_hash)
+ hlist_for_each_entry(mnt, list, mnt_hash)
__propagate_umount(mnt);
return 0;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 59e7eda1851e..fc28a27fa892 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt)
void change_mnt_propagation(struct mount *, int);
int propagate_mnt(struct mount *, struct mountpoint *, struct mount *,
- struct list_head *);
-int propagate_umount(struct list_head *);
+ struct hlist_head *);
+int propagate_umount(struct hlist_head *);
int propagate_mount_busy(struct mount *, int);
void mnt_release_group_id(struct mount *);
int get_dominating_id(struct mount *mnt, const struct path *root);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 51507065263b..b9760628e1fd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = -ENOENT;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 6f599c62f0cc..9d231e9e5f0e 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -9,7 +9,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/irqnr.h>
-#include <asm/cputime.h>
+#include <linux/cputime.h>
#include <linux/tick.h>
#ifndef arch_irq_stat_cpu
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 7141b8d0ca9e..33de567c25af 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -5,7 +5,7 @@
#include <linux/seq_file.h>
#include <linux/time.h>
#include <linux/kernel_stat.h>
-#include <asm/cputime.h>
+#include <linux/cputime.h>
static int uptime_proc_show(struct seq_file *m, void *v)
{
diff --git a/fs/read_write.c b/fs/read_write.c
index edc5746a902a..31c6efa43183 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
+static inline struct fd fdget_pos(int fd)
+{
+ return __to_fd(__fdget_pos(fd));
+}
+
+static inline void fdput_pos(struct fd f)
+{
+ if (f.flags & FDPUT_POS_UNLOCK)
+ mutex_unlock(&f.file->f_pos_lock);
+ fdput(f);
+}
+
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
off_t retval;
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
if (!f.file)
return -EBADF;
@@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
- fdput(f);
+ fdput_pos(f);
return retval;
}
@@ -295,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
unsigned int, whence)
{
int retval;
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
loff_t offset;
if (!f.file)
@@ -315,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
retval = 0;
}
out_putf:
- fdput(f);
+ fdput_pos(f);
return retval;
}
#endif
@@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
ret = vfs_read(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
return ret;
}
@@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
ret = vfs_write(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
return ret;
@@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev);
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
if (ret > 0)
@@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
@@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
- fdput(f);
+ fdput_pos(f);
}
if (ret > 0)
@@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
@@ -978,13 +990,13 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
ret = compat_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
- fdput(f);
+ fdput_pos(f);
return ret;
}
-COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
- const struct compat_iovec __user *,vec,
- unsigned long, vlen, loff_t, pos)
+static long __compat_sys_preadv64(unsigned long fd,
+ const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
{
struct fd f;
ssize_t ret;
@@ -1001,12 +1013,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
return ret;
}
+#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
+COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos)
+{
+ return __compat_sys_preadv64(fd, vec, vlen, pos);
+}
+#endif
+
COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return compat_sys_preadv64(fd, vec, vlen, pos);
+
+ return __compat_sys_preadv64(fd, vec, vlen, pos);
}
static size_t compat_writev(struct file *file,
@@ -1035,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
compat_ulong_t, vlen)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
@@ -1045,13 +1067,13 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
ret = compat_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
- fdput(f);
+ fdput_pos(f);
return ret;
}
-COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
- const struct compat_iovec __user *,vec,
- unsigned long, vlen, loff_t, pos)
+static long __compat_sys_pwritev64(unsigned long fd,
+ const struct compat_iovec __user *vec,
+ unsigned long vlen, loff_t pos)
{
struct fd f;
ssize_t ret;
@@ -1068,12 +1090,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
return ret;
}
+#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
+COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
+ const struct compat_iovec __user *,vec,
+ unsigned long, vlen, loff_t, pos)
+{
+ return __compat_sys_pwritev64(fd, vec, vlen, pos);
+}
+#endif
+
COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return compat_sys_pwritev64(fd, vec, vlen, pos);
+
+ return __compat_sys_pwritev64(fd, vec, vlen, pos);
}
#endif
diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig
index 8c41feacbac5..b2756014508c 100644
--- a/fs/sysfs/Kconfig
+++ b/fs/sysfs/Kconfig
@@ -1,6 +1,7 @@
config SYSFS
bool "sysfs file system support" if EXPERT
default y
+ select KERNFS
help
The sysfs filesystem is a virtual filesystem that the kernel uses to
export internal kernel objects, their attributes, and their
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index ee0d761c3179..0b45ff42f374 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -19,39 +19,18 @@
DEFINE_SPINLOCK(sysfs_symlink_target_lock);
-/**
- * sysfs_pathname - return full path to sysfs dirent
- * @kn: kernfs_node whose path we want
- * @path: caller allocated buffer of size PATH_MAX
- *
- * Gives the name "/" to the sysfs_root entry; any path returned
- * is relative to wherever sysfs is mounted.
- */
-static char *sysfs_pathname(struct kernfs_node *kn, char *path)
-{
- if (kn->parent) {
- sysfs_pathname(kn->parent, path);
- strlcat(path, "/", PATH_MAX);
- }
- strlcat(path, kn->name, PATH_MAX);
- return path;
-}
-
void sysfs_warn_dup(struct kernfs_node *parent, const char *name)
{
- char *path;
+ char *buf, *path = NULL;
- path = kzalloc(PATH_MAX, GFP_KERNEL);
- if (path) {
- sysfs_pathname(parent, path);
- strlcat(path, "/", PATH_MAX);
- strlcat(path, name, PATH_MAX);
- }
+ buf = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (buf)
+ path = kernfs_path(parent, buf, PATH_MAX);
- WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n",
- path ? path : name);
+ WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n",
+ path, name);
- kfree(path);
+ kfree(buf);
}
/**
@@ -122,9 +101,13 @@ void sysfs_remove_dir(struct kobject *kobj)
int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name,
const void *new_ns)
{
- struct kernfs_node *parent = kobj->sd->parent;
+ struct kernfs_node *parent;
+ int ret;
- return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
+ parent = kernfs_get_parent(kobj->sd);
+ ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns);
+ kernfs_put(parent);
+ return ret;
}
int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
@@ -133,7 +116,6 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj,
struct kernfs_node *kn = kobj->sd;
struct kernfs_node *new_parent;
- BUG_ON(!kn->parent);
new_parent = new_parent_kobj && new_parent_kobj->sd ?
new_parent_kobj->sd : sysfs_root_kn;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 810cf6e613e5..1b8b91b67fdb 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -372,6 +372,29 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
+/**
+ * sysfs_remove_file_self - remove an object attribute from its own method
+ * @kobj: object we're acting for
+ * @attr: attribute descriptor
+ *
+ * See kernfs_remove_self() for details.
+ */
+bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr)
+{
+ struct kernfs_node *parent = kobj->sd;
+ struct kernfs_node *kn;
+ bool ret;
+
+ kn = kernfs_find_and_get(parent, attr->name);
+ if (WARN_ON_ONCE(!kn))
+ return false;
+
+ ret = kernfs_remove_self(kn);
+
+ kernfs_put(kn);
+ return ret;
+}
+
void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
{
int i;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 6b579387c67a..aa0406895b53 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj,
if (grp->bin_attrs) {
for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
if (update)
- sysfs_remove_bin_file(kobj, *bin_attr);
- error = sysfs_create_bin_file(kobj, *bin_attr);
+ kernfs_remove_by_name(parent,
+ (*bin_attr)->attr.name);
+ error = sysfs_add_file_mode_ns(parent,
+ &(*bin_attr)->attr, true,
+ (*bin_attr)->attr.mode, NULL);
if (error)
break;
}
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 3eaf5c6622eb..a66ad6196f59 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -63,7 +63,7 @@ int __init sysfs_init(void)
{
int err;
- sysfs_root = kernfs_create_root(NULL, NULL);
+ sysfs_root = kernfs_create_root(NULL, 0, NULL);
if (IS_ERR(sysfs_root))
return PTR_ERR(sysfs_root);
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 929312180dd0..0013142c0475 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -317,6 +317,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
(clockid != CLOCK_MONOTONIC &&
clockid != CLOCK_REALTIME &&
clockid != CLOCK_REALTIME_ALARM &&
+ clockid != CLOCK_BOOTTIME &&
clockid != CLOCK_BOOTTIME_ALARM))
return -EINVAL;