diff options
Diffstat (limited to 'fs')
48 files changed, 1148 insertions, 586 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index 7385e54be4b9..312393f32948 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -96,6 +96,7 @@ endif # BLOCK menu "Pseudo filesystems" source "fs/proc/Kconfig" +source "fs/kernfs/Kconfig" source "fs/sysfs/Kconfig" config TMPFS diff --git a/fs/Makefile b/fs/Makefile index 47ac07bb4acc..f9cb9876e466 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,7 +52,8 @@ obj-$(CONFIG_FHANDLE) += fhandle.o obj-y += quota/ obj-$(CONFIG_PROC_FS) += proc/ -obj-$(CONFIG_SYSFS) += sysfs/ kernfs/ +obj-$(CONFIG_KERNFS) += kernfs/ +obj-$(CONFIG_SYSFS) += sysfs/ obj-$(CONFIG_CONFIGFS_FS) += configfs/ obj-y += devpts/ diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6621f8008122..be75b500005d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -75,6 +75,7 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ + work_func_t async_workfn; struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ struct sk_buff_head rx_queue; /* received packets */ diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 8ad8c2a0703a..ef943df73b8c 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -644,7 +644,7 @@ static void afs_process_async_call(struct work_struct *work) /* we can't just delete the call because the work item may be * queued */ - PREPARE_WORK(&call->async_work, afs_delete_async_call); + call->async_workfn = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } @@ -663,6 +663,13 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) call->reply_size += len; } +static void afs_async_workfn(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, async_work); + + call->async_workfn(work); +} + /* * accept the backlog of incoming calls */ @@ -685,7 +692,8 @@ static void afs_collect_incoming_call(struct work_struct *work) return; } - INIT_WORK(&call->async_work, afs_process_async_call); + call->async_workfn = afs_process_async_call; + INIT_WORK(&call->async_work, afs_async_workfn); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 24084732b1d0..80ef38c73e5a 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -41,19 +41,8 @@ static const struct dentry_operations anon_inodefs_dentry_operations = { static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { - struct dentry *root; - root = mount_pseudo(fs_type, "anon_inode:", NULL, + return mount_pseudo(fs_type, "anon_inode:", NULL, &anon_inodefs_dentry_operations, ANON_INODE_FS_MAGIC); - if (!IS_ERR(root)) { - struct super_block *s = root->d_sb; - anon_inode_inode = alloc_anon_inode(s); - if (IS_ERR(anon_inode_inode)) { - dput(root); - deactivate_locked_super(s); - root = ERR_CAST(anon_inode_inode); - } - } - return root; } static struct file_system_type anon_inode_fs_type = { @@ -175,22 +164,15 @@ EXPORT_SYMBOL_GPL(anon_inode_getfd); static int __init anon_inode_init(void) { - int error; - - error = register_filesystem(&anon_inode_fs_type); - if (error) - goto err_exit; anon_inode_mnt = kern_mount(&anon_inode_fs_type); - if (IS_ERR(anon_inode_mnt)) { - error = PTR_ERR(anon_inode_mnt); - goto err_unregister_filesystem; - } - return 0; + if (IS_ERR(anon_inode_mnt)) + panic("anon_inode_init() kernel mount failed (%ld)\n", PTR_ERR(anon_inode_mnt)); -err_unregister_filesystem: - unregister_filesystem(&anon_inode_fs_type); -err_exit: - panic(KERN_ERR "anon_inode_init() failed (%d)\n", error); + anon_inode_inode = alloc_anon_inode(anon_inode_mnt->mnt_sb); + if (IS_ERR(anon_inode_inode)) + panic("anon_inode_init() inode allocation failed (%ld)\n", PTR_ERR(anon_inode_inode)); + + return 0; } fs_initcall(anon_inode_init); diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 4f70f383132c..29696b78d1f4 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -301,25 +301,25 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) EXPORT_SYMBOL(bio_integrity_get_tag); /** - * bio_integrity_generate - Generate integrity metadata for a bio - * @bio: bio to generate integrity metadata for - * - * Description: Generates integrity metadata for a bio by calling the - * block device's generation callback function. The bio must have a - * bip attached with enough room to accommodate the generated - * integrity metadata. + * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio + * @bio: bio to generate/verify integrity metadata for + * @operate: operate number, 1 for generate, 0 for verify */ -static void bio_integrity_generate(struct bio *bio) +static int bio_integrity_generate_verify(struct bio *bio, int operate) { struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); struct blk_integrity_exchg bix; struct bio_vec bv; struct bvec_iter iter; - sector_t sector = bio->bi_iter.bi_sector; - unsigned int sectors, total; + sector_t sector; + unsigned int sectors, ret = 0; void *prot_buf = bio->bi_integrity->bip_buf; - total = 0; + if (operate) + sector = bio->bi_iter.bi_sector; + else + sector = bio->bi_integrity->bip_iter.bi_sector; + bix.disk_name = bio->bi_bdev->bd_disk->disk_name; bix.sector_size = bi->sector_size; @@ -330,16 +330,37 @@ static void bio_integrity_generate(struct bio *bio) bix.prot_buf = prot_buf; bix.sector = sector; - bi->generate_fn(&bix); + if (operate) { + bi->generate_fn(&bix); + } else { + ret = bi->verify_fn(&bix); + if (ret) { + kunmap_atomic(kaddr); + return ret; + } + } sectors = bv.bv_len / bi->sector_size; sector += sectors; prot_buf += sectors * bi->tuple_size; - total += sectors * bi->tuple_size; - BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); kunmap_atomic(kaddr); } + return ret; +} + +/** + * bio_integrity_generate - Generate integrity metadata for a bio + * @bio: bio to generate integrity metadata for + * + * Description: Generates integrity metadata for a bio by calling the + * block device's generation callback function. The bio must have a + * bip attached with enough room to accommodate the generated + * integrity metadata. + */ +static void bio_integrity_generate(struct bio *bio) +{ + bio_integrity_generate_verify(bio, 1); } static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) @@ -454,40 +475,7 @@ EXPORT_SYMBOL(bio_integrity_prep); */ static int bio_integrity_verify(struct bio *bio) { - struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); - struct blk_integrity_exchg bix; - struct bio_vec *bv; - sector_t sector = bio->bi_integrity->bip_iter.bi_sector; - unsigned int sectors, ret = 0; - void *prot_buf = bio->bi_integrity->bip_buf; - int i; - - bix.disk_name = bio->bi_bdev->bd_disk->disk_name; - bix.sector_size = bi->sector_size; - - bio_for_each_segment_all(bv, bio, i) { - void *kaddr = kmap_atomic(bv->bv_page); - - bix.data_buf = kaddr + bv->bv_offset; - bix.data_size = bv->bv_len; - bix.prot_buf = prot_buf; - bix.sector = sector; - - ret = bi->verify_fn(&bix); - - if (ret) { - kunmap_atomic(kaddr); - return ret; - } - - sectors = bv->bv_len / bi->sector_size; - sector += sectors; - prot_buf += sectors * bi->tuple_size; - - kunmap_atomic(kaddr); - } - - return ret; + return bio_integrity_generate_verify(bio, 0); } /** @@ -116,7 +116,6 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) if (!slab) goto out_unlock; - printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry); bslab->slab = slab; bslab->slab_ref = 1; bslab->slab_size = sz; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index cf32f0393369..c0f3718b77a8 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -513,7 +513,7 @@ struct cifs_mnt_data { static inline unsigned int get_rfc1002_length(void *buf) { - return be32_to_cpu(*((__be32 *)buf)); + return be32_to_cpu(*((__be32 *)buf)) & 0xffffff; } static inline void diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 53c15074bb36..834fce759d80 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2579,31 +2579,19 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; ssize_t rc = -EACCES; + loff_t lock_pos = pos; - BUG_ON(iocb->ki_pos != pos); - + if (file->f_flags & O_APPEND) + lock_pos = i_size_read(inode); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents writing. */ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), server->vals->exclusive_lock_type, NULL, - CIFS_WRITE_OP)) { - mutex_lock(&inode->i_mutex); - rc = __generic_file_aio_write(iocb, iov, nr_segs, - &iocb->ki_pos); - mutex_unlock(&inode->i_mutex); - } - - if (rc > 0) { - ssize_t err; - - err = generic_write_sync(file, iocb->ki_pos - rc, rc); - if (err < 0) - rc = err; - } - + CIFS_WRITE_OP)) + rc = generic_file_aio_write(iocb, iov, nr_segs, pos); up_read(&cinode->lock_sem); return rc; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index b37570952846..18cd5650a5fc 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -270,6 +270,26 @@ cifs_rqst_page_to_kvec(struct smb_rqst *rqst, unsigned int idx, iov->iov_len = rqst->rq_pagesz; } +static unsigned long +rqst_len(struct smb_rqst *rqst) +{ + unsigned int i; + struct kvec *iov = rqst->rq_iov; + unsigned long buflen = 0; + + /* total up iov array first */ + for (i = 0; i < rqst->rq_nvec; i++) + buflen += iov[i].iov_len; + + /* add in the page array if there is one */ + if (rqst->rq_npages) { + buflen += rqst->rq_pagesz * (rqst->rq_npages - 1); + buflen += rqst->rq_tailsz; + } + + return buflen; +} + static int smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) { @@ -277,6 +297,7 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) struct kvec *iov = rqst->rq_iov; int n_vec = rqst->rq_nvec; unsigned int smb_buf_length = get_rfc1002_length(iov[0].iov_base); + unsigned long send_length; unsigned int i; size_t total_len = 0, sent; struct socket *ssocket = server->ssocket; @@ -285,6 +306,14 @@ smb_send_rqst(struct TCP_Server_Info *server, struct smb_rqst *rqst) if (ssocket == NULL) return -ENOTSOCK; + /* sanity check send length */ + send_length = rqst_len(rqst); + if (send_length != smb_buf_length + 4) { + WARN(1, "Send length mismatch(send_length=%lu smb_buf_length=%u)\n", + send_length, smb_buf_length); + return -EIO; + } + cifs_dbg(FYI, "Sending smb: smb_len=%u\n", smb_buf_length); dump_smb(iov[0].iov_base, iov[0].iov_len); diff --git a/fs/compat.c b/fs/compat.c index 6af20de2c1a3..f86df85dff61 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...) * Not all architectures have sys_utime, so implement this in terms * of sys_utimes. */ -asmlinkage long compat_sys_utime(const char __user *filename, - struct compat_utimbuf __user *t) +COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, + struct compat_utimbuf __user *, t) { struct timespec tv[2]; @@ -87,13 +87,13 @@ asmlinkage long compat_sys_utime(const char __user *filename, return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) +COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) { struct timespec tv[2]; if (t) { - if (get_compat_timespec(&tv[0], &t[0]) || - get_compat_timespec(&tv[1], &t[1])) + if (compat_get_timespec(&tv[0], &t[0]) || + compat_get_timespec(&tv[1], &t[1])) return -EFAULT; if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) @@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, flags); } -asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) { struct timespec tv[2]; @@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena return do_utimes(dfd, filename, t ? tv : NULL, 0); } -asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) +COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) { return compat_sys_futimesat(AT_FDCWD, filename, t); } @@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; } -asmlinkage long compat_sys_newstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename, return cp_compat_stat(&stat, statbuf); } -asmlinkage long compat_sys_newlstat(const char __user * filename, - struct compat_stat __user *statbuf) +COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, + struct compat_stat __user *, statbuf) { struct kstat stat; int error; @@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename, } #ifndef __ARCH_WANT_STAT64 -asmlinkage long compat_sys_newfstatat(unsigned int dfd, - const char __user *filename, - struct compat_stat __user *statbuf, int flag) +COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, + const char __user *, filename, + struct compat_stat __user *, statbuf, int, flag) { struct kstat stat; int error; @@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, } #endif -asmlinkage long compat_sys_newfstat(unsigned int fd, - struct compat_stat __user * statbuf) +COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, + struct compat_stat __user *, statbuf) { struct kstat stat; int error = vfs_fstat(fd, &stat); @@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * * The following statfs calls are copies of code from fs/statfs.c and * should be checked against those from time to time */ -asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = user_statfs(pathname, &tmp); @@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta return error; } -asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) +COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) { struct kstatfs tmp; int error = fd_statfs(fd, &tmp); @@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat return 0; } -asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s return error; } -asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) +COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) { struct kstatfs tmp; int error; @@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c * Given how simple this syscall is that apporach is more maintainable * than the various conversion hacks. */ -asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) +COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) { struct compat_ustat tmp; struct kstatfs sbuf; @@ -399,8 +399,8 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u } #endif -asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { mm_segment_t old_fs; struct flock f; @@ -468,16 +468,15 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, return ret; } -asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg) { if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) return -EINVAL; return compat_sys_fcntl64(fd, cmd, arg); } -asmlinkage long -compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) +COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) { long ret; aio_context_t ctx64; @@ -496,32 +495,24 @@ compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) return ret; } -asmlinkage long -compat_sys_io_getevents(aio_context_t ctx_id, - unsigned long min_nr, - unsigned long nr, - struct io_event __user *events, - struct compat_timespec __user *timeout) +COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, + compat_long_t, min_nr, + compat_long_t, nr, + struct io_event __user *, events, + struct compat_timespec __user *, timeout) { - long ret; struct timespec t; struct timespec __user *ut = NULL; - ret = -EFAULT; - if (unlikely(!access_ok(VERIFY_WRITE, events, - nr * sizeof(struct io_event)))) - goto out; if (timeout) { - if (get_compat_timespec(&t, timeout)) - goto out; + if (compat_get_timespec(&t, timeout)) + return -EFAULT; ut = compat_alloc_user_space(sizeof(*ut)); if (copy_to_user(ut, &t, sizeof(t)) ) - goto out; + return -EFAULT; } - ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut); -out: - return ret; + return sys_io_getevents(ctx_id, min_nr, nr, events, ut); } /* A write operation does a read from user space and vice versa */ @@ -617,8 +608,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) -asmlinkage long -compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) +COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, + int, nr, u32 __user *, iocb) { struct iocb __user * __user *iocb64; long ret; @@ -770,10 +761,10 @@ static int do_nfs4_super_data_conv(void *raw_data) #define NCPFS_NAME "ncpfs" #define NFS4_NAME "nfs4" -asmlinkage long compat_sys_mount(const char __user * dev_name, - const char __user * dir_name, - const char __user * type, unsigned long flags, - const void __user * data) +COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, + const char __user *, dir_name, + const char __user *, type, compat_ulong_t, flags, + const void __user *, data) { char *kernel_type; unsigned long data_page; @@ -869,8 +860,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_old_readdir(unsigned int fd, - struct compat_old_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, + struct compat_old_linux_dirent __user *, dirent, unsigned int, count) { int error; struct fd f = fdget(fd); @@ -948,8 +939,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents(unsigned int fd, - struct compat_linux_dirent __user *dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, + struct compat_linux_dirent __user *, dirent, unsigned int, count) { struct fd f; struct compat_linux_dirent __user * lastdirent; @@ -981,7 +972,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, return error; } -#ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 +#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 struct compat_getdents_callback64 { struct dir_context ctx; @@ -1033,8 +1024,8 @@ efault: return -EFAULT; } -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user * dirent, unsigned int count) +COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, + struct linux_dirent64 __user *, dirent, unsigned int, count) { struct fd f; struct linux_dirent64 __user * lastdirent; @@ -1066,7 +1057,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, fdput(f); return error; } -#endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ +#endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ /* * Exactly like fs/open.c:sys_open(), except that it doesn't set the @@ -1287,9 +1278,9 @@ out_nofds: return ret; } -asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timeval __user *tvp) +COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timeval __user *, tvp) { struct timespec end_time, *to = NULL; struct compat_timeval tv; @@ -1320,7 +1311,7 @@ struct compat_sel_arg_struct { compat_uptr_t tvp; }; -asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) +COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) { struct compat_sel_arg_struct a; @@ -1381,9 +1372,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, return ret; } -asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, - compat_ulong_t __user *outp, compat_ulong_t __user *exp, - struct compat_timespec __user *tsp, void __user *sig) +COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, + compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, + struct compat_timespec __user *, tsp, void __user *, sig) { compat_size_t sigsetsize = 0; compat_uptr_t up = 0; @@ -1400,9 +1391,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, sigsetsize); } -asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, - unsigned int nfds, struct compat_timespec __user *tsp, - const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) +COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, + unsigned int, nfds, struct compat_timespec __user *, tsp, + const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) { compat_sigset_t ss32; sigset_t ksigmask, sigsaved; diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c @@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, #define ELF_HWCAP COMPAT_ELF_HWCAP #endif +#ifdef COMPAT_ELF_HWCAP2 +#undef ELF_HWCAP2 +#define ELF_HWCAP2 COMPAT_ELF_HWCAP2 +#endif + #ifdef COMPAT_ARCH_DLINFO #undef ARCH_DLINFO #define ARCH_DLINFO COMPAT_ARCH_DLINFO diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3881610b6438..e82289047272 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd) return ioctl_pointer[i] == xcmd; } -asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, - unsigned long arg) +COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, + compat_ulong_t, arg32) { + unsigned long arg = arg32; struct fd f = fdget(fd); int error = -EBADF; if (!f.file) diff --git a/fs/dcache.c b/fs/dcache.c index 265e0ce9769c..ca02c13a84aa 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2833,9 +2833,9 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name) u32 dlen = ACCESS_ONCE(name->len); char *p; - if (*buflen < dlen + 1) - return -ENAMETOOLONG; *buflen -= dlen + 1; + if (*buflen < 0) + return -ENAMETOOLONG; p = *buffer -= dlen + 1; *p++ = '/'; while (dlen--) { diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8dd524f32284..cdb2971192a5 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c @@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file, u32 attributes; struct inode *inode = file->f_mapping->host; unsigned long datasize = count - sizeof(attributes); - ssize_t bytes = 0; + ssize_t bytes; bool set = false; if (count < sizeof(attributes)) @@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file, if (attributes & ~(EFI_VARIABLE_MASK)) return -EINVAL; - data = kmalloc(datasize, GFP_KERNEL); - if (!data) - return -ENOMEM; - - if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { - bytes = -EFAULT; - goto out; - } + data = memdup_user(userbuf + sizeof(attributes), datasize); + if (IS_ERR(data)) + return PTR_ERR(data); bytes = efivar_entry_set_get_size(var, attributes, &datasize, data, &set); diff --git a/fs/exec.c b/fs/exec.c index 3d78fccdd723..4f59402fdda5 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1619,9 +1619,9 @@ SYSCALL_DEFINE3(execve, return do_execve(getname(filename), argv, envp); } #ifdef CONFIG_COMPAT -asmlinkage long compat_sys_execve(const char __user * filename, - const compat_uptr_t __user * argv, - const compat_uptr_t __user * envp) +COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, + const compat_uptr_t __user *, argv, + const compat_uptr_t __user *, envp) { return compat_do_execve(getname(filename), argv, envp); } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 6e39895a91b8..24bfd7ff3049 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -38,6 +38,7 @@ #include <linux/slab.h> #include <linux/ratelimit.h> #include <linux/aio.h> +#include <linux/bitops.h> #include "ext4_jbd2.h" #include "xattr.h" @@ -3921,18 +3922,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) void ext4_set_inode_flags(struct inode *inode) { unsigned int flags = EXT4_I(inode)->i_flags; + unsigned int new_fl = 0; - inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); if (flags & EXT4_SYNC_FL) - inode->i_flags |= S_SYNC; + new_fl |= S_SYNC; if (flags & EXT4_APPEND_FL) - inode->i_flags |= S_APPEND; + new_fl |= S_APPEND; if (flags & EXT4_IMMUTABLE_FL) - inode->i_flags |= S_IMMUTABLE; + new_fl |= S_IMMUTABLE; if (flags & EXT4_NOATIME_FL) - inode->i_flags |= S_NOATIME; + new_fl |= S_NOATIME; if (flags & EXT4_DIRSYNC_FL) - inode->i_flags |= S_DIRSYNC; + new_fl |= S_DIRSYNC; + set_mask_bits(&inode->i_flags, + S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); } /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ diff --git a/fs/file.c b/fs/file.c index db25c2bdfe46..b61293badfb1 100644 --- a/fs/file.c +++ b/fs/file.c @@ -497,7 +497,7 @@ repeat: error = fd; #if 1 /* Sanity check */ - if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { + if (rcu_access_pointer(fdt->fd[fd]) != NULL) { printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); rcu_assign_pointer(fdt->fd[fd], NULL); } @@ -683,35 +683,54 @@ EXPORT_SYMBOL(fget_raw); * The fput_needed flag returned by fget_light should be passed to the * corresponding fput_light. */ -struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed) +static unsigned long __fget_light(unsigned int fd, fmode_t mask) { struct files_struct *files = current->files; struct file *file; - *fput_needed = 0; if (atomic_read(&files->count) == 1) { file = __fcheck_files(files, fd); - if (file && (file->f_mode & mask)) - file = NULL; + if (!file || unlikely(file->f_mode & mask)) + return 0; + return (unsigned long)file; } else { file = __fget(fd, mask); - if (file) - *fput_needed = 1; + if (!file) + return 0; + return FDPUT_FPUT | (unsigned long)file; } - - return file; } -struct file *fget_light(unsigned int fd, int *fput_needed) +unsigned long __fdget(unsigned int fd) { - return __fget_light(fd, FMODE_PATH, fput_needed); + return __fget_light(fd, FMODE_PATH); } -EXPORT_SYMBOL(fget_light); +EXPORT_SYMBOL(__fdget); -struct file *fget_raw_light(unsigned int fd, int *fput_needed) +unsigned long __fdget_raw(unsigned int fd) { - return __fget_light(fd, 0, fput_needed); + return __fget_light(fd, 0); } +unsigned long __fdget_pos(unsigned int fd) +{ + unsigned long v = __fdget(fd); + struct file *file = (struct file *)(v & ~3); + + if (file && (file->f_mode & FMODE_ATOMIC_POS)) { + if (file_count(file) > 1) { + v |= FDPUT_POS_UNLOCK; + mutex_lock(&file->f_pos_lock); + } + } + return v; +} + +/* + * We only lock f_pos if we have threads or if the file might be + * shared with another process. In both cases we'll have an elevated + * file count (done either by fdget() or by fork()). + */ + void set_close_on_exec(unsigned int fd, int flag) { struct files_struct *files = current->files; diff --git a/fs/file_table.c b/fs/file_table.c index 5fff9030be34..5b24008ea4f6 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -135,6 +135,7 @@ struct file *get_empty_filp(void) atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); spin_lock_init(&f->f_lock); + mutex_init(&f->f_pos_lock); eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 968ce411db53..32602c667b4a 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -103,6 +103,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, folder = &entry->folder; memset(folder, 0, sizeof(*folder)); folder->type = cpu_to_be16(HFSPLUS_FOLDER); + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) + folder->flags |= cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT); folder->id = cpu_to_be32(inode->i_ino); HFSPLUS_I(inode)->create_date = folder->create_date = @@ -203,6 +205,36 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, return hfs_brec_find(fd, hfs_find_rec_by_key); } +static void hfsplus_subfolders_inc(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Increment subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + */ + HFSPLUS_I(dir)->subfolders++; + } +} + +static void hfsplus_subfolders_dec(struct inode *dir) +{ + struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); + + if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { + /* + * Decrement subfolder count. Note, the value is only meaningful + * for folders with HFSPLUS_HAS_FOLDER_COUNT flag set. + * + * Check for zero. Some subfolders may have been created + * by an implementation ignorant of this counter. + */ + if (HFSPLUS_I(dir)->subfolders) + HFSPLUS_I(dir)->subfolders--; + } +} + int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) { @@ -247,6 +279,8 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, goto err1; dir->i_size++; + if (S_ISDIR(inode->i_mode)) + hfsplus_subfolders_inc(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -336,6 +370,8 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) goto out; dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(dir); dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY); @@ -380,6 +416,7 @@ int hfsplus_rename_cat(u32 cnid, hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); + type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); @@ -394,6 +431,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; dst_dir->i_size++; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_inc(dst_dir); dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ @@ -405,6 +444,8 @@ int hfsplus_rename_cat(u32 cnid, if (err) goto out; src_dir->i_size--; + if (type == HFSPLUS_FOLDER) + hfsplus_subfolders_dec(src_dir); src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 08846425b67f..62d571eb69ba 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -242,6 +242,7 @@ struct hfsplus_inode_info { */ sector_t fs_blocks; u8 userflags; /* BSD user file flags */ + u32 subfolders; /* Subfolder count (HFSX only) */ struct list_head open_dir_list; loff_t phys_size; diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index 8ffb3a8ffe75..5a126828d85e 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h @@ -261,7 +261,7 @@ struct hfsplus_cat_folder { struct DInfo user_info; struct DXInfo finder_info; __be32 text_encoding; - u32 reserved; + __be32 subfolders; /* Subfolder count in HFSX. Reserved in HFS+. */ } __packed; /* HFS file info (stolen from hfs.h) */ @@ -301,11 +301,13 @@ struct hfsplus_cat_file { struct hfsplus_fork_raw rsrc_fork; } __packed; -/* File attribute bits */ +/* File and folder flag bits */ #define HFSPLUS_FILE_LOCKED 0x0001 #define HFSPLUS_FILE_THREAD_EXISTS 0x0002 #define HFSPLUS_XATTR_EXISTS 0x0004 #define HFSPLUS_ACL_EXISTS 0x0008 +#define HFSPLUS_HAS_FOLDER_COUNT 0x0010 /* Folder has subfolder count + * (HFSX only) */ /* HFS+ catalog thread (part of a cat_entry) */ struct hfsplus_cat_thread { diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index fa929f325f87..a4f45bd88a63 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -375,6 +375,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, umode_t mode) hip->extent_state = 0; hip->flags = 0; hip->userflags = 0; + hip->subfolders = 0; memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); hip->alloc_blocks = 0; @@ -494,6 +495,10 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); HFSPLUS_I(inode)->create_date = folder->create_date; HFSPLUS_I(inode)->fs_blocks = 0; + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + HFSPLUS_I(inode)->subfolders = + be32_to_cpu(folder->subfolders); + } inode->i_op = &hfsplus_dir_inode_operations; inode->i_fop = &hfsplus_dir_operations; } else if (type == HFSPLUS_FILE) { @@ -566,6 +571,10 @@ int hfsplus_cat_write_inode(struct inode *inode) folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); folder->valence = cpu_to_be32(inode->i_size - 2); + if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) { + folder->subfolders = + cpu_to_be32(HFSPLUS_I(inode)->subfolders); + } hfs_bnode_write(fd.bnode, &entry, fd.entryoffset, sizeof(struct hfsplus_cat_folder)); } else if (HFSPLUS_IS_RSRC(inode)) { diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig new file mode 100644 index 000000000000..397b5f7a7a16 --- /dev/null +++ b/fs/kernfs/Kconfig @@ -0,0 +1,7 @@ +# +# KERNFS should be selected by its users +# + +config KERNFS + bool + default n diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bd6e18be6e1a..0bd05ab26003 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -8,6 +8,7 @@ * This file is released under the GPLv2. */ +#include <linux/sched.h> #include <linux/fs.h> #include <linux/namei.h> #include <linux/idr.h> @@ -18,9 +19,161 @@ #include "kernfs-internal.h" DEFINE_MUTEX(kernfs_mutex); +static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ +static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) +static bool kernfs_active(struct kernfs_node *kn) +{ + lockdep_assert_held(&kernfs_mutex); + return atomic_read(&kn->active) >= 0; +} + +static bool kernfs_lockdep(struct kernfs_node *kn) +{ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + return kn->flags & KERNFS_LOCKDEP; +#else + return false; +#endif +} + +static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) +{ + return strlcpy(buf, kn->parent ? kn->name : "/", buflen); +} + +static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, + size_t buflen) +{ + char *p = buf + buflen; + int len; + + *--p = '\0'; + + do { + len = strlen(kn->name); + if (p - buf < len + 1) { + buf[0] = '\0'; + p = NULL; + break; + } + p -= len; + memcpy(p, kn->name, len); + *--p = '/'; + kn = kn->parent; + } while (kn && kn->parent); + + return p; +} + +/** + * kernfs_name - obtain the name of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Copies the name of @kn into @buf of @buflen bytes. The behavior is + * similar to strlcpy(). It returns the length of @kn's name and if @buf + * isn't long enough, it's filled upto @buflen-1 and nul terminated. + * + * This function can be called from any context. + */ +int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + ret = kernfs_name_locked(kn, buf, buflen); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + return ret; +} + +/** + * kernfs_path - build full path of a given node + * @kn: kernfs_node of interest + * @buf: buffer to copy @kn's name into + * @buflen: size of @buf + * + * Builds and returns the full path of @kn in @buf of @buflen bytes. The + * path is built from the end of @buf so the returned pointer usually + * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated + * and %NULL is returned. + */ +char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) +{ + unsigned long flags; + char *p; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + p = kernfs_path_locked(kn, buf, buflen); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + return p; +} + +/** + * pr_cont_kernfs_name - pr_cont name of a kernfs_node + * @kn: kernfs_node of interest + * + * This function can be called from any context. + */ +void pr_cont_kernfs_name(struct kernfs_node *kn) +{ + unsigned long flags; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + + kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); + pr_cont("%s", kernfs_pr_cont_buf); + + spin_unlock_irqrestore(&kernfs_rename_lock, flags); +} + +/** + * pr_cont_kernfs_path - pr_cont path of a kernfs_node + * @kn: kernfs_node of interest + * + * This function can be called from any context. + */ +void pr_cont_kernfs_path(struct kernfs_node *kn) +{ + unsigned long flags; + char *p; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + + p = kernfs_path_locked(kn, kernfs_pr_cont_buf, + sizeof(kernfs_pr_cont_buf)); + if (p) + pr_cont("%s", p); + else + pr_cont("<name too long>"); + + spin_unlock_irqrestore(&kernfs_rename_lock, flags); +} + +/** + * kernfs_get_parent - determine the parent node and pin it + * @kn: kernfs_node of interest + * + * Determines @kn's parent, pins and returns it. This function can be + * called from any context. + */ +struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) +{ + struct kernfs_node *parent; + unsigned long flags; + + spin_lock_irqsave(&kernfs_rename_lock, flags); + parent = kn->parent; + kernfs_get(parent); + spin_unlock_irqrestore(&kernfs_rename_lock, flags); + + return parent; +} + /** * kernfs_name_hash * @name: Null terminated string to hash @@ -37,7 +190,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); hash &= 0x7fffffffU; /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ - if (hash < 1) + if (hash < 2) hash += 2; if (hash >= INT_MAX) hash = INT_MAX - 1; @@ -105,18 +258,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn) * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree * @kn: kernfs_node of interest * - * Unlink @kn from its sibling rbtree which starts from - * kn->parent->dir.children. + * Try to unlink @kn from its sibling rbtree which starts from + * kn->parent->dir.children. Returns %true if @kn was actually + * removed, %false if @kn wasn't on the rbtree. * * Locking: * mutex_lock(kernfs_mutex) */ -static void kernfs_unlink_sibling(struct kernfs_node *kn) +static bool kernfs_unlink_sibling(struct kernfs_node *kn) { + if (RB_EMPTY_NODE(&kn->rb)) + return false; + if (kernfs_type(kn) == KERNFS_DIR) kn->parent->dir.subdirs--; rb_erase(&kn->rb, &kn->parent->dir.children); + RB_CLEAR_NODE(&kn->rb); + return true; } /** @@ -137,7 +296,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) if (!atomic_inc_unless_negative(&kn->active)) return NULL; - if (kn->flags & KERNFS_LOCKDEP) + if (kernfs_lockdep(kn)) rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); return kn; } @@ -151,59 +310,57 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) */ void kernfs_put_active(struct kernfs_node *kn) { + struct kernfs_root *root = kernfs_root(kn); int v; if (unlikely(!kn)) return; - if (kn->flags & KERNFS_LOCKDEP) + if (kernfs_lockdep(kn)) rwsem_release(&kn->dep_map, 1, _RET_IP_); v = atomic_dec_return(&kn->active); if (likely(v != KN_DEACTIVATED_BIAS)) return; - /* - * atomic_dec_return() is a mb(), we'll always see the updated - * kn->u.completion. - */ - complete(kn->u.completion); + wake_up_all(&root->deactivate_waitq); } /** - * kernfs_deactivate - deactivate kernfs_node - * @kn: kernfs_node to deactivate + * kernfs_drain - drain kernfs_node + * @kn: kernfs_node to drain * - * Deny new active references and drain existing ones. + * Drain existing usages and nuke all existing mmaps of @kn. Mutiple + * removers may invoke this function concurrently on @kn and all will + * return after draining is complete. */ -static void kernfs_deactivate(struct kernfs_node *kn) +static void kernfs_drain(struct kernfs_node *kn) + __releases(&kernfs_mutex) __acquires(&kernfs_mutex) { - DECLARE_COMPLETION_ONSTACK(wait); - int v; + struct kernfs_root *root = kernfs_root(kn); - BUG_ON(!(kn->flags & KERNFS_REMOVED)); - - if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) - return; + lockdep_assert_held(&kernfs_mutex); + WARN_ON_ONCE(kernfs_active(kn)); - kn->u.completion = (void *)&wait; + mutex_unlock(&kernfs_mutex); - if (kn->flags & KERNFS_LOCKDEP) + if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); - /* atomic_add_return() is a mb(), put_active() will always see - * the updated kn->u.completion. - */ - v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); - - if (v != KN_DEACTIVATED_BIAS) { - if (kn->flags & KERNFS_LOCKDEP) + if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) lock_contended(&kn->dep_map, _RET_IP_); - wait_for_completion(&wait); } - if (kn->flags & KERNFS_LOCKDEP) { + /* but everyone should wait for draining */ + wait_event(root->deactivate_waitq, + atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); + + if (kernfs_lockdep(kn)) { lock_acquired(&kn->dep_map, _RET_IP_); rwsem_release(&kn->dep_map, 1, _RET_IP_); } + + kernfs_unmap_bin_file(kn); + + mutex_lock(&kernfs_mutex); } /** @@ -234,13 +391,15 @@ void kernfs_put(struct kernfs_node *kn) return; root = kernfs_root(kn); repeat: - /* Moving/renaming is always done while holding reference. + /* + * Moving/renaming is always done while holding reference. * kn->parent won't change beneath us. */ parent = kn->parent; - WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", - parent ? parent->name : "", kn->name); + WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, + "kernfs_put: %s/%s: released with incorrect active_ref %d\n", + parent ? parent->name : "", kn->name, atomic_read(&kn->active)); if (kernfs_type(kn) == KERNFS_LINK) kernfs_put(kn->symlink.target_kn); @@ -282,8 +441,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) kn = dentry->d_fsdata; mutex_lock(&kernfs_mutex); - /* The kernfs node has been deleted */ - if (kn->flags & KERNFS_REMOVED) + /* The kernfs node has been deactivated */ + if (!kernfs_active(kn)) goto out_bad; /* The kernfs node has been moved? */ @@ -328,6 +487,24 @@ const struct dentry_operations kernfs_dops = { .d_release = kernfs_dop_release, }; +/** + * kernfs_node_from_dentry - determine kernfs_node associated with a dentry + * @dentry: the dentry in question + * + * Return the kernfs_node associated with @dentry. If @dentry is not a + * kernfs one, %NULL is returned. + * + * While the returned kernfs_node will stay accessible as long as @dentry + * is accessible, the returned node can be in any state and the caller is + * fully responsible for determining what's accessible. + */ +struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) +{ + if (dentry->d_sb->s_op == &kernfs_sops) + return dentry->d_fsdata; + return NULL; +} + static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, const char *name, umode_t mode, unsigned flags) @@ -352,11 +529,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, kn->ino = ret; atomic_set(&kn->count, 1); - atomic_set(&kn->active, 0); + atomic_set(&kn->active, KN_DEACTIVATED_BIAS); + RB_CLEAR_NODE(&kn->rb); kn->name = name; kn->mode = mode; - kn->flags = flags | KERNFS_REMOVED; + kn->flags = flags; return kn; @@ -382,69 +560,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, } /** - * kernfs_addrm_start - prepare for kernfs_node add/remove - * @acxt: pointer to kernfs_addrm_cxt to be used - * - * This function is called when the caller is about to add or remove - * kernfs_node. This function acquires kernfs_mutex. @acxt is used - * to keep and pass context to other addrm functions. - * - * LOCKING: - * Kernel thread context (may sleep). kernfs_mutex is locked on - * return. - */ -void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt) - __acquires(kernfs_mutex) -{ - memset(acxt, 0, sizeof(*acxt)); - - mutex_lock(&kernfs_mutex); -} - -/** * kernfs_add_one - add kernfs_node to parent without warning - * @acxt: addrm context to use * @kn: kernfs_node to be added * * The caller must already have initialized @kn->parent. This * function increments nlink of the parent's inode if @kn is a * directory and link into the children list of the parent. * - * This function should be called between calls to - * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed - * the same @acxt as passed to kernfs_addrm_start(). - * - * LOCKING: - * Determined by kernfs_addrm_start(). - * * RETURNS: * 0 on success, -EEXIST if entry with the given name already * exists. */ -int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) +int kernfs_add_one(struct kernfs_node *kn) { struct kernfs_node *parent = kn->parent; - bool has_ns = kernfs_ns_enabled(parent); struct kernfs_iattrs *ps_iattr; + bool has_ns; int ret; - if (has_ns != (bool)kn->ns) { - WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", - has_ns ? "required" : "invalid", parent->name, kn->name); - return -EINVAL; - } + mutex_lock(&kernfs_mutex); + + ret = -EINVAL; + has_ns = kernfs_ns_enabled(parent); + if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", + has_ns ? "required" : "invalid", parent->name, kn->name)) + goto out_unlock; if (kernfs_type(parent) != KERNFS_DIR) - return -EINVAL; + goto out_unlock; - if (parent->flags & KERNFS_REMOVED) - return -ENOENT; + ret = -ENOENT; + if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) + goto out_unlock; kn->hash = kernfs_name_hash(kn->name, kn->ns); ret = kernfs_link_sibling(kn); if (ret) - return ret; + goto out_unlock; /* Update timestamps on the parent */ ps_iattr = parent->iattr; @@ -453,82 +606,22 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; } - /* Mark the entry added into directory tree */ - kn->flags &= ~KERNFS_REMOVED; - - return 0; -} - -/** - * kernfs_remove_one - remove kernfs_node from parent - * @acxt: addrm context to use - * @kn: kernfs_node to be removed - * - * Mark @kn removed and drop nlink of parent inode if @kn is a - * directory. @kn is unlinked from the children list. - * - * This function should be called between calls to - * kernfs_addrm_start() and kernfs_addrm_finish() and should be - * passed the same @acxt as passed to kernfs_addrm_start(). - * - * LOCKING: - * Determined by kernfs_addrm_start(). - */ -static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt, - struct kernfs_node *kn) -{ - struct kernfs_iattrs *ps_iattr; + mutex_unlock(&kernfs_mutex); /* - * Removal can be called multiple times on the same node. Only the - * first invocation is effective and puts the base ref. + * Activate the new node unless CREATE_DEACTIVATED is requested. + * If not activated here, the kernfs user is responsible for + * activating the node with kernfs_activate(). A node which hasn't + * been activated is not visible to userland and its removal won't + * trigger deactivation. */ - if (kn->flags & KERNFS_REMOVED) - return; - - if (kn->parent) { - kernfs_unlink_sibling(kn); - - /* Update timestamps on the parent */ - ps_iattr = kn->parent->iattr; - if (ps_iattr) { - ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; - ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; - } - } - - kn->flags |= KERNFS_REMOVED; - kn->u.removed_list = acxt->removed; - acxt->removed = kn; -} + if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) + kernfs_activate(kn); + return 0; -/** - * kernfs_addrm_finish - finish up kernfs_node add/remove - * @acxt: addrm context to finish up - * - * Finish up kernfs_node add/remove. Resources acquired by - * kernfs_addrm_start() are released and removed kernfs_nodes are - * cleaned up. - * - * LOCKING: - * kernfs_mutex is released. - */ -void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) - __releases(kernfs_mutex) -{ - /* release resources acquired by kernfs_addrm_start() */ +out_unlock: mutex_unlock(&kernfs_mutex); - - /* kill removed kernfs_nodes */ - while (acxt->removed) { - struct kernfs_node *kn = acxt->removed; - - acxt->removed = kn->u.removed_list; - - kernfs_deactivate(kn); - kernfs_unmap_bin_file(kn); - kernfs_put(kn); - } + return ret; } /** @@ -599,13 +692,15 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); /** * kernfs_create_root - create a new kernfs hierarchy - * @kdops: optional directory syscall operations for the hierarchy + * @scops: optional syscall operations for the hierarchy + * @flags: KERNFS_ROOT_* flags * @priv: opaque data associated with the new directory * * Returns the root of the new hierarchy on success, ERR_PTR() value on * failure. */ -struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) +struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, + unsigned int flags, void *priv) { struct kernfs_root *root; struct kernfs_node *kn; @@ -624,12 +719,16 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) return ERR_PTR(-ENOMEM); } - kn->flags &= ~KERNFS_REMOVED; kn->priv = priv; kn->dir.root = root; - root->dir_ops = kdops; + root->syscall_ops = scops; + root->flags = flags; root->kn = kn; + init_waitqueue_head(&root->deactivate_waitq); + + if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) + kernfs_activate(kn); return root; } @@ -660,7 +759,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, const char *name, umode_t mode, void *priv, const void *ns) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; int rc; @@ -674,10 +772,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, kn->priv = priv; /* link in */ - kernfs_addrm_start(&acxt); - rc = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + rc = kernfs_add_one(kn); if (!rc) return kn; @@ -703,7 +798,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, kn = kernfs_find_ns(parent, dentry->d_name.name, ns); /* no such entry */ - if (!kn) { + if (!kn || !kernfs_active(kn)) { ret = NULL; goto out_unlock; } @@ -728,23 +823,37 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) { struct kernfs_node *parent = dir->i_private; - struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; + int ret; - if (!kdops || !kdops->mkdir) + if (!scops || !scops->mkdir) return -EPERM; - return kdops->mkdir(parent, dentry->d_name.name, mode); + if (!kernfs_get_active(parent)) + return -ENODEV; + + ret = scops->mkdir(parent, dentry->d_name.name, mode); + + kernfs_put_active(parent); + return ret; } static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) { struct kernfs_node *kn = dentry->d_fsdata; - struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; + int ret; - if (!kdops || !kdops->rmdir) + if (!scops || !scops->rmdir) return -EPERM; - return kdops->rmdir(kn); + if (!kernfs_get_active(kn)) + return -ENODEV; + + ret = scops->rmdir(kn); + + kernfs_put_active(kn); + return ret; } static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, @@ -752,12 +861,25 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, { struct kernfs_node *kn = old_dentry->d_fsdata; struct kernfs_node *new_parent = new_dir->i_private; - struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; + struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; + int ret; - if (!kdops || !kdops->rename) + if (!scops || !scops->rename) return -EPERM; - return kdops->rename(kn, new_parent, new_dentry->d_name.name); + if (!kernfs_get_active(kn)) + return -ENODEV; + + if (!kernfs_get_active(new_parent)) { + kernfs_put_active(kn); + return -ENODEV; + } + + ret = scops->rename(kn, new_parent, new_dentry->d_name.name); + + kernfs_put_active(new_parent); + kernfs_put_active(kn); + return ret; } const struct inode_operations kernfs_dir_iops = { @@ -830,23 +952,104 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, return pos->parent; } -static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, - struct kernfs_node *kn) +/** + * kernfs_activate - activate a node which started deactivated + * @kn: kernfs_node whose subtree is to be activated + * + * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node + * needs to be explicitly activated. A node which hasn't been activated + * isn't visible to userland and deactivation is skipped during its + * removal. This is useful to construct atomic init sequences where + * creation of multiple nodes should either succeed or fail atomically. + * + * The caller is responsible for ensuring that this function is not called + * after kernfs_remove*() is invoked on @kn. + */ +void kernfs_activate(struct kernfs_node *kn) { - struct kernfs_node *pos, *next; + struct kernfs_node *pos; - if (!kn) + mutex_lock(&kernfs_mutex); + + pos = NULL; + while ((pos = kernfs_next_descendant_post(pos, kn))) { + if (!pos || (pos->flags & KERNFS_ACTIVATED)) + continue; + + WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); + WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); + + atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); + pos->flags |= KERNFS_ACTIVATED; + } + + mutex_unlock(&kernfs_mutex); +} + +static void __kernfs_remove(struct kernfs_node *kn) +{ + struct kernfs_node *pos; + + lockdep_assert_held(&kernfs_mutex); + + /* + * Short-circuit if non-root @kn has already finished removal. + * This is for kernfs_remove_self() which plays with active ref + * after removal. + */ + if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) return; pr_debug("kernfs %s: removing\n", kn->name); - next = NULL; + /* prevent any new usage under @kn by deactivating all nodes */ + pos = NULL; + while ((pos = kernfs_next_descendant_post(pos, kn))) + if (kernfs_active(pos)) + atomic_add(KN_DEACTIVATED_BIAS, &pos->active); + + /* deactivate and unlink the subtree node-by-node */ do { - pos = next; - next = kernfs_next_descendant_post(pos, kn); - if (pos) - kernfs_remove_one(acxt, pos); - } while (next); + pos = kernfs_leftmost_descendant(kn); + + /* + * kernfs_drain() drops kernfs_mutex temporarily and @pos's + * base ref could have been put by someone else by the time + * the function returns. Make sure it doesn't go away + * underneath us. + */ + kernfs_get(pos); + + /* + * Drain iff @kn was activated. This avoids draining and + * its lockdep annotations for nodes which have never been + * activated and allows embedding kernfs_remove() in create + * error paths without worrying about draining. + */ + if (kn->flags & KERNFS_ACTIVATED) + kernfs_drain(pos); + else + WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); + + /* + * kernfs_unlink_sibling() succeeds once per node. Use it + * to decide who's responsible for cleanups. + */ + if (!pos->parent || kernfs_unlink_sibling(pos)) { + struct kernfs_iattrs *ps_iattr = + pos->parent ? pos->parent->iattr : NULL; + + /* update timestamps on the parent */ + if (ps_iattr) { + ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; + ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; + } + + kernfs_put(pos); + } + + kernfs_put(pos); + } while (pos != kn); } /** @@ -857,11 +1060,140 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, */ void kernfs_remove(struct kernfs_node *kn) { - struct kernfs_addrm_cxt acxt; + mutex_lock(&kernfs_mutex); + __kernfs_remove(kn); + mutex_unlock(&kernfs_mutex); +} - kernfs_addrm_start(&acxt); - __kernfs_remove(&acxt, kn); - kernfs_addrm_finish(&acxt); +/** + * kernfs_break_active_protection - break out of active protection + * @kn: the self kernfs_node + * + * The caller must be running off of a kernfs operation which is invoked + * with an active reference - e.g. one of kernfs_ops. Each invocation of + * this function must also be matched with an invocation of + * kernfs_unbreak_active_protection(). + * + * This function releases the active reference of @kn the caller is + * holding. Once this function is called, @kn may be removed at any point + * and the caller is solely responsible for ensuring that the objects it + * dereferences are accessible. + */ +void kernfs_break_active_protection(struct kernfs_node *kn) +{ + /* + * Take out ourself out of the active ref dependency chain. If + * we're called without an active ref, lockdep will complain. + */ + kernfs_put_active(kn); +} + +/** + * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() + * @kn: the self kernfs_node + * + * If kernfs_break_active_protection() was called, this function must be + * invoked before finishing the kernfs operation. Note that while this + * function restores the active reference, it doesn't and can't actually + * restore the active protection - @kn may already or be in the process of + * being removed. Once kernfs_break_active_protection() is invoked, that + * protection is irreversibly gone for the kernfs operation instance. + * + * While this function may be called at any point after + * kernfs_break_active_protection() is invoked, its most useful location + * would be right before the enclosing kernfs operation returns. + */ +void kernfs_unbreak_active_protection(struct kernfs_node *kn) +{ + /* + * @kn->active could be in any state; however, the increment we do + * here will be undone as soon as the enclosing kernfs operation + * finishes and this temporary bump can't break anything. If @kn + * is alive, nothing changes. If @kn is being deactivated, the + * soon-to-follow put will either finish deactivation or restore + * deactivated state. If @kn is already removed, the temporary + * bump is guaranteed to be gone before @kn is released. + */ + atomic_inc(&kn->active); + if (kernfs_lockdep(kn)) + rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); +} + +/** + * kernfs_remove_self - remove a kernfs_node from its own method + * @kn: the self kernfs_node to remove + * + * The caller must be running off of a kernfs operation which is invoked + * with an active reference - e.g. one of kernfs_ops. This can be used to + * implement a file operation which deletes itself. + * + * For example, the "delete" file for a sysfs device directory can be + * implemented by invoking kernfs_remove_self() on the "delete" file + * itself. This function breaks the circular dependency of trying to + * deactivate self while holding an active ref itself. It isn't necessary + * to modify the usual removal path to use kernfs_remove_self(). The + * "delete" implementation can simply invoke kernfs_remove_self() on self + * before proceeding with the usual removal path. kernfs will ignore later + * kernfs_remove() on self. + * + * kernfs_remove_self() can be called multiple times concurrently on the + * same kernfs_node. Only the first one actually performs removal and + * returns %true. All others will wait until the kernfs operation which + * won self-removal finishes and return %false. Note that the losers wait + * for the completion of not only the winning kernfs_remove_self() but also + * the whole kernfs_ops which won the arbitration. This can be used to + * guarantee, for example, all concurrent writes to a "delete" file to + * finish only after the whole operation is complete. + */ +bool kernfs_remove_self(struct kernfs_node *kn) +{ + bool ret; + + mutex_lock(&kernfs_mutex); + kernfs_break_active_protection(kn); + + /* + * SUICIDAL is used to arbitrate among competing invocations. Only + * the first one will actually perform removal. When the removal + * is complete, SUICIDED is set and the active ref is restored + * while holding kernfs_mutex. The ones which lost arbitration + * waits for SUICDED && drained which can happen only after the + * enclosing kernfs operation which executed the winning instance + * of kernfs_remove_self() finished. + */ + if (!(kn->flags & KERNFS_SUICIDAL)) { + kn->flags |= KERNFS_SUICIDAL; + __kernfs_remove(kn); + kn->flags |= KERNFS_SUICIDED; + ret = true; + } else { + wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; + DEFINE_WAIT(wait); + + while (true) { + prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); + + if ((kn->flags & KERNFS_SUICIDED) && + atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) + break; + + mutex_unlock(&kernfs_mutex); + schedule(); + mutex_lock(&kernfs_mutex); + } + finish_wait(waitq, &wait); + WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); + ret = false; + } + + /* + * This must be done while holding kernfs_mutex; otherwise, waiting + * for SUICIDED && deactivated could finish prematurely. + */ + kernfs_unbreak_active_protection(kn); + + mutex_unlock(&kernfs_mutex); + return ret; } /** @@ -876,7 +1208,6 @@ void kernfs_remove(struct kernfs_node *kn) int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, const void *ns) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; if (!parent) { @@ -885,13 +1216,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, return -ENOENT; } - kernfs_addrm_start(&acxt); + mutex_lock(&kernfs_mutex); kn = kernfs_find_ns(parent, name, ns); if (kn) - __kernfs_remove(&acxt, kn); + __kernfs_remove(kn); - kernfs_addrm_finish(&acxt); + mutex_unlock(&kernfs_mutex); if (kn) return 0; @@ -909,12 +1240,18 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, const char *new_name, const void *new_ns) { + struct kernfs_node *old_parent; + const char *old_name = NULL; int error; + /* can't move or rename root */ + if (!kn->parent) + return -EINVAL; + mutex_lock(&kernfs_mutex); error = -ENOENT; - if ((kn->flags | new_parent->flags) & KERNFS_REMOVED) + if (!kernfs_active(kn) || !kernfs_active(new_parent)) goto out; error = 0; @@ -932,13 +1269,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, new_name = kstrdup(new_name, GFP_KERNEL); if (!new_name) goto out; - - if (kn->flags & KERNFS_STATIC_NAME) - kn->flags &= ~KERNFS_STATIC_NAME; - else - kfree(kn->name); - - kn->name = new_name; + } else { + new_name = NULL; } /* @@ -946,12 +1278,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, */ kernfs_unlink_sibling(kn); kernfs_get(new_parent); - kernfs_put(kn->parent); + + /* rename_lock protects ->parent and ->name accessors */ + spin_lock_irq(&kernfs_rename_lock); + + old_parent = kn->parent; + kn->parent = new_parent; + kn->ns = new_ns; + if (new_name) { + if (!(kn->flags & KERNFS_STATIC_NAME)) + old_name = kn->name; + kn->flags &= ~KERNFS_STATIC_NAME; + kn->name = new_name; + } + + spin_unlock_irq(&kernfs_rename_lock); + kn->hash = kernfs_name_hash(kn->name, kn->ns); - kn->parent = new_parent; kernfs_link_sibling(kn); + kernfs_put(old_parent); + kfree(old_name); + error = 0; out: mutex_unlock(&kernfs_mutex); @@ -974,7 +1323,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) { if (pos) { - int valid = !(pos->flags & KERNFS_REMOVED) && + int valid = kernfs_active(pos) && pos->parent == parent && hash == pos->hash; kernfs_put(pos); if (!valid) @@ -993,8 +1342,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, break; } } - /* Skip over entries in the wrong namespace */ - while (pos && pos->ns != ns) { + /* Skip over entries which are dying/dead or in the wrong namespace */ + while (pos && (!kernfs_active(pos) || pos->ns != ns)) { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; @@ -1008,14 +1357,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) { pos = kernfs_dir_pos(ns, parent, ino, pos); - if (pos) + if (pos) { do { struct rb_node *node = rb_next(&pos->rb); if (!node) pos = NULL; else pos = rb_to_kn(node); - } while (pos && pos->ns != ns); + } while (pos && (!kernfs_active(pos) || pos->ns != ns)); + } return pos; } diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index dbf397bfdff2..8034706a7af8 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -252,10 +252,18 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct kernfs_open_file *of = kernfs_of(file); - ssize_t len = min_t(size_t, count, PAGE_SIZE); const struct kernfs_ops *ops; + size_t len; char *buf; + if (of->atomic_write_len) { + len = count; + if (len > of->atomic_write_len) + return -E2BIG; + } else { + len = min_t(size_t, count, PAGE_SIZE); + } + buf = kmalloc(len + 1, GFP_KERNEL); if (!buf) return -ENOMEM; @@ -653,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) of->file = file; /* + * Write path needs to atomic_write_len outside active reference. + * Cache it in open_file. See kernfs_fop_write() for details. + */ + of->atomic_write_len = ops->atomic_write_len; + + /* * Always instantiate seq_file even if read access doesn't use * seq_file or is not requested. This unifies private data access * and readable regular files are the vast majority anyway. @@ -820,7 +834,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, bool name_is_static, struct lock_class_key *key) { - struct kernfs_addrm_cxt acxt; struct kernfs_node *kn; unsigned flags; int rc; @@ -855,10 +868,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, if (ops->mmap) kn->flags |= KERNFS_HAS_MMAP; - kernfs_addrm_start(&acxt); - rc = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + rc = kernfs_add_one(kn); if (rc) { kernfs_put(kn); return ERR_PTR(rc); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index eb536b76374a..8be13b2a079b 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -26,7 +26,8 @@ struct kernfs_iattrs { struct simple_xattrs xattrs; }; -#define KN_DEACTIVATED_BIAS INT_MIN +/* +1 to avoid triggering overflow warning when negating it */ +#define KN_DEACTIVATED_BIAS (INT_MIN + 1) /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ @@ -45,13 +46,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) } /* - * Context structure to be used while adding/removing nodes. - */ -struct kernfs_addrm_cxt { - struct kernfs_node *removed; -}; - -/* * mount.c */ struct kernfs_super_info { @@ -71,6 +65,7 @@ struct kernfs_super_info { }; #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) +extern const struct super_operations kernfs_sops; extern struct kmem_cache *kernfs_node_cache; /* @@ -100,9 +95,7 @@ extern const struct inode_operations kernfs_dir_iops; struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); void kernfs_put_active(struct kernfs_node *kn); -void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt); -int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn); -void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt); +int kernfs_add_one(struct kernfs_node *kn); struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, const char *name, umode_t mode, unsigned flags); diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0f4152defe7b..6a5f04ac8704 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c @@ -19,12 +19,49 @@ struct kmem_cache *kernfs_node_cache; -static const struct super_operations kernfs_sops = { +static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) +{ + struct kernfs_root *root = kernfs_info(sb)->root; + struct kernfs_syscall_ops *scops = root->syscall_ops; + + if (scops && scops->remount_fs) + return scops->remount_fs(root, flags, data); + return 0; +} + +static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) +{ + struct kernfs_root *root = kernfs_root(dentry->d_fsdata); + struct kernfs_syscall_ops *scops = root->syscall_ops; + + if (scops && scops->show_options) + return scops->show_options(sf, root); + return 0; +} + +const struct super_operations kernfs_sops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .evict_inode = kernfs_evict_inode, + + .remount_fs = kernfs_sop_remount_fs, + .show_options = kernfs_sop_show_options, }; +/** + * kernfs_root_from_sb - determine kernfs_root associated with a super_block + * @sb: the super_block in question + * + * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, + * %NULL is returned. + */ +struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) +{ + if (sb->s_op == &kernfs_sops) + return kernfs_info(sb)->root; + return NULL; +} + static int kernfs_fill_super(struct super_block *sb) { struct kernfs_super_info *info = kernfs_info(sb); diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 4d457055acb9..8a198898e39a 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, struct kernfs_node *target) { struct kernfs_node *kn; - struct kernfs_addrm_cxt acxt; int error; kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); @@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, kn->symlink.target_kn = target; kernfs_get(target); /* ref owned by symlink */ - kernfs_addrm_start(&acxt); - error = kernfs_add_one(&acxt, kn); - kernfs_addrm_finish(&acxt); - + error = kernfs_add_one(kn); if (!error) return kn; diff --git a/fs/mount.h b/fs/mount.h index a17458ca6f29..b29e42f05f34 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -19,13 +19,13 @@ struct mnt_pcp { }; struct mountpoint { - struct list_head m_hash; + struct hlist_node m_hash; struct dentry *m_dentry; int m_count; }; struct mount { - struct list_head mnt_hash; + struct hlist_node mnt_hash; struct mount *mnt_parent; struct dentry *mnt_mountpoint; struct vfsmount mnt; diff --git a/fs/namei.c b/fs/namei.c index 385f7817bfcc..4b491b431990 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1109,7 +1109,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, return false; if (!d_mountpoint(path->dentry)) - break; + return true; mounted = __lookup_mnt(path->mnt, path->dentry); if (!mounted) @@ -1125,20 +1125,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, */ *inode = path->dentry->d_inode; } - return true; -} - -static void follow_mount_rcu(struct nameidata *nd) -{ - while (d_mountpoint(nd->path.dentry)) { - struct mount *mounted; - mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); - if (!mounted) - break; - nd->path.mnt = &mounted->mnt; - nd->path.dentry = mounted->mnt.mnt_root; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - } + return read_seqretry(&mount_lock, nd->m_seq); } static int follow_dotdot_rcu(struct nameidata *nd) @@ -1166,7 +1153,17 @@ static int follow_dotdot_rcu(struct nameidata *nd) break; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - follow_mount_rcu(nd); + while (d_mountpoint(nd->path.dentry)) { + struct mount *mounted; + mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (!mounted) + break; + nd->path.mnt = &mounted->mnt; + nd->path.dentry = mounted->mnt.mnt_root; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); + if (!read_seqretry(&mount_lock, nd->m_seq)) + goto failed; + } nd->inode = nd->path.dentry->d_inode; return 0; @@ -1884,7 +1881,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.need_put) + if (f.flags & FDPUT_FPUT) *fp = f.file; nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); diff --git a/fs/namespace.c b/fs/namespace.c index 22e536705c45..2ffc5a2905d4 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -23,11 +23,34 @@ #include <linux/uaccess.h> #include <linux/proc_ns.h> #include <linux/magic.h> +#include <linux/bootmem.h> #include "pnode.h" #include "internal.h" -#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) -#define HASH_SIZE (1UL << HASH_SHIFT) +static unsigned int m_hash_mask __read_mostly; +static unsigned int m_hash_shift __read_mostly; +static unsigned int mp_hash_mask __read_mostly; +static unsigned int mp_hash_shift __read_mostly; + +static __initdata unsigned long mhash_entries; +static int __init set_mhash_entries(char *str) +{ + if (!str) + return 0; + mhash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mhash_entries=", set_mhash_entries); + +static __initdata unsigned long mphash_entries; +static int __init set_mphash_entries(char *str) +{ + if (!str) + return 0; + mphash_entries = simple_strtoul(str, &str, 0); + return 1; +} +__setup("mphash_entries=", set_mphash_entries); static int event; static DEFINE_IDA(mnt_id_ida); @@ -36,8 +59,8 @@ static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; -static struct list_head *mount_hashtable __read_mostly; -static struct list_head *mountpoint_hashtable __read_mostly; +static struct hlist_head *mount_hashtable __read_mostly; +static struct hlist_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -55,12 +78,19 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) +static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); tmp += ((unsigned long)dentry / L1_CACHE_BYTES); - tmp = tmp + (tmp >> HASH_SHIFT); - return tmp & (HASH_SIZE - 1); + tmp = tmp + (tmp >> m_hash_shift); + return &mount_hashtable[tmp & m_hash_mask]; +} + +static inline struct hlist_head *mp_hash(struct dentry *dentry) +{ + unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); + tmp = tmp + (tmp >> mp_hash_shift); + return &mountpoint_hashtable[tmp & mp_hash_mask]; } /* @@ -187,7 +217,7 @@ static struct mount *alloc_vfsmnt(const char *name) mnt->mnt_writers = 0; #endif - INIT_LIST_HEAD(&mnt->mnt_hash); + INIT_HLIST_NODE(&mnt->mnt_hash); INIT_LIST_HEAD(&mnt->mnt_child); INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); @@ -575,10 +605,10 @@ bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) */ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); + struct hlist_head *head = m_hash(mnt, dentry); struct mount *p; - list_for_each_entry_rcu(p, head, mnt_hash) + hlist_for_each_entry_rcu(p, head, mnt_hash) if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) return p; return NULL; @@ -590,13 +620,17 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry) */ struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry) { - struct list_head *head = mount_hashtable + hash(mnt, dentry); - struct mount *p; - - list_for_each_entry_reverse(p, head, mnt_hash) - if (&p->mnt_parent->mnt == mnt && p->mnt_mountpoint == dentry) - return p; - return NULL; + struct mount *p, *res; + res = p = __lookup_mnt(mnt, dentry); + if (!p) + goto out; + hlist_for_each_entry_continue(p, mnt_hash) { + if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry) + break; + res = p; + } +out: + return res; } /* @@ -633,11 +667,11 @@ struct vfsmount *lookup_mnt(struct path *path) static struct mountpoint *new_mountpoint(struct dentry *dentry) { - struct list_head *chain = mountpoint_hashtable + hash(NULL, dentry); + struct hlist_head *chain = mp_hash(dentry); struct mountpoint *mp; int ret; - list_for_each_entry(mp, chain, m_hash) { + hlist_for_each_entry(mp, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ if (d_unlinked(dentry)) @@ -659,7 +693,7 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) mp->m_dentry = dentry; mp->m_count = 1; - list_add(&mp->m_hash, chain); + hlist_add_head(&mp->m_hash, chain); return mp; } @@ -670,7 +704,7 @@ static void put_mountpoint(struct mountpoint *mp) spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); - list_del(&mp->m_hash); + hlist_del(&mp->m_hash); kfree(mp); } } @@ -712,7 +746,7 @@ static void detach_mnt(struct mount *mnt, struct path *old_path) mnt->mnt_parent = mnt; mnt->mnt_mountpoint = mnt->mnt.mnt_root; list_del_init(&mnt->mnt_child); - list_del_init(&mnt->mnt_hash); + hlist_del_init_rcu(&mnt->mnt_hash); put_mountpoint(mnt->mnt_mp); mnt->mnt_mp = NULL; } @@ -739,15 +773,14 @@ static void attach_mnt(struct mount *mnt, struct mountpoint *mp) { mnt_set_mountpoint(parent, mp, mnt); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mp->m_dentry)); + hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); } /* * vfsmount lock must be held for write */ -static void commit_tree(struct mount *mnt) +static void commit_tree(struct mount *mnt, struct mount *shadows) { struct mount *parent = mnt->mnt_parent; struct mount *m; @@ -762,8 +795,11 @@ static void commit_tree(struct mount *mnt) list_splice(&head, n->list.prev); - list_add_tail(&mnt->mnt_hash, mount_hashtable + - hash(&parent->mnt, mnt->mnt_mountpoint)); + if (shadows) + hlist_add_after_rcu(&shadows->mnt_hash, &mnt->mnt_hash); + else + hlist_add_head_rcu(&mnt->mnt_hash, + m_hash(&parent->mnt, mnt->mnt_mountpoint)); list_add_tail(&mnt->mnt_child, &parent->mnt_mounts); touch_mnt_namespace(n); } @@ -1153,26 +1189,28 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); -static LIST_HEAD(unmounted); /* protected by namespace_sem */ +static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static void namespace_unlock(void) { struct mount *mnt; - LIST_HEAD(head); + struct hlist_head head = unmounted; - if (likely(list_empty(&unmounted))) { + if (likely(hlist_empty(&head))) { up_write(&namespace_sem); return; } - list_splice_init(&unmounted, &head); + head.first->pprev = &head.first; + INIT_HLIST_HEAD(&unmounted); + up_write(&namespace_sem); synchronize_rcu(); - while (!list_empty(&head)) { - mnt = list_first_entry(&head, struct mount, mnt_hash); - list_del_init(&mnt->mnt_hash); + while (!hlist_empty(&head)) { + mnt = hlist_entry(head.first, struct mount, mnt_hash); + hlist_del_init(&mnt->mnt_hash); if (mnt->mnt_ex_mountpoint.mnt) path_put(&mnt->mnt_ex_mountpoint); mntput(&mnt->mnt); @@ -1193,16 +1231,19 @@ static inline void namespace_lock(void) */ void umount_tree(struct mount *mnt, int how) { - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); struct mount *p; + struct mount *last = NULL; - for (p = mnt; p; p = next_mnt(p, mnt)) - list_move(&p->mnt_hash, &tmp_list); + for (p = mnt; p; p = next_mnt(p, mnt)) { + hlist_del_init_rcu(&p->mnt_hash); + hlist_add_head(&p->mnt_hash, &tmp_list); + } if (how) propagate_umount(&tmp_list); - list_for_each_entry(p, &tmp_list, mnt_hash) { + hlist_for_each_entry(p, &tmp_list, mnt_hash) { list_del_init(&p->mnt_expire); list_del_init(&p->mnt_list); __touch_mnt_namespace(p->mnt_ns); @@ -1220,8 +1261,13 @@ void umount_tree(struct mount *mnt, int how) p->mnt_mp = NULL; } change_mnt_propagation(p, MS_PRIVATE); + last = p; + } + if (last) { + last->mnt_hash.next = unmounted.first; + unmounted.first = tmp_list.first; + unmounted.first->pprev = &unmounted.first; } - list_splice(&tmp_list, &unmounted); } static void shrink_submounts(struct mount *mnt); @@ -1605,24 +1651,23 @@ static int attach_recursive_mnt(struct mount *source_mnt, struct mountpoint *dest_mp, struct path *parent_path) { - LIST_HEAD(tree_list); + HLIST_HEAD(tree_list); struct mount *child, *p; + struct hlist_node *n; int err; if (IS_MNT_SHARED(dest_mnt)) { err = invent_group_ids(source_mnt, true); if (err) goto out; - } - err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); - if (err) - goto out_cleanup_ids; - - lock_mount_hash(); - - if (IS_MNT_SHARED(dest_mnt)) { + err = propagate_mnt(dest_mnt, dest_mp, source_mnt, &tree_list); + if (err) + goto out_cleanup_ids; + lock_mount_hash(); for (p = source_mnt; p; p = next_mnt(p, source_mnt)) set_mnt_shared(p); + } else { + lock_mount_hash(); } if (parent_path) { detach_mnt(source_mnt, parent_path); @@ -1630,20 +1675,22 @@ static int attach_recursive_mnt(struct mount *source_mnt, touch_mnt_namespace(source_mnt->mnt_ns); } else { mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt); - commit_tree(source_mnt); + commit_tree(source_mnt, NULL); } - list_for_each_entry_safe(child, p, &tree_list, mnt_hash) { - list_del_init(&child->mnt_hash); - commit_tree(child); + hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) { + struct mount *q; + hlist_del_init(&child->mnt_hash); + q = __lookup_mnt_last(&child->mnt_parent->mnt, + child->mnt_mountpoint); + commit_tree(child, q); } unlock_mount_hash(); return 0; out_cleanup_ids: - if (IS_MNT_SHARED(dest_mnt)) - cleanup_group_ids(source_mnt, NULL); + cleanup_group_ids(source_mnt, NULL); out: return err; } @@ -2777,18 +2824,24 @@ void __init mnt_init(void) mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); - mountpoint_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC); + mount_hashtable = alloc_large_system_hash("Mount-cache", + sizeof(struct hlist_head), + mhash_entries, 19, + 0, + &m_hash_shift, &m_hash_mask, 0, 0); + mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", + sizeof(struct hlist_head), + mphash_entries, 19, + 0, + &mp_hash_shift, &mp_hash_mask, 0, 0); if (!mount_hashtable || !mountpoint_hashtable) panic("Failed to allocate mount hash table\n"); - printk(KERN_INFO "Mount-cache hash table entries: %lu\n", HASH_SIZE); - - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mount_hashtable[u]); - for (u = 0; u < HASH_SIZE; u++) - INIT_LIST_HEAD(&mountpoint_hashtable[u]); + for (u = 0; u <= m_hash_mask; u++) + INIT_HLIST_HEAD(&mount_hashtable[u]); + for (u = 0; u <= mp_hash_mask; u++) + INIT_HLIST_HEAD(&mountpoint_hashtable[u]); kernfs_init(); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 017d3cb5e99b..6d7be3f80356 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -449,6 +449,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, fh_lock(fhp); host_err = notify_change(dentry, iap, NULL); fh_unlock(fhp); + err = nfserrno(host_err); out_put_write_access: if (size_change) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8450262bcf2a..51632c40e896 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2393,8 +2393,8 @@ out_dio: if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) || ((file->f_flags & O_DIRECT) && !direct_io)) { - ret = filemap_fdatawrite_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawrite_range(file->f_mapping, *ppos, + *ppos + count - 1); if (ret < 0) written = ret; @@ -2407,8 +2407,8 @@ out_dio: } if (!ret) - ret = filemap_fdatawait_range(file->f_mapping, pos, - pos + count - 1); + ret = filemap_fdatawait_range(file->f_mapping, *ppos, + *ppos + count - 1); } /* diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 1324e6600e57..ca5ce14cbddc 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c @@ -346,7 +346,9 @@ int ocfs2_cluster_connect(const char *stack_name, strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1); new_conn->cc_namelen = grouplen; - strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1); + if (cluster_name_len) + strlcpy(new_conn->cc_cluster_name, cluster_name, + CLUSTER_NAME_MAX + 1); new_conn->cc_cluster_name_len = cluster_name_len; new_conn->cc_recovery_handler = recovery_handler; new_conn->cc_recovery_data = recovery_data; diff --git a/fs/open.c b/fs/open.c index 4b3e1edf2fe4..b9ed8b25c108 100644 --- a/fs/open.c +++ b/fs/open.c @@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f, return 0; } + /* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */ + if (S_ISREG(inode->i_mode)) + f->f_mode |= FMODE_ATOMIC_POS; + f->f_op = fops_get(inode->i_fop); if (unlikely(WARN_ON(!f->f_op))) { error = -ENODEV; diff --git a/fs/pnode.c b/fs/pnode.c index c7221bb19801..88396df725b4 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -220,14 +220,14 @@ static struct mount *get_source(struct mount *dest, * @tree_list : list of heads of trees to be attached. */ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, - struct mount *source_mnt, struct list_head *tree_list) + struct mount *source_mnt, struct hlist_head *tree_list) { struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns; struct mount *m, *child; int ret = 0; struct mount *prev_dest_mnt = dest_mnt; struct mount *prev_src_mnt = source_mnt; - LIST_HEAD(tmp_list); + HLIST_HEAD(tmp_list); for (m = propagation_next(dest_mnt, dest_mnt); m; m = propagation_next(m, dest_mnt)) { @@ -246,27 +246,29 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp, child = copy_tree(source, source->mnt.mnt_root, type); if (IS_ERR(child)) { ret = PTR_ERR(child); - list_splice(tree_list, tmp_list.prev); + tmp_list = *tree_list; + tmp_list.first->pprev = &tmp_list.first; + INIT_HLIST_HEAD(tree_list); goto out; } if (is_subdir(dest_mp->m_dentry, m->mnt.mnt_root)) { mnt_set_mountpoint(m, dest_mp, child); - list_add_tail(&child->mnt_hash, tree_list); + hlist_add_head(&child->mnt_hash, tree_list); } else { /* * This can happen if the parent mount was bind mounted * on some subdirectory of a shared/slave mount. */ - list_add_tail(&child->mnt_hash, &tmp_list); + hlist_add_head(&child->mnt_hash, &tmp_list); } prev_dest_mnt = m; prev_src_mnt = child; } out: lock_mount_hash(); - while (!list_empty(&tmp_list)) { - child = list_first_entry(&tmp_list, struct mount, mnt_hash); + while (!hlist_empty(&tmp_list)) { + child = hlist_entry(tmp_list.first, struct mount, mnt_hash); umount_tree(child, 0); } unlock_mount_hash(); @@ -338,8 +340,10 @@ static void __propagate_umount(struct mount *mnt) * umount the child only if the child has no * other children */ - if (child && list_empty(&child->mnt_mounts)) - list_move_tail(&child->mnt_hash, &mnt->mnt_hash); + if (child && list_empty(&child->mnt_mounts)) { + hlist_del_init_rcu(&child->mnt_hash); + hlist_add_before_rcu(&child->mnt_hash, &mnt->mnt_hash); + } } } @@ -350,11 +354,11 @@ static void __propagate_umount(struct mount *mnt) * * vfsmount lock must be held for write */ -int propagate_umount(struct list_head *list) +int propagate_umount(struct hlist_head *list) { struct mount *mnt; - list_for_each_entry(mnt, list, mnt_hash) + hlist_for_each_entry(mnt, list, mnt_hash) __propagate_umount(mnt); return 0; } diff --git a/fs/pnode.h b/fs/pnode.h index 59e7eda1851e..fc28a27fa892 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -36,8 +36,8 @@ static inline void set_mnt_shared(struct mount *mnt) void change_mnt_propagation(struct mount *, int); int propagate_mnt(struct mount *, struct mountpoint *, struct mount *, - struct list_head *); -int propagate_umount(struct list_head *); + struct hlist_head *); +int propagate_umount(struct hlist_head *); int propagate_mount_busy(struct mount *, int); void mnt_release_group_id(struct mount *); int get_dominating_id(struct mount *mnt, const struct path *root); diff --git a/fs/proc/base.c b/fs/proc/base.c index 51507065263b..b9760628e1fd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1824,6 +1824,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path) if (rc) goto out_mmput; + rc = -ENOENT; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c62f0cc..9d231e9e5f0e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -9,7 +9,7 @@ #include <linux/slab.h> #include <linux/time.h> #include <linux/irqnr.h> -#include <asm/cputime.h> +#include <linux/cputime.h> #include <linux/tick.h> #ifndef arch_irq_stat_cpu diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d0ca9e..33de567c25af 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c @@ -5,7 +5,7 @@ #include <linux/seq_file.h> #include <linux/time.h> #include <linux/kernel_stat.h> -#include <asm/cputime.h> +#include <linux/cputime.h> static int uptime_proc_show(struct seq_file *m, void *v) { diff --git a/fs/read_write.c b/fs/read_write.c index edc5746a902a..31c6efa43183 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence) } EXPORT_SYMBOL(vfs_llseek); +static inline struct fd fdget_pos(int fd) +{ + return __to_fd(__fdget_pos(fd)); +} + +static inline void fdput_pos(struct fd f) +{ + if (f.flags & FDPUT_POS_UNLOCK) + mutex_unlock(&f.file->f_pos_lock); + fdput(f); +} + SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) { off_t retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); if (!f.file) return -EBADF; @@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) if (res != (loff_t)retval) retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ } - fdput(f); + fdput_pos(f); return retval; } @@ -295,7 +307,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned int, whence) { int retval; - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); loff_t offset; if (!f.file) @@ -315,7 +327,7 @@ SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, retval = 0; } out_putf: - fdput(f); + fdput_pos(f); return retval; } #endif @@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos) SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) ret = vfs_read(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; } @@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, size_t, count) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, ret = vfs_write(f.file, buf, count, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } return ret; @@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev); SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_readv(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, unsigned long, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret = -EBADF; if (f.file) { @@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, ret = vfs_writev(f.file, vec, vlen, &pos); if (ret >= 0) file_pos_write(f.file, pos); - fdput(f); + fdput_pos(f); } if (ret > 0) @@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -978,13 +990,13 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, ret = compat_readv(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } -COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_preadv64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1001,12 +1013,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 +COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_preadv64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_preadv64(fd, vec, vlen, pos); + + return __compat_sys_preadv64(fd, vec, vlen, pos); } static size_t compat_writev(struct file *file, @@ -1035,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, compat_ulong_t, vlen) { - struct fd f = fdget(fd); + struct fd f = fdget_pos(fd); ssize_t ret; loff_t pos; @@ -1045,13 +1067,13 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, ret = compat_writev(f.file, vec, vlen, &pos); if (ret >= 0) f.file->f_pos = pos; - fdput(f); + fdput_pos(f); return ret; } -COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, - const struct compat_iovec __user *,vec, - unsigned long, vlen, loff_t, pos) +static long __compat_sys_pwritev64(unsigned long fd, + const struct compat_iovec __user *vec, + unsigned long vlen, loff_t pos) { struct fd f; ssize_t ret; @@ -1068,12 +1090,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, return ret; } +#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 +COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, + const struct compat_iovec __user *,vec, + unsigned long, vlen, loff_t, pos) +{ + return __compat_sys_pwritev64(fd, vec, vlen, pos); +} +#endif + COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct compat_iovec __user *,vec, compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; - return compat_sys_pwritev64(fd, vec, vlen, pos); + + return __compat_sys_pwritev64(fd, vec, vlen, pos); } #endif diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index 8c41feacbac5..b2756014508c 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig @@ -1,6 +1,7 @@ config SYSFS bool "sysfs file system support" if EXPERT default y + select KERNFS help The sysfs filesystem is a virtual filesystem that the kernel uses to export internal kernel objects, their attributes, and their diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index ee0d761c3179..0b45ff42f374 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -19,39 +19,18 @@ DEFINE_SPINLOCK(sysfs_symlink_target_lock); -/** - * sysfs_pathname - return full path to sysfs dirent - * @kn: kernfs_node whose path we want - * @path: caller allocated buffer of size PATH_MAX - * - * Gives the name "/" to the sysfs_root entry; any path returned - * is relative to wherever sysfs is mounted. - */ -static char *sysfs_pathname(struct kernfs_node *kn, char *path) -{ - if (kn->parent) { - sysfs_pathname(kn->parent, path); - strlcat(path, "/", PATH_MAX); - } - strlcat(path, kn->name, PATH_MAX); - return path; -} - void sysfs_warn_dup(struct kernfs_node *parent, const char *name) { - char *path; + char *buf, *path = NULL; - path = kzalloc(PATH_MAX, GFP_KERNEL); - if (path) { - sysfs_pathname(parent, path); - strlcat(path, "/", PATH_MAX); - strlcat(path, name, PATH_MAX); - } + buf = kzalloc(PATH_MAX, GFP_KERNEL); + if (buf) + path = kernfs_path(parent, buf, PATH_MAX); - WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", - path ? path : name); + WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", + path, name); - kfree(path); + kfree(buf); } /** @@ -122,9 +101,13 @@ void sysfs_remove_dir(struct kobject *kobj) int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, const void *new_ns) { - struct kernfs_node *parent = kobj->sd->parent; + struct kernfs_node *parent; + int ret; - return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); + parent = kernfs_get_parent(kobj->sd); + ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); + kernfs_put(parent); + return ret; } int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, @@ -133,7 +116,6 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, struct kernfs_node *kn = kobj->sd; struct kernfs_node *new_parent; - BUG_ON(!kn->parent); new_parent = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : sysfs_root_kn; diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 810cf6e613e5..1b8b91b67fdb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -372,6 +372,29 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, } EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); +/** + * sysfs_remove_file_self - remove an object attribute from its own method + * @kobj: object we're acting for + * @attr: attribute descriptor + * + * See kernfs_remove_self() for details. + */ +bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) +{ + struct kernfs_node *parent = kobj->sd; + struct kernfs_node *kn; + bool ret; + + kn = kernfs_find_and_get(parent, attr->name); + if (WARN_ON_ONCE(!kn)) + return false; + + ret = kernfs_remove_self(kn); + + kernfs_put(kn); + return ret; +} + void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) { int i; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 6b579387c67a..aa0406895b53 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, if (grp->bin_attrs) { for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { if (update) - sysfs_remove_bin_file(kobj, *bin_attr); - error = sysfs_create_bin_file(kobj, *bin_attr); + kernfs_remove_by_name(parent, + (*bin_attr)->attr.name); + error = sysfs_add_file_mode_ns(parent, + &(*bin_attr)->attr, true, + (*bin_attr)->attr.mode, NULL); if (error) break; } diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 3eaf5c6622eb..a66ad6196f59 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -63,7 +63,7 @@ int __init sysfs_init(void) { int err; - sysfs_root = kernfs_create_root(NULL, NULL); + sysfs_root = kernfs_create_root(NULL, 0, NULL); if (IS_ERR(sysfs_root)) return PTR_ERR(sysfs_root); diff --git a/fs/timerfd.c b/fs/timerfd.c index 929312180dd0..0013142c0475 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -317,6 +317,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) (clockid != CLOCK_MONOTONIC && clockid != CLOCK_REALTIME && clockid != CLOCK_REALTIME_ALARM && + clockid != CLOCK_BOOTTIME && clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; |