diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-05 09:16:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-01-05 09:16:18 -0800 |
commit | a65981109f294ba7e64b33ad3b4575a4636fce66 (patch) | |
tree | 1061a49f11544e18775630938a8bc53920fa0421 /fs | |
parent | 3fed6ae4b027f9c93be18520f87bd06bdffd196b (diff) | |
parent | b685a7350ae76bc0f388e24b36d06a63776c68ee (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton:
- procfs updates
- various misc bits
- lib/ updates
- epoll updates
- autofs
- fatfs
- a few more MM bits
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (58 commits)
mm/page_io.c: fix polled swap page in
checkpatch: add Co-developed-by to signature tags
docs: fix Co-Developed-by docs
drivers/base/platform.c: kmemleak ignore a known leak
fs: don't open code lru_to_page()
fs/: remove caller signal_pending branch predictions
mm/: remove caller signal_pending branch predictions
arch/arc/mm/fault.c: remove caller signal_pending_branch predictions
kernel/sched/: remove caller signal_pending branch predictions
kernel/locking/mutex.c: remove caller signal_pending branch predictions
mm: select HAVE_MOVE_PMD on x86 for faster mremap
mm: speed up mremap by 20x on large regions
mm: treewide: remove unused address argument from pte_alloc functions
initramfs: cleanup incomplete rootfs
scripts/gdb: fix lx-version string output
kernel/kcov.c: mark write_comp_data() as notrace
kernel/sysctl: add panic_print into sysctl
panic: add options to print system info when panic happens
bfs: extra sanity checking and static inode bitmap
exec: separate MM_ANONPAGES and RLIMIT_STACK accounting
...
Diffstat (limited to 'fs')
36 files changed, 399 insertions, 313 deletions
diff --git a/fs/afs/file.c b/fs/afs/file.c index d6bc3f5d784b..323ae9912203 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -17,6 +17,7 @@ #include <linux/writeback.h> #include <linux/gfp.h> #include <linux/task_io_accounting_ops.h> +#include <linux/mm.h> #include "internal.h" static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); @@ -441,7 +442,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, /* Count the number of contiguous pages at the front of the list. Note * that the list goes prev-wards rather than next-wards. */ - first = list_entry(pages->prev, struct page, lru); + first = lru_to_page(pages); index = first->index + 1; n = 1; for (p = first->lru.prev; p != pages; p = p->prev) { @@ -473,7 +474,7 @@ static int afs_readpages_one(struct file *file, struct address_space *mapping, * page at the end of the file. */ do { - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); index = page->index; if (add_to_page_cache_lru(page, mapping, index, diff --git a/fs/afs/fs_probe.c b/fs/afs/fs_probe.c index fde6b4d4121e..3a9eaec06756 100644 --- a/fs/afs/fs_probe.c +++ b/fs/afs/fs_probe.c @@ -247,7 +247,7 @@ int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried) } } - if (!still_probing || unlikely(signal_pending(current))) + if (!still_probing || signal_pending(current)) goto stop; schedule(); } diff --git a/fs/afs/vl_probe.c b/fs/afs/vl_probe.c index f0b032976487..f402ee8171a1 100644 --- a/fs/afs/vl_probe.c +++ b/fs/afs/vl_probe.c @@ -248,7 +248,7 @@ int afs_wait_for_vl_probes(struct afs_vlserver_list *vllist, } } - if (!still_probing || unlikely(signal_pending(current))) + if (!still_probing || signal_pending(current)) goto stop; schedule(); } diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 9f9cadbfbd7a..3e59f0ed777b 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -42,6 +42,8 @@ #endif #define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__ +extern struct file_system_type autofs_fs_type; + /* * Unified info structure. This is pointed to by both the dentry and * inode structures. Each file in the filesystem has an instance of this @@ -101,16 +103,19 @@ struct autofs_wait_queue { #define AUTOFS_SBI_MAGIC 0x6d4a556d +#define AUTOFS_SBI_CATATONIC 0x0001 +#define AUTOFS_SBI_STRICTEXPIRE 0x0002 + struct autofs_sb_info { u32 magic; int pipefd; struct file *pipe; struct pid *oz_pgrp; - int catatonic; int version; int sub_version; int min_proto; int max_proto; + unsigned int flags; unsigned long exp_timeout; unsigned int type; struct super_block *sb; @@ -126,8 +131,7 @@ struct autofs_sb_info { static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb) { - return sb->s_magic != AUTOFS_SUPER_MAGIC ? - NULL : (struct autofs_sb_info *)(sb->s_fs_info); + return (struct autofs_sb_info *)(sb->s_fs_info); } static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) @@ -141,7 +145,8 @@ static inline struct autofs_info *autofs_dentry_ino(struct dentry *dentry) */ static inline int autofs_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp; + return ((sbi->flags & AUTOFS_SBI_CATATONIC) || + task_pgrp(current) == sbi->oz_pgrp); } struct inode *autofs_get_inode(struct super_block *, umode_t); diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 86eafda4a652..e9fe74d1541b 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -151,22 +151,6 @@ out: return err; } -/* - * Get the autofs super block info struct from the file opened on - * the autofs mount point. - */ -static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) -{ - struct autofs_sb_info *sbi = NULL; - struct inode *inode; - - if (f) { - inode = file_inode(f); - sbi = autofs_sbi(inode->i_sb); - } - return sbi; -} - /* Return autofs dev ioctl version */ static int autofs_dev_ioctl_version(struct file *fp, struct autofs_sb_info *sbi, @@ -366,7 +350,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, pipefd = param->setpipefd.pipefd; mutex_lock(&sbi->wq_mutex); - if (!sbi->catatonic) { + if (!(sbi->flags & AUTOFS_SBI_CATATONIC)) { mutex_unlock(&sbi->wq_mutex); return -EBUSY; } else { @@ -393,7 +377,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, swap(sbi->oz_pgrp, new_pid); sbi->pipefd = pipefd; sbi->pipe = pipe; - sbi->catatonic = 0; + sbi->flags &= ~AUTOFS_SBI_CATATONIC; } out: put_pid(new_pid); @@ -658,6 +642,8 @@ static int _autofs_dev_ioctl(unsigned int command, if (cmd != AUTOFS_DEV_IOCTL_VERSION_CMD && cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { + struct super_block *sb; + fp = fget(param->ioctlfd); if (!fp) { if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) @@ -666,12 +652,13 @@ static int _autofs_dev_ioctl(unsigned int command, goto out; } - sbi = autofs_dev_ioctl_sbi(fp); - if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { + sb = file_inode(fp)->i_sb; + if (sb->s_type != &autofs_fs_type) { err = -EINVAL; fput(fp); goto out; } + sbi = autofs_sbi(sb); /* * Admin needs to be able to set the mount catatonic in diff --git a/fs/autofs/init.c b/fs/autofs/init.c index 79ae07d9592f..c0c1db2cc6ea 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -16,7 +16,7 @@ static struct dentry *autofs_mount(struct file_system_type *fs_type, return mount_nodev(fs_type, flags, data, autofs_fill_super); } -static struct file_system_type autofs_fs_type = { +struct file_system_type autofs_fs_type = { .owner = THIS_MODULE, .name = "autofs", .mount = autofs_mount, diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index 846c052569dd..0e8ea2d9a2bb 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -87,6 +87,8 @@ static int autofs_show_options(struct seq_file *m, struct dentry *root) seq_printf(m, ",direct"); else seq_printf(m, ",indirect"); + if (sbi->flags & AUTOFS_SBI_STRICTEXPIRE) + seq_printf(m, ",strictexpire"); #ifdef CONFIG_CHECKPOINT_RESTORE if (sbi->pipe) seq_printf(m, ",pipe_ino=%ld", file_inode(sbi->pipe)->i_ino); @@ -109,7 +111,7 @@ static const struct super_operations autofs_sops = { }; enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, - Opt_indirect, Opt_direct, Opt_offset}; + Opt_indirect, Opt_direct, Opt_offset, Opt_strictexpire}; static const match_table_t tokens = { {Opt_fd, "fd=%u"}, @@ -121,24 +123,28 @@ static const match_table_t tokens = { {Opt_indirect, "indirect"}, {Opt_direct, "direct"}, {Opt_offset, "offset"}, + {Opt_strictexpire, "strictexpire"}, {Opt_err, NULL} }; -static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, - int *pgrp, bool *pgrp_set, unsigned int *type, - int *minproto, int *maxproto) +static int parse_options(char *options, + struct inode *root, int *pgrp, bool *pgrp_set, + struct autofs_sb_info *sbi) { char *p; substring_t args[MAX_OPT_ARGS]; int option; + int pipefd = -1; + kuid_t uid; + kgid_t gid; - *uid = current_uid(); - *gid = current_gid(); + root->i_uid = current_uid(); + root->i_gid = current_gid(); - *minproto = AUTOFS_MIN_PROTO_VERSION; - *maxproto = AUTOFS_MAX_PROTO_VERSION; + sbi->min_proto = AUTOFS_MIN_PROTO_VERSION; + sbi->max_proto = AUTOFS_MAX_PROTO_VERSION; - *pipefd = -1; + sbi->pipefd = -1; if (!options) return 1; @@ -152,22 +158,25 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, token = match_token(p, tokens, args); switch (token) { case Opt_fd: - if (match_int(args, pipefd)) + if (match_int(args, &pipefd)) return 1; + sbi->pipefd = pipefd; break; case Opt_uid: if (match_int(args, &option)) return 1; - *uid = make_kuid(current_user_ns(), option); - if (!uid_valid(*uid)) + uid = make_kuid(current_user_ns(), option); + if (!uid_valid(uid)) return 1; + root->i_uid = uid; break; case Opt_gid: if (match_int(args, &option)) return 1; - *gid = make_kgid(current_user_ns(), option); - if (!gid_valid(*gid)) + gid = make_kgid(current_user_ns(), option); + if (!gid_valid(gid)) return 1; + root->i_gid = gid; break; case Opt_pgrp: if (match_int(args, &option)) @@ -178,27 +187,30 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, case Opt_minproto: if (match_int(args, &option)) return 1; - *minproto = option; + sbi->min_proto = option; break; case Opt_maxproto: if (match_int(args, &option)) return 1; - *maxproto = option; + sbi->max_proto = option; break; case Opt_indirect: - set_autofs_type_indirect(type); + set_autofs_type_indirect(&sbi->type); break; case Opt_direct: - set_autofs_type_direct(type); + set_autofs_type_direct(&sbi->type); break; case Opt_offset: - set_autofs_type_offset(type); + set_autofs_type_offset(&sbi->type); + break; + case Opt_strictexpire: + sbi->flags |= AUTOFS_SBI_STRICTEXPIRE; break; default: return 1; } } - return (*pipefd < 0); + return (sbi->pipefd < 0); } int autofs_fill_super(struct super_block *s, void *data, int silent) @@ -206,7 +218,6 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) struct inode *root_inode; struct dentry *root; struct file *pipe; - int pipefd; struct autofs_sb_info *sbi; struct autofs_info *ino; int pgrp = 0; @@ -222,12 +233,12 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) sbi->magic = AUTOFS_SBI_MAGIC; sbi->pipefd = -1; sbi->pipe = NULL; - sbi->catatonic = 1; sbi->exp_timeout = 0; sbi->oz_pgrp = NULL; sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; + sbi->flags = AUTOFS_SBI_CATATONIC; set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; @@ -262,9 +273,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) root->d_fsdata = ino; /* Can this call block? */ - if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid, - &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto, - &sbi->max_proto)) { + if (parse_options(data, root_inode, &pgrp, &pgrp_set, sbi)) { pr_err("called with bogus options\n"); goto fail_dput; } @@ -303,8 +312,9 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) root_inode->i_fop = &autofs_root_operations; root_inode->i_op = &autofs_dir_inode_operations; - pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp)); - pipe = fget(pipefd); + pr_debug("pipe fd = %d, pgrp = %u\n", + sbi->pipefd, pid_nr(sbi->oz_pgrp)); + pipe = fget(sbi->pipefd); if (!pipe) { pr_err("could not open pipe file descriptor\n"); @@ -314,8 +324,7 @@ int autofs_fill_super(struct super_block *s, void *data, int silent) if (ret < 0) goto fail_fput; sbi->pipe = pipe; - sbi->pipefd = pipefd; - sbi->catatonic = 0; + sbi->flags &= ~AUTOFS_SBI_CATATONIC; /* * Success! Install the root dentry now to indicate completion. diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 782e57b911ab..1246f396bf0e 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -275,8 +275,11 @@ static int autofs_mount_wait(const struct path *path, bool rcu_walk) pr_debug("waiting for mount name=%pd\n", path->dentry); status = autofs_wait(sbi, path, NFY_MOUNT); pr_debug("mount wait done status=%d\n", status); + ino->last_used = jiffies; + return status; } - ino->last_used = jiffies; + if (!(sbi->flags & AUTOFS_SBI_STRICTEXPIRE)) + ino->last_used = jiffies; return status; } @@ -510,7 +513,8 @@ static struct dentry *autofs_lookup(struct inode *dir, sbi = autofs_sbi(dir->i_sb); pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n", - current->pid, task_pgrp_nr(current), sbi->catatonic, + current->pid, task_pgrp_nr(current), + sbi->flags & AUTOFS_SBI_CATATONIC, autofs_oz_mode(sbi)); active = autofs_lookup_active(dentry); @@ -563,7 +567,7 @@ static int autofs_dir_symlink(struct inode *dir, * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; BUG_ON(!ino); @@ -626,7 +630,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; if (atomic_dec_and_test(&ino->count)) { @@ -714,7 +718,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; spin_lock(&sbi->lookup_lock); @@ -759,7 +763,7 @@ static int autofs_dir_mkdir(struct inode *dir, * autofs mount is catatonic but the state of an autofs * file system needs to be preserved over restarts. */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -EACCES; pr_debug("dentry %p, creating %pd\n", dentry, dentry); diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c index f6385c6ef0a5..15a3e31d0904 100644 --- a/fs/autofs/waitq.c +++ b/fs/autofs/waitq.c @@ -20,14 +20,14 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi) struct autofs_wait_queue *wq, *nwq; mutex_lock(&sbi->wq_mutex); - if (sbi->catatonic) { + if (sbi->flags & AUTOFS_SBI_CATATONIC) { mutex_unlock(&sbi->wq_mutex); return; } pr_debug("entering catatonic mode\n"); - sbi->catatonic = 1; + sbi->flags |= AUTOFS_SBI_CATATONIC; wq = sbi->queues; sbi->queues = NULL; /* Erase all wait queues */ while (wq) { @@ -255,7 +255,7 @@ static int validate_request(struct autofs_wait_queue **wait, struct autofs_wait_queue *wq; struct autofs_info *ino; - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* Wait in progress, continue; */ @@ -290,7 +290,7 @@ static int validate_request(struct autofs_wait_queue **wait, if (mutex_lock_interruptible(&sbi->wq_mutex)) return -EINTR; - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; wq = autofs_find_wait(sbi, qstr); @@ -359,7 +359,7 @@ int autofs_wait(struct autofs_sb_info *sbi, pid_t tgid; /* In catatonic mode, we don't wait for nobody */ - if (sbi->catatonic) + if (sbi->flags & AUTOFS_SBI_CATATONIC) return -ENOENT; /* diff --git a/fs/bfs/bfs.h b/fs/bfs/bfs.h index 67aef3bb89e4..606f9378b2f0 100644 --- a/fs/bfs/bfs.h +++ b/fs/bfs/bfs.h @@ -1,13 +1,20 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * fs/bfs/bfs.h - * Copyright (C) 1999 Tigran Aivazian <tigran@veritas.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> */ #ifndef _FS_BFS_BFS_H #define _FS_BFS_BFS_H #include <linux/bfs_fs.h> +/* In theory BFS supports up to 512 inodes, numbered from 2 (for /) up to 513 inclusive. + In actual fact, attempting to create the 512th inode (i.e. inode No. 513 or file No. 511) + will fail with ENOSPC in bfs_add_entry(): the root directory cannot contain so many entries, counting '..'. + So, mkfs.bfs(8) should really limit its -N option to 511 and not 512. For now, we just print a warning + if a filesystem is mounted with such "impossible to fill up" number of inodes */ +#define BFS_MAX_LASTI 513 + /* * BFS file system in-core superblock info */ @@ -17,7 +24,7 @@ struct bfs_sb_info { unsigned long si_freei; unsigned long si_lf_eblk; unsigned long si_lasti; - unsigned long *si_imap; + DECLARE_BITMAP(si_imap, BFS_MAX_LASTI+1); struct mutex bfs_lock; }; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index f32f21c3bbc7..d8dfe3a0cb39 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -2,8 +2,8 @@ /* * fs/bfs/dir.c * BFS directory operations. - * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> - * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> + * Made endianness-clean by Andrew Stribblehill <ads@wompom.org> 2005 */ #include <linux/time.h> diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 1476cdd90cfb..0dceefc54b48 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -2,7 +2,7 @@ /* * fs/bfs/file.c * BFS file operations. - * Copyright (C) 1999,2000 Tigran Aivazian <tigran@veritas.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> * * Make the file block allocation algorithm understand the size * of the underlying block device. diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index d81c148682e7..d136b2aaafb3 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -1,10 +1,9 @@ /* * fs/bfs/inode.c * BFS superblock and inode operations. - * Copyright (C) 1999-2006 Tigran Aivazian <aivazian.tigran@gmail.com> + * Copyright (C) 1999-2018 Tigran Aivazian <aivazian.tigran@gmail.com> * From fs/minix, Copyright (C) 1991, 1992 Linus Torvalds. - * - * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. + * Made endianness-clean by Andrew Stribblehill <ads@wompom.org>, 2005. */ #include <linux/module.h> @@ -118,12 +117,12 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) { struct bfs_sb_info *info = BFS_SB(inode->i_sb); unsigned int ino = (u16)inode->i_ino; - unsigned long i_sblock; + unsigned long i_sblock; struct bfs_inode *di; struct buffer_head *bh; int err = 0; - dprintf("ino=%08x\n", ino); + dprintf("ino=%08x\n", ino); di = find_inode(inode->i_sb, ino, &bh); if (IS_ERR(di)) @@ -144,7 +143,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc) di->i_atime = cpu_to_le32(inode->i_atime.tv_sec); di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); - i_sblock = BFS_I(inode)->i_sblock; + i_sblock = BFS_I(inode)->i_sblock; di->i_sblock = cpu_to_le32(i_sblock); di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock); di->i_eoffset = cpu_to_le32(i_sblock * BFS_BSIZE + inode->i_size - 1); @@ -188,13 +187,13 @@ static void bfs_evict_inode(struct inode *inode) mark_buffer_dirty(bh); brelse(bh); - if (bi->i_dsk_ino) { + if (bi->i_dsk_ino) { if (bi->i_sblock) info->si_freeb += bi->i_eblock + 1 - bi->i_sblock; info->si_freei++; clear_bit(ino, info->si_imap); - bfs_dump_imap("delete_inode", s); - } + bfs_dump_imap("evict_inode", s); + } /* * If this was the last file, make the previous block @@ -214,7 +213,6 @@ static void bfs_put_super(struct super_block *s) return; mutex_destroy(&info->bfs_lock); - kfree(info->si_imap); kfree(info); s->s_fs_info = NULL; } @@ -311,8 +309,7 @@ void bfs_dump_imap(const char *prefix, struct super_block *s) else strcat(tmpbuf, "0"); } - printf("BFS-fs: %s: lasti=%08lx <%s>\n", - prefix, BFS_SB(s)->si_lasti, tmpbuf); + printf("%s: lasti=%08lx <%s>\n", prefix, BFS_SB(s)->si_lasti, tmpbuf); free_page((unsigned long)tmpbuf); #endif } @@ -322,7 +319,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) struct buffer_head *bh, *sbh; struct bfs_super_block *bfs_sb; struct inode *inode; - unsigned i, imap_len; + unsigned i; struct bfs_sb_info *info; int ret = -EINVAL; unsigned long i_sblock, i_eblock, i_eoff, s_size; @@ -341,8 +338,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) bfs_sb = (struct bfs_super_block *)sbh->b_data; if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) { if (!silent) - printf("No BFS filesystem on %s (magic=%08x)\n", - s->s_id, le32_to_cpu(bfs_sb->s_magic)); + printf("No BFS filesystem on %s (magic=%08x)\n", s->s_id, le32_to_cpu(bfs_sb->s_magic)); goto out1; } if (BFS_UNCLEAN(bfs_sb, s) && !silent) @@ -351,18 +347,16 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) s->s_magic = BFS_MAGIC; if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) || - le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) { - printf("Superblock is corrupted\n"); + le32_to_cpu(bfs_sb->s_start) < sizeof(struct bfs_super_block) + sizeof(struct bfs_dirent)) { + printf("Superblock is corrupted on %s\n", s->s_id); goto out1; } - info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / - sizeof(struct bfs_inode) - + BFS_ROOT_INO - 1; - imap_len = (info->si_lasti / 8) + 1; - info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN); - if (!info->si_imap) { - printf("Cannot allocate %u bytes\n", imap_len); + info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) / sizeof(struct bfs_inode) + BFS_ROOT_INO - 1; + if (info->si_lasti == BFS_MAX_LASTI) + printf("WARNING: filesystem %s was created with 512 inodes, the real maximum is 511, mounting anyway\n", s->s_id); + else if (info->si_lasti > BFS_MAX_LASTI) { + printf("Impossible last inode number %lu > %d on %s\n", info->si_lasti, BFS_MAX_LASTI, s->s_id); goto out1; } for (i = 0; i < BFS_ROOT_INO; i++) @@ -372,26 +366,25 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) inode = bfs_iget(s, BFS_ROOT_INO); if (IS_ERR(inode)) { ret = PTR_ERR(inode); - goto out2; + goto out1; } s->s_root = d_make_root(inode); if (!s->s_root) { ret = -ENOMEM; - goto out2; + goto out1; } info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS; - info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; + info->si_freeb = (le32_to_cpu(bfs_sb->s_end) + 1 - le32_to_cpu(bfs_sb->s_start)) >> BFS_BSIZE_BITS; info->si_freei = 0; info->si_lf_eblk = 0; /* can we read the last block? */ bh = sb_bread(s, info->si_blocks - 1); if (!bh) { - printf("Last block not available: %lu\n", info->si_blocks - 1); + printf("Last block not available on %s: %lu\n", s->s_id, info->si_blocks - 1); ret = -EIO; - goto out3; + goto out2; } brelse(bh); @@ -425,11 +418,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) (i_eoff != le32_to_cpu(-1) && i_eoff > s_size) || i_sblock * BFS_BSIZE > i_eoff) { - printf("Inode 0x%08x corrupted\n", i); + printf("Inode 0x%08x corrupted on %s\n", i, s->s_id); brelse(bh); ret = -EIO; - goto out3; + goto out2; } if (!di->i_ino) { @@ -445,14 +438,12 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent) } brelse(bh); brelse(sbh); - bfs_dump_imap("read_super", s); + bfs_dump_imap("fill_super", s); return 0; -out3: +out2: dput(s->s_root); s->s_root = NULL; -out2: - kfree(info->si_imap); out1: brelse(sbh); out: @@ -482,7 +473,7 @@ static int __init init_bfs_fs(void) int err = init_inodecache(); if (err) goto out1; - err = register_filesystem(&bfs_fs_type); + err = register_filesystem(&bfs_fs_type); if (err) goto out; return 0; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 7cde3f46ad26..d0078cbb718b 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -42,10 +42,14 @@ static int load_script(struct linux_binprm *bprm) fput(bprm->file); bprm->file = NULL; - bprm->buf[BINPRM_BUF_SIZE - 1] = '\0'; - if ((cp = strchr(bprm->buf, '\n')) == NULL) - cp = bprm->buf+BINPRM_BUF_SIZE-1; + for (cp = bprm->buf+2;; cp++) { + if (cp >= bprm->buf + BINPRM_BUF_SIZE) + return -ENOEXEC; + if (!*cp || (*cp == '\n')) + break; + } *cp = '\0'; + while (cp > bprm->buf) { cp--; if ((*cp == ' ') || (*cp == '\t')) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fc126b92ea59..52abe4082680 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4103,8 +4103,7 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages, while (!list_empty(pages)) { for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) { - struct page *page = list_entry(pages->prev, - struct page, lru); + struct page *page = lru_to_page(pages); prefetchw(&page->flags); list_del(&page->lru); diff --git a/fs/buffer.c b/fs/buffer.c index d60d61e8ed7d..52d024bfdbc1 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2366,7 +2366,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping, balance_dirty_pages_ratelimited(mapping); - if (unlikely(fatal_signal_pending(current))) { + if (fatal_signal_pending(current)) { err = -EINTR; goto out; } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8eade7a993c1..5d0c05e288cc 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -306,7 +306,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->client->osdc; struct ceph_inode_info *ci = ceph_inode(inode); - struct page *page = list_entry(page_list->prev, struct page, lru); + struct page *page = lru_to_page(page_list); struct ceph_vino vino; struct ceph_osd_request *req; u64 off; @@ -333,8 +333,7 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx, if (got) ceph_put_cap_refs(ci, got); while (!list_empty(page_list)) { - page = list_entry(page_list->prev, - struct page, lru); + page = lru_to_page(page_list); list_del(&page->lru); put_page(page); } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 5e405164394a..e3e3a7550205 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -33,6 +33,7 @@ #include <linux/mount.h> #include <linux/slab.h> #include <linux/swap.h> +#include <linux/mm.h> #include <asm/div64.h> #include "cifsfs.h" #include "cifspdu.h" @@ -3964,7 +3965,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list, INIT_LIST_HEAD(tmplist); - page = list_entry(page_list->prev, struct page, lru); + page = lru_to_page(page_list); /* * Lock the page and put it in the cache. Since no one else diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7ebae39fbcb3..a5d219d920e7 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -381,7 +381,8 @@ static void ep_nested_calls_init(struct nested_calls *ncalls) */ static inline int ep_events_available(struct eventpoll *ep) { - return !list_empty(&ep->rdllist) || ep->ovflist != EP_UNACTIVE_PTR; + return !list_empty_careful(&ep->rdllist) || + READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR; } #ifdef CONFIG_NET_RX_BUSY_POLL @@ -471,7 +472,6 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * no re-entered. * * @ncalls: Pointer to the nested_calls structure to be used for this call. - * @max_nests: Maximum number of allowed nesting calls. * @nproc: Nested call core function pointer. * @priv: Opaque data to be passed to the @nproc callback. * @cookie: Cookie to be used to identify this nested call. @@ -480,7 +480,7 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) * Returns: Returns the code returned by the @nproc callback, or -1 if * the maximum recursion limit has been exceeded. */ -static int ep_call_nested(struct nested_calls *ncalls, int max_nests, +static int ep_call_nested(struct nested_calls *ncalls, int (*nproc)(void *, void *, int), void *priv, void *cookie, void *ctx) { @@ -499,7 +499,7 @@ static int ep_call_nested(struct nested_calls *ncalls, int max_nests, */ list_for_each_entry(tncur, lsthead, llink) { if (tncur->ctx == ctx && - (tncur->cookie == cookie || ++call_nests > max_nests)) { + (tncur->cookie == cookie || ++call_nests > EP_MAX_NESTS)) { /* * Ops ... loop detected or maximum nest level reached. * We abort this wake by breaking the cycle itself. @@ -573,7 +573,7 @@ static void ep_poll_safewake(wait_queue_head_t *wq) { int this_cpu = get_cpu(); - ep_call_nested(&poll_safewake_ncalls, EP_MAX_NESTS, + ep_call_nested(&poll_safewake_ncalls, ep_poll_wakeup_proc, NULL, wq, (void *) (long) this_cpu); put_cpu(); @@ -699,7 +699,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, */ spin_lock_irq(&ep->wq.lock); list_splice_init(&ep->rdllist, &txlist); - ep->ovflist = NULL; + WRITE_ONCE(ep->ovflist, NULL); spin_unlock_irq(&ep->wq.lock); /* @@ -713,7 +713,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * other events might have been queued by the poll callback. * We re-insert them inside the main ready-list here. */ - for (nepi = ep->ovflist; (epi = nepi) != NULL; + for (nepi = READ_ONCE(ep->ovflist); (epi = nepi) != NULL; nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { /* * We need to check if the item is already in the list. @@ -731,7 +731,7 @@ static __poll_t ep_scan_ready_list(struct eventpoll *ep, * releasing the lock, events will be queued in the normal way inside * ep->rdllist. */ - ep->ovflist = EP_UNACTIVE_PTR; + WRITE_ONCE(ep->ovflist, EP_UNACTIVE_PTR); /* * Quickly re-inject items left on "txlist". @@ -1154,10 +1154,10 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * semantics). All the events that happen during that period of time are * chained in ep->ovflist and requeued later on. */ - if (unlikely(ep->ovflist != EP_UNACTIVE_PTR)) { + if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { if (epi->next == EP_UNACTIVE_PTR) { - epi->next = ep->ovflist; - ep->ovflist = epi; + epi->next = READ_ONCE(ep->ovflist); + WRITE_ONCE(ep->ovflist, epi); if (epi->ws) { /* * Activate ep->ws since epi->ws may get @@ -1333,7 +1333,6 @@ static int reverse_path_check_proc(void *priv, void *cookie, int call_nests) } } else { error = ep_call_nested(&poll_loop_ncalls, - EP_MAX_NESTS, reverse_path_check_proc, child_file, child_file, current); @@ -1367,7 +1366,7 @@ static int reverse_path_check(void) /* let's call this for all tfiles */ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) { path_count_init(); - error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + error = ep_call_nested(&poll_loop_ncalls, reverse_path_check_proc, current_file, current_file, current); if (error) @@ -1626,21 +1625,24 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head { struct ep_send_events_data *esed = priv; __poll_t revents; - struct epitem *epi; - struct epoll_event __user *uevent; + struct epitem *epi, *tmp; + struct epoll_event __user *uevent = esed->events; struct wakeup_source *ws; poll_table pt; init_poll_funcptr(&pt, NULL); + esed->res = 0; /* * We can loop without lock because we are passed a task private list. * Items cannot vanish during the loop because ep_scan_ready_list() is * holding "mtx" during this call. */ - for (esed->res = 0, uevent = esed->events; - !list_empty(head) && esed->res < esed->maxevents;) { - epi = list_first_entry(head, struct epitem, rdllink); + lockdep_assert_held(&ep->mtx); + + list_for_each_entry_safe(epi, tmp, head, rdllink) { + if (esed->res >= esed->maxevents) + break; /* * Activate ep->ws before deactivating epi->ws to prevent @@ -1660,42 +1662,42 @@ static __poll_t ep_send_events_proc(struct eventpoll *ep, struct list_head *head list_del_init(&epi->rdllink); - revents = ep_item_poll(epi, &pt, 1); - /* * If the event mask intersect the caller-requested one, * deliver the event to userspace. Again, ep_scan_ready_list() - * is holding "mtx", so no operations coming from userspace + * is holding ep->mtx, so no operations coming from userspace * can change the item. */ - if (revents) { - if (__put_user(revents, &uevent->events) || - __put_user(epi->event.data, &uevent->data)) { - list_add(&epi->rdllink, head); - ep_pm_stay_awake(epi); - if (!esed->res) - esed->res = -EFAULT; - return 0; - } - esed->res++; - uevent++; - if (epi->event.events & EPOLLONESHOT) - epi->event.events &= EP_PRIVATE_BITS; - else if (!(epi->event.events & EPOLLET)) { - /* - * If this file has been added with Level - * Trigger mode, we need to insert back inside - * the ready list, so that the next call to - * epoll_wait() will check again the events - * availability. At this point, no one can insert - * into ep->rdllist besides us. The epoll_ctl() - * callers are locked out by - * ep_scan_ready_list() holding "mtx" and the - * poll callback will queue them in ep->ovflist. - */ - list_add_tail(&epi->rdllink, &ep->rdllist); - ep_pm_stay_awake(epi); - } + revents = ep_item_poll(epi, &pt, 1); + if (!revents) + continue; + + if (__put_user(revents, &uevent->events) || + __put_user(epi->event.data, &uevent->data)) { + list_add(&epi->rdllink, head); + ep_pm_stay_awake(epi); + if (!esed->res) + esed->res = -EFAULT; + return 0; + } + esed->res++; + uevent++; + if (epi->event.events & EPOLLONESHOT) + epi->event.events &= EP_PRIVATE_BITS; + else if (!(epi->event.events & EPOLLET)) { + /* + * If this file has been added with Level + * Trigger mode, we need to insert back inside + * the ready list, so that the next call to + * epoll_wait() will check again the events + * availability. At this point, no one can insert + * into ep->rdllist besides us. The epoll_ctl() + * callers are locked out by + * ep_scan_ready_list() holding "mtx" and the + * poll callback will queue them in ep->ovflist. + */ + list_add_tail(&epi->rdllink, &ep->rdllist); + ep_pm_stay_awake(epi); } } @@ -1747,6 +1749,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; + bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1761,11 +1764,18 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, } else if (timeout == 0) { /* * Avoid the unnecessary trip to the wait queue loop, if the - * caller specified a non blocking operation. + * caller specified a non blocking operation. We still need + * lock because we could race and not see an epi being added + * to the ready list while in irq callback. Thus incorrectly + * returning 0 back to userspace. */ timed_out = 1; + spin_lock_irq(&ep->wq.lock); - goto check_events; + eavail = ep_events_available(ep); + spin_unlock_irq(&ep->wq.lock); + + goto send_events; } fetch_events: @@ -1773,64 +1783,66 @@ fetch_events: if (!ep_events_available(ep)) ep_busy_loop(ep, timed_out); - spin_lock_irq(&ep->wq.lock); + eavail = ep_events_available(ep); + if (eavail) + goto send_events; - if (!ep_events_available(ep)) { - /* - * Busy poll timed out. Drop NAPI ID for now, we can add - * it back in when we have moved a socket with a valid NAPI - * ID onto the ready list. - */ - ep_reset_busy_poll_napi_id(ep); + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. + */ + ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. - * We need to sleep here, and we will be wake up by - * ep_poll_callback() when events will become available. - */ + /* + * We don't have any available event to return to the caller. We need + * to sleep here, and we will be woken by ep_poll_callback() when events + * become available. + */ + if (!waiter) { + waiter = true; init_waitqueue_entry(&wait, current); - __add_wait_queue_exclusive(&ep->wq, &wait); - for (;;) { - /* - * We don't want to sleep if the ep_poll_callback() sends us - * a wakeup in between. That's why we set the task state - * to TASK_INTERRUPTIBLE before doing the checks. - */ - set_current_state(TASK_INTERRUPTIBLE); - /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. - */ - if (fatal_signal_pending(current)) { - res = -EINTR; - break; - } - if (ep_events_available(ep) || timed_out) - break; - if (signal_pending(current)) { - res = -EINTR; - break; - } + spin_lock_irq(&ep->wq.lock); + __add_wait_queue_exclusive(&ep->wq, &wait); + spin_unlock_irq(&ep->wq.lock); + } - spin_unlock_irq(&ep->wq.lock); - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) - timed_out = 1; + for (;;) { + /* + * We don't want to sleep if the ep_poll_callback() sends us + * a wakeup in between. That's why we set the task state + * to TASK_INTERRUPTIBLE before doing the checks. + */ + set_current_state(TASK_INTERRUPTIBLE); + /* + * Always short-circuit for fatal signals to allow + * threads to make a timely exit without the chance of + * finding more events available and fetching + * repeatedly. + */ + if (fatal_signal_pending(current)) { + res = -EINTR; + break; + } - spin_lock_irq(&ep->wq.lock); + eavail = ep_events_available(ep); + if (eavail) + break; + if (signal_pending(current)) { + res = -EINTR; + break; } - __remove_wait_queue(&ep->wq, &wait); - __set_current_state(TASK_RUNNING); + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { + timed_out = 1; + break; + } } -check_events: - /* Is it worth to try to dig for events ? */ - eavail = ep_events_available(ep); - spin_unlock_irq(&ep->wq.lock); + __set_current_state(TASK_RUNNING); +send_events: /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of @@ -1840,6 +1852,12 @@ check_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; + if (waiter) { + spin_lock_irq(&ep->wq.lock); + __remove_wait_queue(&ep->wq, &wait); + spin_unlock_irq(&ep->wq.lock); + } + return res; } @@ -1876,7 +1894,7 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests) ep_tovisit = epi->ffd.file->private_data; if (ep_tovisit->visited) continue; - error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + error = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, epi->ffd.file, ep_tovisit, current); if (error != 0) @@ -1916,7 +1934,7 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file) int ret; struct eventpoll *ep_cur, *ep_next; - ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS, + ret = ep_call_nested(&poll_loop_ncalls, ep_loop_check_proc, file, ep, current); /* clear visited list */ list_for_each_entry_safe(ep_cur, ep_next, &visited_list, diff --git a/fs/exec.c b/fs/exec.c index fc281b738a98..44320d893f1a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -218,55 +218,10 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, if (ret <= 0) return NULL; - if (write) { - unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; - unsigned long ptr_size, limit; - - /* - * Since the stack will hold pointers to the strings, we - * must account for them as well. - * - * The size calculation is the entire vma while each arg page is - * built, so each time we get here it's calculating how far it - * is currently (rather than each call being just the newly - * added size from the arg page). As a result, we need to - * always add the entire size of the pointers, so that on the - * last call to get_arg_page() we'll actually have the entire - * correct size. - */ - ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); - if (ptr_size > ULONG_MAX - size) - goto fail; - size += ptr_size; - - acct_arg_size(bprm, size / PAGE_SIZE); - - /* - * We've historically supported up to 32 pages (ARG_MAX) - * of argument strings even with small stacks - */ - if (size <= ARG_MAX) - return page; - - /* - * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM - * (whichever is smaller) for the argv+env strings. - * This ensures that: - * - the remaining binfmt code will not run out of stack space, - * - the program will have a reasonable amount of stack left - * to work from. - */ - limit = _STK_LIM / 4 * 3; - limit = min(limit, bprm->rlim_stack.rlim_cur / 4); - if (size > limit) - goto fail; - } + if (write) + acct_arg_size(bprm, vma_pages(bprm->vma)); return page; - -fail: - put_page(page); - return NULL; } static void put_arg_page(struct page *page) @@ -492,6 +447,50 @@ static int count(struct user_arg_ptr argv, int max) return i; } +static int prepare_arg_pages(struct linux_binprm *bprm, + struct user_arg_ptr argv, struct user_arg_ptr envp) +{ + unsigned long limit, ptr_size; + + bprm->argc = count(argv, MAX_ARG_STRINGS); + if (bprm->argc < 0) + return bprm->argc; + + bprm->envc = count(envp, MAX_ARG_STRINGS); + if (bprm->envc < 0) + return bprm->envc; + + /* + * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM + * (whichever is smaller) for the argv+env strings. + * This ensures that: + * - the remaining binfmt code will not run out of stack space, + * - the program will have a reasonable amount of stack left + * to work from. + */ + limit = _STK_LIM / 4 * 3; + limit = min(limit, bprm->rlim_stack.rlim_cur / 4); + /* + * We've historically supported up to 32 pages (ARG_MAX) + * of argument strings even with small stacks + */ + limit = max_t(unsigned long, limit, ARG_MAX); + /* + * We must account for the size of all the argv and envp pointers to + * the argv and envp strings, since they will also take up space in + * the stack. They aren't stored until much later when we can't + * signal to the parent that the child has run out of stack space. + * Instead, calculate it here so it's possible to fail gracefully. + */ + ptr_size = (bprm->argc + bprm->envc) * sizeof(void *); + if (limit <= ptr_size) + return -E2BIG; + limit -= ptr_size; + + bprm->argmin = bprm->p - limit; + return 0; +} + /* * 'copy_strings()' copies argument/environment strings from the old * processes's memory to the new process's stack. The call to get_user_pages() @@ -527,6 +526,10 @@ static int copy_strings(int argc, struct user_arg_ptr argv, pos = bprm->p; str += len; bprm->p -= len; +#ifdef CONFIG_MMU + if (bprm->p < bprm->argmin) + goto out; +#endif while (len > 0) { int offset, bytes_to_copy; @@ -1084,7 +1087,7 @@ static int de_thread(struct task_struct *tsk) __set_current_state(TASK_KILLABLE); spin_unlock_irq(lock); schedule(); - if (unlikely(__fatal_signal_pending(tsk))) + if (__fatal_signal_pending(tsk)) goto killed; spin_lock_irq(lock); } @@ -1112,7 +1115,7 @@ static int de_thread(struct task_struct *tsk) write_unlock_irq(&tasklist_lock); cgroup_threadgroup_change_end(tsk); schedule(); - if (unlikely(__fatal_signal_pending(tsk))) + if (__fatal_signal_pending(tsk)) goto killed; } @@ -1789,12 +1792,8 @@ static int __do_execve_file(int fd, struct filename *filename, if (retval) goto out_unmark; - bprm->argc = count(argv, MAX_ARG_STRINGS); - if ((retval = bprm->argc) < 0) - goto out; - - bprm->envc = count(envp, MAX_ARG_STRINGS); - if ((retval = bprm->envc) < 0) + retval = prepare_arg_pages(bprm, argv, envp); + if (retval < 0) goto out; retval = prepare_binprm(bprm); diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index f461d75ac049..6aa282ee455a 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -128,7 +128,7 @@ int ext4_mpage_readpages(struct address_space *mapping, prefetchw(&page->flags); if (pages) { - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, readahead_gfp_mask(mapping))) diff --git a/fs/fat/cache.c b/fs/fat/cache.c index 78d501c1fb65..738e427e2d21 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c @@ -363,7 +363,7 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys, *phys = 0; *mapped_blocks = 0; - if ((sbi->fat_bits != 32) && (inode->i_ino == MSDOS_ROOT_INO)) { + if (!is_fat32(sbi) && (inode->i_ino == MSDOS_ROOT_INO)) { if (sector < (sbi->dir_entries >> sbi->dir_per_block_bits)) { *phys = sector + sbi->dir_start; *mapped_blocks = 1; diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 0295a095b920..9d01db37183f 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -57,7 +57,7 @@ static inline void fat_dir_readahead(struct inode *dir, sector_t iblock, if ((iblock & (sbi->sec_per_clus - 1)) || sbi->sec_per_clus == 1) return; /* root dir of FAT12/FAT16 */ - if ((sbi->fat_bits != 32) && (dir->i_ino == MSDOS_ROOT_INO)) + if (!is_fat32(sbi) && (dir->i_ino == MSDOS_ROOT_INO)) return; bh = sb_find_get_block(sb, phys); @@ -1313,7 +1313,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, } } if (dir->i_ino == MSDOS_ROOT_INO) { - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) goto error; } else if (MSDOS_I(dir)->i_start == 0) { fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)", diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 4e1b2f6df5e6..922a0c6ba46c 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -142,6 +142,34 @@ static inline struct msdos_sb_info *MSDOS_SB(struct super_block *sb) return sb->s_fs_info; } +/* + * Functions that determine the variant of the FAT file system (i.e., + * whether this is FAT12, FAT16 or FAT32. + */ +static inline bool is_fat12(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 12; +} + +static inline bool is_fat16(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 16; +} + +static inline bool is_fat32(const struct msdos_sb_info *sbi) +{ + return sbi->fat_bits == 32; +} + +/* Maximum number of clusters */ +static inline u32 max_fat(struct super_block *sb) +{ + struct msdos_sb_info *sbi = MSDOS_SB(sb); + + return is_fat32(sbi) ? MAX_FAT32 : + is_fat16(sbi) ? MAX_FAT16 : MAX_FAT12; +} + static inline struct msdos_inode_info *MSDOS_I(struct inode *inode) { return container_of(inode, struct msdos_inode_info, vfs_inode); @@ -257,7 +285,7 @@ static inline int fat_get_start(const struct msdos_sb_info *sbi, const struct msdos_dir_entry *de) { int cluster = le16_to_cpu(de->start); - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) cluster |= (le16_to_cpu(de->starthi) << 16); return cluster; } diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index f58c0cacc531..495edeafd60a 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c @@ -290,19 +290,17 @@ void fat_ent_access_init(struct super_block *sb) mutex_init(&sbi->fat_lock); - switch (sbi->fat_bits) { - case 32: + if (is_fat32(sbi)) { sbi->fatent_shift = 2; sbi->fatent_ops = &fat32_ops; - break; - case 16: + } else if (is_fat16(sbi)) { sbi->fatent_shift = 1; sbi->fatent_ops = &fat16_ops; - break; - case 12: + } else if (is_fat12(sbi)) { sbi->fatent_shift = -1; sbi->fatent_ops = &fat12_ops; - break; + } else { + fat_fs_error(sb, "invalid FAT variant, %u bits", sbi->fat_bits); } } @@ -310,7 +308,7 @@ static void mark_fsinfo_dirty(struct super_block *sb) { struct msdos_sb_info *sbi = MSDOS_SB(sb); - if (sb_rdonly(sb) || sbi->fat_bits != 32) + if (sb_rdonly(sb) || !is_fat32(sbi)) return; __mark_inode_dirty(sbi->fsinfo_inode, I_DIRTY_SYNC); @@ -327,7 +325,7 @@ static inline int fat_ent_update_ptr(struct super_block *sb, /* Is this fatent's blocks including this entry? */ if (!fatent->nr_bhs || bhs[0]->b_blocknr != blocknr) return 0; - if (sbi->fat_bits == 12) { + if (is_fat12(sbi)) { if ((offset + 1) < sb->s_blocksize) { /* This entry is on bhs[0]. */ if (fatent->nr_bhs == 2) { diff --git a/fs/fat/inode.c b/fs/fat/inode.c index c0b5b5c3373b..79bb0e73a65f 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -686,7 +686,7 @@ static void fat_set_state(struct super_block *sb, b = (struct fat_boot_sector *) bh->b_data; - if (sbi->fat_bits == 32) { + if (is_fat32(sbi)) { if (set) b->fat32.state |= FAT_STATE_DIRTY; else @@ -1396,7 +1396,7 @@ static int fat_read_root(struct inode *inode) inode->i_mode = fat_make_mode(sbi, ATTR_DIR, S_IRWXUGO); inode->i_op = sbi->dir_ops; inode->i_fop = &fat_dir_operations; - if (sbi->fat_bits == 32) { + if (is_fat32(sbi)) { MSDOS_I(inode)->i_start = sbi->root_cluster; error = fat_calc_dir_size(inode); if (error < 0) @@ -1423,7 +1423,7 @@ static unsigned long calc_fat_clusters(struct super_block *sb) struct msdos_sb_info *sbi = MSDOS_SB(sb); /* Divide first to avoid overflow */ - if (sbi->fat_bits != 12) { + if (!is_fat12(sbi)) { unsigned long ent_per_sec = sb->s_blocksize * 8 / sbi->fat_bits; return ent_per_sec * sbi->fat_length; } @@ -1743,7 +1743,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, } /* interpret volume ID as a little endian 32 bit integer */ - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) sbi->vol_id = bpb.fat32_vol_id; else /* fat 16 or 12 */ sbi->vol_id = bpb.fat16_vol_id; @@ -1769,11 +1769,11 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, total_clusters = (total_sectors - sbi->data_start) / sbi->sec_per_clus; - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) sbi->fat_bits = (total_clusters > MAX_FAT12) ? 16 : 12; /* some OSes set FAT_STATE_DIRTY and clean it on unmount. */ - if (sbi->fat_bits == 32) + if (is_fat32(sbi)) sbi->dirty = bpb.fat32_state & FAT_STATE_DIRTY; else /* fat 16 or 12 */ sbi->dirty = bpb.fat16_state & FAT_STATE_DIRTY; @@ -1781,7 +1781,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, /* check that FAT table does not overflow */ fat_clusters = calc_fat_clusters(sb); total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); - if (total_clusters > MAX_FAT(sb)) { + if (total_clusters > max_fat(sb)) { if (!silent) fat_msg(sb, KERN_ERR, "count of clusters too big (%u)", total_clusters); @@ -1803,11 +1803,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_ent_access_init(sb); /* - * The low byte of FAT's first entry must have same value with - * media-field. But in real world, too many devices is - * writing wrong value. So, removed that validity check. + * The low byte of the first FAT entry must have the same value as + * the media field of the boot sector. But in real world, too many + * devices are writing wrong values. So, removed that validity check. * - * if (FAT_FIRST_ENT(sb, media) != first) + * The removed check compared the first FAT entry to a value dependent + * on the media field like this: + * == (0x0F00 | media), for FAT12 + * == (0XFF00 | media), for FAT16 + * == (0x0FFFFF | media), for FAT32 */ error = -EINVAL; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index fce0a76f3f1e..4fc950bb6433 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -64,7 +64,7 @@ int fat_clusters_flush(struct super_block *sb) struct buffer_head *bh; struct fat_boot_fsinfo *fsinfo; - if (sbi->fat_bits != 32) + if (!is_fat32(sbi)) return 0; bh = sb_bread(sb, sbi->fsinfo_sector); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index f37662675c3a..29a9dcfbe81f 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -565,6 +565,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { .symlink = hfsplus_symlink, .mknod = hfsplus_mknod, .rename = hfsplus_rename, + .getattr = hfsplus_getattr, .listxattr = hfsplus_listxattr, }; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dd7ad9f13e3a..b8471bf05def 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -488,6 +488,8 @@ void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork); int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd); int hfsplus_cat_write_inode(struct inode *inode); +int hfsplus_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d7ab9d8c4b67..d131c8ea7eb6 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -270,6 +270,26 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +int hfsplus_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); + struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + + if (inode->i_flags & S_APPEND) + stat->attributes |= STATX_ATTR_APPEND; + if (inode->i_flags & S_IMMUTABLE) + stat->attributes |= STATX_ATTR_IMMUTABLE; + if (hip->userflags & HFSPLUS_FLG_NODUMP) + stat->attributes |= STATX_ATTR_NODUMP; + + stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE | + STATX_ATTR_NODUMP; + + generic_fillattr(inode, stat); + return 0; +} + int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) { @@ -329,6 +349,7 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end, static const struct inode_operations hfsplus_file_inode_operations = { .setattr = hfsplus_setattr, + .getattr = hfsplus_getattr, .listxattr = hfsplus_listxattr, }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index eb1ce30412dc..832c1759a09a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -30,6 +30,7 @@ #include <linux/quotaops.h> #include <linux/blkdev.h> #include <linux/uio.h> +#include <linux/mm.h> #include <cluster/masklog.h> @@ -397,7 +398,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping, * Check whether a remote node truncated this file - we just * drop out in that case as it's not worth handling here. */ - last = list_entry(pages->prev, struct page, lru); + last = lru_to_page(pages); start = (loff_t)last->index << PAGE_SHIFT; if (start >= i_size_read(inode)) goto out_unlock; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index fe53381b26b1..f038235c64bd 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -77,7 +77,7 @@ static int orangefs_readpages(struct file *file, for (page_idx = 0; page_idx < nr_pages; page_idx++) { struct page *page; - page = list_entry(pages->prev, struct page, lru); + page = lru_to_page(pages); list_del(&page->lru); if (!add_to_page_cache(page, mapping, diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c index c4e98c9c1621..443bcd8c3c19 100644 --- a/fs/orangefs/orangefs-bufmap.c +++ b/fs/orangefs/orangefs-bufmap.c @@ -105,7 +105,7 @@ static int wait_for_free(struct slot_map *m) left = t; else left = t + (left - n); - if (unlikely(signal_pending(current))) + if (signal_pending(current)) left = -EINTR; } while (left > 0); diff --git a/fs/proc/base.c b/fs/proc/base.c index d7fd1ca807d2..633a63462573 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -581,8 +581,10 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, /* * print the file header */ - seq_printf(m, "%-25s %-20s %-20s %-10s\n", - "Limit", "Soft Limit", "Hard Limit", "Units"); + seq_puts(m, "Limit " + "Soft Limit " + "Hard Limit " + "Units \n"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) @@ -2356,10 +2358,13 @@ static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, return -ESRCH; if (p != current) { - if (!capable(CAP_SYS_NICE)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); count = -EPERM; goto out; } + rcu_read_unlock(); err = security_task_setscheduler(p); if (err) { @@ -2392,11 +2397,14 @@ static int timerslack_ns_show(struct seq_file *m, void *v) return -ESRCH; if (p != current) { - - if (!capable(CAP_SYS_NICE)) { + rcu_read_lock(); + if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); + err = security_task_getscheduler(p); if (err) goto out; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 5792f9e39466..da649ccd6804 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -59,7 +59,6 @@ static struct kmem_cache *pde_opener_cache __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { struct proc_inode *ei; - struct inode *inode; ei = kmem_cache_alloc(proc_inode_cachep, GFP_KERNEL); if (!ei) @@ -71,8 +70,7 @@ static struct inode *proc_alloc_inode(struct super_block *sb) ei->sysctl = NULL; ei->sysctl_entry = NULL; ei->ns_ops = NULL; - inode = &ei->vfs_inode; - return inode; + return &ei->vfs_inode; } static void proc_i_callback(struct rcu_head *head) diff --git a/fs/proc/util.c b/fs/proc/util.c index b161cfa0f9fa..98f8adc17345 100644 --- a/fs/proc/util.c +++ b/fs/proc/util.c @@ -1,4 +1,5 @@ #include <linux/dcache.h> +#include "internal.h" unsigned name_to_int(const struct qstr *qstr) { |