summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2014-01-10 15:24:39 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2014-01-10 15:24:39 +1100
commit83548563c157403ba219b5bd9c59141f11377182 (patch)
tree778e58f318fb178bfd8d5580f5437368cd7b6f7d /fs
parent7ddcdb2ccdcae0838a39b1bf7b0773c5540da847 (diff)
parent7679372dd5f9a7176914398576d802379bb3c634 (diff)
Merge branch 'akpm-current/current'
Conflicts: drivers/misc/mei/hbm.c
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/proc.c4
-rw-r--r--fs/autofs4/autofs_i.h4
-rw-r--r--fs/autofs4/dev-ioctl.c16
-rw-r--r--fs/autofs4/expire.c14
-rw-r--r--fs/autofs4/inode.c49
-rw-r--r--fs/autofs4/root.c6
-rw-r--r--fs/autofs4/symlink.c4
-rw-r--r--fs/autofs4/waitq.c16
-rw-r--r--fs/binfmt_elf.c24
-rw-r--r--fs/compat_ioctl.c3
-rw-r--r--fs/coredump.c1
-rw-r--r--fs/coredump.h6
-rw-r--r--fs/exec.c115
-rw-r--r--fs/ext3/dir.c44
-rw-r--r--fs/ext4/block_validity.c33
-rw-r--r--fs/ext4/dir.c35
-rw-r--r--fs/fat/cache.c19
-rw-r--r--fs/fat/fat.h6
-rw-r--r--fs/fat/file.c78
-rw-r--r--fs/fat/inode.c57
-rw-r--r--fs/hfsplus/inode.c59
-rw-r--r--fs/jffs2/fs.c9
-rw-r--r--fs/jffs2/nodelist.c28
-rw-r--r--fs/jffs2/readinode.c26
-rw-r--r--fs/logfs/segment.c3
-rw-r--r--fs/nilfs2/ioctl.c371
-rw-r--r--fs/nilfs2/segment.c10
-rw-r--r--fs/notify/dnotify/dnotify.c34
-rw-r--r--fs/notify/fanotify/fanotify.c219
-rw-r--r--fs/notify/fanotify/fanotify.h23
-rw-r--r--fs/notify/fanotify/fanotify_user.c41
-rw-r--r--fs/notify/fsnotify.c42
-rw-r--r--fs/notify/group.c1
-rw-r--r--fs/notify/inotify/inotify.h21
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c149
-rw-r--r--fs/notify/inotify/inotify_user.c119
-rw-r--r--fs/notify/notification.c334
-rw-r--r--fs/ocfs2/Makefile1
-rw-r--r--fs/ocfs2/alloc.c50
-rw-r--r--fs/ocfs2/cluster/Makefile2
-rw-r--r--fs/ocfs2/cluster/nodemanager.c4
-rw-r--r--fs/ocfs2/cluster/tcp.c35
-rw-r--r--fs/ocfs2/cluster/ver.c42
-rw-r--r--fs/ocfs2/cluster/ver.h31
-rw-r--r--fs/ocfs2/dlm/Makefile2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c5
-rw-r--r--fs/ocfs2/dlm/dlmver.c42
-rw-r--r--fs/ocfs2/dlm/dlmver.h31
-rw-r--r--fs/ocfs2/dlmfs/Makefile2
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.c42
-rw-r--r--fs/ocfs2/dlmfs/dlmfsver.h31
-rw-r--r--fs/ocfs2/dlmglue.c4
-rw-r--r--fs/ocfs2/file.c66
-rw-r--r--fs/ocfs2/ioctl.c7
-rw-r--r--fs/ocfs2/localalloc.c40
-rw-r--r--fs/ocfs2/localalloc.h6
-rw-r--r--fs/ocfs2/move_extents.c77
-rw-r--r--fs/ocfs2/namei.c122
-rw-r--r--fs/ocfs2/ocfs2.h1
-rw-r--r--fs/ocfs2/stack_o2cb.c3
-rw-r--r--fs/ocfs2/stack_user.c308
-rw-r--r--fs/ocfs2/stackglue.c16
-rw-r--r--fs/ocfs2/stackglue.h15
-rw-r--r--fs/ocfs2/suballoc.c12
-rw-r--r--fs/ocfs2/suballoc.h12
-rw-r--r--fs/ocfs2/super.c20
-rw-r--r--fs/ocfs2/ver.c43
-rw-r--r--fs/ocfs2/ver.h31
-rw-r--r--fs/pipe.c3
-rw-r--r--fs/posix_acl.c94
-rw-r--r--fs/proc/array.c18
-rw-r--r--fs/proc/base.c69
-rw-r--r--fs/proc/generic.c3
-rw-r--r--fs/proc/meminfo.c37
-rw-r--r--fs/proc/page.c8
-rw-r--r--fs/proc/proc_devtree.c5
-rw-r--r--fs/ramfs/file-mmu.c7
-rw-r--r--fs/ramfs/file-nommu.c17
-rw-r--r--fs/ramfs/inode.c7
-rw-r--r--fs/ramfs/internal.h1
-rw-r--r--fs/read_write.c4
-rw-r--r--fs/reiserfs/reiserfs.h2
-rw-r--r--fs/romfs/super.c6
-rw-r--r--fs/super.c3
-rw-r--r--fs/ubifs/debug.c22
-rw-r--r--fs/ubifs/log.c21
-rw-r--r--fs/ubifs/orphan.c21
-rw-r--r--fs/ubifs/recovery.c21
-rw-r--r--fs/ubifs/super.c24
-rw-r--r--fs/ubifs/tnc.c22
-rw-r--r--fs/ufs/balloc.c30
-rw-r--r--fs/ufs/ialloc.c17
-rw-r--r--fs/ufs/super.c13
-rw-r--r--fs/ufs/ufs.h1
95 files changed, 1840 insertions, 1766 deletions
diff --git a/fs/afs/proc.c b/fs/afs/proc.c
index 526e4bbbde59..276cb6ed0b93 100644
--- a/fs/afs/proc.c
+++ b/fs/afs/proc.c
@@ -147,11 +147,11 @@ int afs_proc_init(void)
if (!proc_afs)
goto error_dir;
- p = proc_create("cells", 0, proc_afs, &afs_proc_cells_fops);
+ p = proc_create("cells", S_IFREG | S_IRUGO | S_IWUSR, proc_afs, &afs_proc_cells_fops);
if (!p)
goto error_cells;
- p = proc_create("rootcell", 0, proc_afs, &afs_proc_rootcell_fops);
+ p = proc_create("rootcell", S_IFREG | S_IRUGO | S_IWUSR, proc_afs, &afs_proc_rootcell_fops);
if (!p)
goto error_rootcell;
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 4218e26df916..acf32054edd8 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -104,7 +104,7 @@ struct autofs_sb_info {
u32 magic;
int pipefd;
struct file *pipe;
- pid_t oz_pgrp;
+ struct pid *oz_pgrp;
int catatonic;
int version;
int sub_version;
@@ -140,7 +140,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
filesystem without "magic".) */
static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
- return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp;
+ return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
}
/* Does a dentry have some pending activity? */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 1818ce7f5a06..3182c0e68b42 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -346,6 +346,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
{
int pipefd;
int err = 0;
+ struct pid *new_pid = NULL;
if (param->setpipefd.pipefd == -1)
return -EINVAL;
@@ -357,7 +358,17 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
mutex_unlock(&sbi->wq_mutex);
return -EBUSY;
} else {
- struct file *pipe = fget(pipefd);
+ struct file *pipe;
+
+ new_pid = get_task_pid(current, PIDTYPE_PGID);
+
+ if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
+ AUTOFS_WARN("Not allowed to change PID namespace");
+ err = -EINVAL;
+ goto out;
+ }
+
+ pipe = fget(pipefd);
if (!pipe) {
err = -EBADF;
goto out;
@@ -367,12 +378,13 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
fput(pipe);
goto out;
}
- sbi->oz_pgrp = task_pgrp_nr(current);
+ swap(sbi->oz_pgrp, new_pid);
sbi->pipefd = pipefd;
sbi->pipe = pipe;
sbi->catatonic = 0;
}
out:
+ put_pid(new_pid);
mutex_unlock(&sbi->wq_mutex);
return err;
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 3d9d3f5d5dda..394e90b02c5e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -402,6 +402,20 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
goto next;
}
+ if (dentry->d_inode && S_ISLNK(dentry->d_inode->i_mode)) {
+ DPRINTK("checking symlink %p %.*s",
+ dentry, (int)dentry->d_name.len, dentry->d_name.name);
+ /*
+ * A symlink can't be "busy" in the usual sense so
+ * just check last used for expire timeout.
+ */
+ if (autofs4_can_expire(dentry, timeout, do_now)) {
+ expired = dentry;
+ goto found;
+ }
+ goto next;
+ }
+
if (simple_empty(dentry))
goto next;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 3b9cc9b973c2..d7bd395ab586 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -56,8 +56,11 @@ void autofs4_kill_sb(struct super_block *sb)
* just call kill_anon_super when we are called from
* deactivate_super.
*/
- if (sbi) /* Free wait queues, close pipe */
+ if (sbi) {
+ /* Free wait queues, close pipe */
autofs4_catatonic_mode(sbi);
+ put_pid(sbi->oz_pgrp);
+ }
DPRINTK("shutting down");
kill_litter_super(sb);
@@ -80,7 +83,7 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID))
seq_printf(m, ",gid=%u",
from_kgid_munged(&init_user_ns, root_inode->i_gid));
- seq_printf(m, ",pgrp=%d", sbi->oz_pgrp);
+ seq_printf(m, ",pgrp=%d", pid_vnr(sbi->oz_pgrp));
seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ);
seq_printf(m, ",minproto=%d", sbi->min_proto);
seq_printf(m, ",maxproto=%d", sbi->max_proto);
@@ -124,7 +127,8 @@ static const match_table_t tokens = {
};
static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
- pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto)
+ int *pgrp, bool *pgrp_set, unsigned int *type,
+ int *minproto, int *maxproto)
{
char *p;
substring_t args[MAX_OPT_ARGS];
@@ -132,7 +136,6 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
*uid = current_uid();
*gid = current_gid();
- *pgrp = task_pgrp_nr(current);
*minproto = AUTOFS_MIN_PROTO_VERSION;
*maxproto = AUTOFS_MAX_PROTO_VERSION;
@@ -171,6 +174,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
if (match_int(args, &option))
return 1;
*pgrp = option;
+ *pgrp_set = true;
break;
case Opt_minproto:
if (match_int(args, &option))
@@ -206,10 +210,13 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
+ int pgrp;
+ bool pgrp_set = false;
+ int ret = -EINVAL;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
- goto fail_unlock;
+ return -ENOMEM;
DPRINTK("starting up, sbi = %p",sbi);
s->s_fs_info = sbi;
@@ -218,7 +225,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->pipe = NULL;
sbi->catatonic = 1;
sbi->exp_timeout = 0;
- sbi->oz_pgrp = task_pgrp_nr(current);
+ sbi->oz_pgrp = NULL;
sbi->sb = s;
sbi->version = 0;
sbi->sub_version = 0;
@@ -243,8 +250,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
* Get the root inode and dentry, but defer checking for errors.
*/
ino = autofs4_new_ino(sbi);
- if (!ino)
+ if (!ino) {
+ ret = -ENOMEM;
goto fail_free;
+ }
root_inode = autofs4_get_inode(s, S_IFDIR | 0755);
root = d_make_root(root_inode);
if (!root)
@@ -255,12 +264,23 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
/* Can this call block? */
if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
- &sbi->oz_pgrp, &sbi->type, &sbi->min_proto,
- &sbi->max_proto)) {
+ &pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
+ &sbi->max_proto)) {
printk("autofs: called with bogus options\n");
goto fail_dput;
}
+ if (pgrp_set) {
+ sbi->oz_pgrp = find_get_pid(pgrp);
+ if (!sbi->oz_pgrp) {
+ pr_warn("autofs: could not find process group %d\n",
+ pgrp);
+ goto fail_dput;
+ }
+ } else {
+ sbi->oz_pgrp = get_task_pid(current, PIDTYPE_PGID);
+ }
+
if (autofs_type_trigger(sbi->type))
__managed_dentry_set_managed(root);
@@ -284,14 +304,15 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->version = sbi->max_proto;
sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
- DPRINTK("pipe fd = %d, pgrp = %u", pipefd, sbi->oz_pgrp);
+ DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp));
pipe = fget(pipefd);
-
+
if (!pipe) {
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
- if (autofs_prepare_pipe(pipe) < 0)
+ ret = autofs_prepare_pipe(pipe);
+ if (ret < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
@@ -316,10 +337,10 @@ fail_dput:
fail_ino:
kfree(ino);
fail_free:
+ put_pid(sbi->oz_pgrp);
kfree(sbi);
s->s_fs_info = NULL;
-fail_unlock:
- return -EINVAL;
+ return ret;
}
struct inode *autofs4_get_inode(struct super_block *sb, umode_t mode)
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 92ef341ba0cf..2caf36ac3e93 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -558,7 +558,7 @@ static int autofs4_dir_symlink(struct inode *dir,
dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_inc(&p_ino->count);
dir->i_mtime = CURRENT_TIME;
@@ -593,7 +593,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
if (atomic_dec_and_test(&ino->count)) {
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_dec(&p_ino->count);
}
dput(ino->dentry);
@@ -732,7 +732,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
dget(dentry);
atomic_inc(&ino->count);
p_ino = autofs4_dentry_ino(dentry->d_parent);
- if (p_ino && dentry->d_parent != dentry)
+ if (p_ino && !IS_ROOT(dentry))
atomic_inc(&p_ino->count);
inc_nlink(dir);
dir->i_mtime = CURRENT_TIME;
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index f27c094a1919..1e8ea192be2b 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -14,6 +14,10 @@
static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd)
{
+ struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
+ struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ if (ino && !autofs4_oz_mode(sbi))
+ ino->last_used = jiffies;
nd_set_link(nd, dentry->d_inode->i_private);
return NULL;
}
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 689e40d983ad..116fd38ee472 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -347,11 +347,23 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
struct qstr qstr;
char *name;
int status, ret, type;
+ pid_t pid;
+ pid_t tgid;
/* In catatonic mode, we don't wait for nobody */
if (sbi->catatonic)
return -ENOENT;
+ /*
+ * Try translating pids to the namespace of the daemon.
+ *
+ * Zero means failure: we are in an unrelated pid namespace.
+ */
+ pid = task_pid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+ tgid = task_tgid_nr_ns(current, ns_of_pid(sbi->oz_pgrp));
+ if (pid == 0 || tgid == 0)
+ return -ENOENT;
+
if (!dentry->d_inode) {
/*
* A wait for a negative dentry is invalid for certain
@@ -417,8 +429,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
wq->ino = autofs4_get_ino(sbi);
wq->uid = current_uid();
wq->gid = current_gid();
- wq->pid = current->pid;
- wq->tgid = current->tgid;
+ wq->pid = pid;
+ wq->tgid = tgid;
wq->status = -EINTR; /* Status return if interrupted */
wq->wait_ctr = 2;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 571a42326908..1a965e654f2e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min(nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -543,9 +562,6 @@ out:
* libraries. There is no binary dependent code anywhere else.
*/
-#define INTERPRETER_NONE 0
-#define INTERPRETER_ELF 2
-
#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12)) /* 8MB of VA */
#endif
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dc52e13d58e0..3881610b6438 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -680,7 +680,8 @@ static int do_i2c_rdwr_ioctl(unsigned int fd, unsigned int cmd,
struct i2c_msg __user *tmsgs;
struct i2c_msg32 __user *umsgs;
compat_caddr_t datap;
- int nmsgs, i;
+ u32 nmsgs;
+ int i;
if (get_user(nmsgs, &udata->nmsgs))
return -EFAULT;
diff --git a/fs/coredump.c b/fs/coredump.c
index bc3fbcd32558..e3ad709a4232 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -40,7 +40,6 @@
#include <trace/events/task.h>
#include "internal.h"
-#include "coredump.h"
#include <trace/events/sched.h>
diff --git a/fs/coredump.h b/fs/coredump.h
deleted file mode 100644
index e39ff072110d..000000000000
--- a/fs/coredump.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _FS_COREDUMP_H
-#define _FS_COREDUMP_H
-
-extern int __get_dumpable(unsigned long mm_flags);
-
-#endif
diff --git a/fs/exec.c b/fs/exec.c
index 7ea097f6b341..493b102a27c1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -62,7 +62,6 @@
#include <trace/events/task.h>
#include "internal.h"
-#include "coredump.h"
#include <trace/events/sched.h>
@@ -1139,9 +1138,7 @@ void setup_new_exec(struct linux_binprm * bprm)
/* An exec changes our domain. We are no longer part of the thread
group */
-
current->self_exec_id++;
-
flush_signal_handlers(current, 0);
do_close_on_exec(current->files);
}
@@ -1173,6 +1170,10 @@ void free_bprm(struct linux_binprm *bprm)
mutex_unlock(&current->signal->cred_guard_mutex);
abort_creds(bprm->cred);
}
+ if (bprm->file) {
+ allow_write_access(bprm->file);
+ fput(bprm->file);
+ }
/* If a binfmt changed the interp, free it. */
if (bprm->interp != bprm->filename)
kfree(bprm->interp);
@@ -1224,11 +1225,10 @@ EXPORT_SYMBOL(install_exec_creds);
* - the caller must hold ->cred_guard_mutex to protect against
* PTRACE_ATTACH
*/
-static int check_unsafe_exec(struct linux_binprm *bprm)
+static void check_unsafe_exec(struct linux_binprm *bprm)
{
struct task_struct *p = current, *t;
unsigned n_fs;
- int res = 0;
if (p->ptrace) {
if (p->ptrace & PT_PTRACE_CAP)
@@ -1244,31 +1244,25 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
if (current->no_new_privs)
bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS;
+ t = p;
n_fs = 1;
spin_lock(&p->fs->lock);
rcu_read_lock();
- for (t = next_thread(p); t != p; t = next_thread(t)) {
+ while_each_thread(p, t) {
if (t->fs == p->fs)
n_fs++;
}
rcu_read_unlock();
- if (p->fs->users > n_fs) {
+ if (p->fs->users > n_fs)
bprm->unsafe |= LSM_UNSAFE_SHARE;
- } else {
- res = -EAGAIN;
- if (!p->fs->in_exec) {
- p->fs->in_exec = 1;
- res = 1;
- }
- }
+ else
+ p->fs->in_exec = 1;
spin_unlock(&p->fs->lock);
-
- return res;
}
-/*
- * Fill the binprm structure from the inode.
+/*
+ * Fill the binprm structure from the inode.
* Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
*
* This may be called multiple times for binary chains (scripts for example).
@@ -1430,14 +1424,7 @@ static int exec_binprm(struct linux_binprm *bprm)
audit_bprm(bprm);
trace_sched_process_exec(current, old_pid, bprm);
ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
- current->did_exec = 1;
proc_exec_connector(current);
-
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
- bprm->file = NULL; /* to catch use-after-free */
- }
}
return ret;
@@ -1453,7 +1440,6 @@ static int do_execve_common(const char *filename,
struct linux_binprm *bprm;
struct file *file;
struct files_struct *displaced;
- bool clear_in_exec;
int retval;
/*
@@ -1485,10 +1471,7 @@ static int do_execve_common(const char *filename,
if (retval)
goto out_free;
- retval = check_unsafe_exec(bprm);
- if (retval < 0)
- goto out_free;
- clear_in_exec = retval;
+ check_unsafe_exec(bprm);
current->in_execve = 1;
file = open_exec(filename);
@@ -1504,7 +1487,7 @@ static int do_execve_common(const char *filename,
retval = bprm_mm_init(bprm);
if (retval)
- goto out_file;
+ goto out_unmark;
bprm->argc = count(argv, MAX_ARG_STRINGS);
if ((retval = bprm->argc) < 0)
@@ -1551,15 +1534,8 @@ out:
mmput(bprm->mm);
}
-out_file:
- if (bprm->file) {
- allow_write_access(bprm->file);
- fput(bprm->file);
- }
-
out_unmark:
- if (clear_in_exec)
- current->fs->in_exec = 0;
+ current->fs->in_exec = 0;
current->in_execve = 0;
out_free:
@@ -1609,67 +1585,22 @@ void set_binfmt(struct linux_binfmt *new)
if (new)
__module_get(new->module);
}
-
EXPORT_SYMBOL(set_binfmt);
/*
- * set_dumpable converts traditional three-value dumpable to two flags and
- * stores them into mm->flags. It modifies lower two bits of mm->flags, but
- * these bits are not changed atomically. So get_dumpable can observe the
- * intermediate state. To avoid doing unexpected behavior, get get_dumpable
- * return either old dumpable or new one by paying attention to the order of
- * modifying the bits.
- *
- * dumpable | mm->flags (binary)
- * old new | initial interim final
- * ---------+-----------------------
- * 0 1 | 00 01 01
- * 0 2 | 00 10(*) 11
- * 1 0 | 01 00 00
- * 1 2 | 01 11 11
- * 2 0 | 11 10(*) 00
- * 2 1 | 11 11 01
- *
- * (*) get_dumpable regards interim value of 10 as 11.
+ * set_dumpable stores three-value SUID_DUMP_* into mm->flags.
*/
void set_dumpable(struct mm_struct *mm, int value)
{
- switch (value) {
- case SUID_DUMP_DISABLE:
- clear_bit(MMF_DUMPABLE, &mm->flags);
- smp_wmb();
- clear_bit(MMF_DUMP_SECURELY, &mm->flags);
- break;
- case SUID_DUMP_USER:
- set_bit(MMF_DUMPABLE, &mm->flags);
- smp_wmb();
- clear_bit(MMF_DUMP_SECURELY, &mm->flags);
- break;
- case SUID_DUMP_ROOT:
- set_bit(MMF_DUMP_SECURELY, &mm->flags);
- smp_wmb();
- set_bit(MMF_DUMPABLE, &mm->flags);
- break;
- }
-}
-
-int __get_dumpable(unsigned long mm_flags)
-{
- int ret;
+ unsigned long old, new;
- ret = mm_flags & MMF_DUMPABLE_MASK;
- return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
-}
+ if (WARN_ON((unsigned)value > SUID_DUMP_ROOT))
+ return;
-/*
- * This returns the actual value of the suid_dumpable flag. For things
- * that are using this for checking for privilege transitions, it must
- * test against SUID_DUMP_USER rather than treating it as a boolean
- * value.
- */
-int get_dumpable(struct mm_struct *mm)
-{
- return __get_dumpable(mm->flags);
+ do {
+ old = ACCESS_ONCE(mm->flags);
+ new = (old & ~MMF_DUMPABLE_MASK) | value;
+ } while (cmpxchg(&mm->flags, old, new) != old);
}
SYSCALL_DEFINE3(execve,
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index bafdd48eefde..e66e4808719f 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -309,43 +309,17 @@ struct fname {
*/
static void free_rb_tree_fname(struct rb_root *root)
{
- struct rb_node *n = root->rb_node;
- struct rb_node *parent;
- struct fname *fname;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- fname = rb_entry(n, struct fname, rb_hash);
- while (fname) {
- struct fname * old = fname;
+ struct fname *fname, *next;
+
+ rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
+ do {
+ struct fname *old = fname;
fname = fname->next;
- kfree (old);
- }
- if (!parent)
- *root = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
-}
+ kfree(old);
+ } while (fname);
+ *root = RB_ROOT;
+}
static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
loff_t pos)
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3f11656bd72e..41eb9dcfac7e 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -180,37 +180,12 @@ int ext4_setup_system_zone(struct super_block *sb)
/* Called when the filesystem is unmounted */
void ext4_release_system_zone(struct super_block *sb)
{
- struct rb_node *n = EXT4_SB(sb)->system_blks.rb_node;
- struct rb_node *parent;
- struct ext4_system_zone *entry;
+ struct ext4_system_zone *entry, *n;
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- entry = rb_entry(n, struct ext4_system_zone, node);
+ rbtree_postorder_for_each_entry_safe(entry, n,
+ &EXT4_SB(sb)->system_blks, node)
kmem_cache_free(ext4_system_zone_cachep, entry);
- if (!parent)
- EXT4_SB(sb)->system_blks = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
+
EXT4_SB(sb)->system_blks = RB_ROOT;
}
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 680bb3388919..d638c57e996e 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -353,41 +353,16 @@ struct fname {
*/
static void free_rb_tree_fname(struct rb_root *root)
{
- struct rb_node *n = root->rb_node;
- struct rb_node *parent;
- struct fname *fname;
-
- while (n) {
- /* Do the node's children first */
- if (n->rb_left) {
- n = n->rb_left;
- continue;
- }
- if (n->rb_right) {
- n = n->rb_right;
- continue;
- }
- /*
- * The node has no children; free it, and then zero
- * out parent's link to it. Finally go to the
- * beginning of the loop and try to free the parent
- * node.
- */
- parent = rb_parent(n);
- fname = rb_entry(n, struct fname, rb_hash);
+ struct fname *fname, *next;
+
+ rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
while (fname) {
struct fname *old = fname;
fname = fname->next;
kfree(old);
}
- if (!parent)
- *root = RB_ROOT;
- else if (parent->rb_left == n)
- parent->rb_left = NULL;
- else if (parent->rb_right == n)
- parent->rb_right = NULL;
- n = parent;
- }
+
+ *root = RB_ROOT;
}
diff --git a/fs/fat/cache.c b/fs/fat/cache.c
index 91ad9e1c9441..d22c1a209808 100644
--- a/fs/fat/cache.c
+++ b/fs/fat/cache.c
@@ -325,19 +325,26 @@ int fat_bmap(struct inode *inode, sector_t sector, sector_t *phys,
last_block = (i_size_read(inode) + (blocksize - 1)) >> blocksize_bits;
if (sector >= last_block) {
- if (!create)
- return 0;
-
/*
- * ->mmu_private can access on only allocation path.
- * (caller must hold ->i_mutex)
+ * Both ->mmu_private and ->i_disksize can access
+ * on only allocation path. (caller must hold ->i_mutex)
*/
- last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ last_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
>> blocksize_bits;
+ if (!create) {
+ /* Map a block in fallocated region */
+ if (atomic_read(&MSDOS_I(inode)->beyond_isize))
+ if (sector < last_block)
+ goto out_map_cluster;
+
+ return 0;
+ }
+
if (sector >= last_block)
return 0;
}
+out_map_cluster:
cluster = sector >> (sbi->cluster_bits - sb->s_blocksize_bits);
offset = sector & (sbi->sec_per_clus - 1);
cluster = fat_bmap_cluster(inode, cluster);
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7c31f4bc74a9..b8842769b0c5 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -118,7 +118,8 @@ struct msdos_inode_info {
unsigned int cache_valid_id;
/* NOTE: mmu_private is 64bits, so must hold ->i_mutex to access */
- loff_t mmu_private; /* physically allocated size */
+ loff_t mmu_private; /* physically allocated size (initialized) */
+ loff_t i_disksize; /* physically allocated size (uninitialized) */
int i_start; /* first cluster or 0 */
int i_logstart; /* logical first cluster */
@@ -128,6 +129,9 @@ struct msdos_inode_info {
struct hlist_node i_dir_hash; /* hash by i_logstart */
struct rw_semaphore truncate_lock; /* protect bmap against truncate */
struct inode vfs_inode;
+
+ /* for getting block number beyond file size in case of fallocate */
+ atomic_t beyond_isize;
};
struct fat_slot_info {
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 9b104f543056..79db8b6ab347 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -17,8 +17,12 @@
#include <linux/blkdev.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
+#include <linux/falloc.h>
#include "fat.h"
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len);
+
static int fat_ioctl_get_attributes(struct inode *inode, u32 __user *user_attr)
{
u32 attr;
@@ -182,6 +186,7 @@ const struct file_operations fat_file_operations = {
#endif
.fsync = fat_file_fsync,
.splice_read = generic_file_splice_read,
+ .fallocate = fat_fallocate,
};
static int fat_cont_expand(struct inode *inode, loff_t size)
@@ -220,6 +225,75 @@ out:
return err;
}
+/*
+ * Preallocate space for a file. This implements fat's fallocate file
+ * operation, which gets called from sys_fallocate system call. User
+ * space requests len bytes at offset. If FALLOC_FL_KEEP_SIZE is set
+ * we just allocate clusters without zeroing them out. Otherwise we
+ * allocate and zero out clusters via an expanding truncate.
+ */
+static long fat_fallocate(struct file *file, int mode,
+ loff_t offset, loff_t len)
+{
+ int cluster;
+ int nr_cluster; /* Number of clusters to be allocated */
+ loff_t mm_bytes; /* Number of bytes to be allocated for file */
+ struct inode *inode = file->f_mapping->host;
+ struct super_block *sb = inode->i_sb;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int err = 0;
+
+ /* No support for hole punch or other fallocate flags. */
+ if (mode & ~FALLOC_FL_KEEP_SIZE)
+ return -EOPNOTSUPP;
+
+ /* No support for dir */
+ if (!S_ISREG(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ mutex_lock(&inode->i_mutex);
+ if ((offset + len) <= MSDOS_I(inode)->i_disksize)
+ goto error;
+
+ err = inode_newsize_ok(inode, (len + offset));
+ if (err)
+ goto error;
+
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ /* First compute the number of clusters to be allocated */
+ mm_bytes = offset + len - round_up(MSDOS_I(inode)->mmu_private,
+ sbi->cluster_size);
+ nr_cluster = (mm_bytes + (sbi->cluster_size - 1)) >>
+ sbi->cluster_bits;
+
+ /* Start the allocation.We are not zeroing out the clusters */
+ while (nr_cluster-- > 0) {
+ err = fat_alloc_clusters(inode, &cluster, 1);
+ if (err) {
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_alloc_clusters() error");
+ goto error;
+ }
+ err = fat_chain_add(inode, cluster, 1);
+ if (err) {
+ fat_free_clusters(inode, cluster);
+ goto error;
+ }
+ MSDOS_I(inode)->i_disksize += sbi->cluster_size;
+ }
+ } else {
+ /* This is just an expanding truncate */
+ err = fat_cont_expand(inode, (offset + len));
+ if (err)
+ fat_msg(sb, KERN_ERR,
+ "fat_fallocate(): fat_cont_expand() error");
+ }
+
+error:
+ mutex_unlock(&inode->i_mutex);
+ return err;
+}
+
/* Free all clusters after the skip'th cluster. */
static int fat_free(struct inode *inode, int skip)
{
@@ -300,8 +374,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
* This protects against truncating a file bigger than it was then
* trying to write into the hole.
*/
- if (MSDOS_I(inode)->mmu_private > offset)
+ if (MSDOS_I(inode)->i_disksize > offset) {
MSDOS_I(inode)->mmu_private = offset;
+ MSDOS_I(inode)->i_disksize = offset;
+ }
nr_clusters = (offset + (cluster_size - 1)) >> sbi->cluster_bits;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 854b578f6695..ba9831d9f648 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -54,6 +54,25 @@ static int fat_add_cluster(struct inode *inode)
return err;
}
+static void check_fallocated_region(struct inode *inode, sector_t iblock,
+ unsigned long *max_blocks, struct buffer_head *bh_result)
+{
+ struct super_block *sb = inode->i_sb;
+ sector_t last_block, disk_block;
+ const unsigned long blocksize = sb->s_blocksize;
+ const unsigned char blocksize_bits = sb->s_blocksize_bits;
+
+ last_block = (MSDOS_I(inode)->mmu_private + (blocksize - 1))
+ >> blocksize_bits;
+ disk_block = (MSDOS_I(inode)->i_disksize + (blocksize - 1))
+ >> blocksize_bits;
+ if (iblock >= last_block && iblock <= disk_block) {
+ MSDOS_I(inode)->mmu_private += *max_blocks << blocksize_bits;
+ set_buffer_new(bh_result);
+ }
+
+}
+
static inline int __fat_get_block(struct inode *inode, sector_t iblock,
unsigned long *max_blocks,
struct buffer_head *bh_result, int create)
@@ -68,8 +87,11 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
if (err)
return err;
if (phys) {
- map_bh(bh_result, sb, phys);
*max_blocks = min(mapped_blocks, *max_blocks);
+ if (create)
+ check_fallocated_region(inode, iblock, max_blocks,
+ bh_result);
+ map_bh(bh_result, sb, phys);
return 0;
}
if (!create)
@@ -93,6 +115,7 @@ static inline int __fat_get_block(struct inode *inode, sector_t iblock,
*max_blocks = min(mapped_blocks, *max_blocks);
MSDOS_I(inode)->mmu_private += *max_blocks << sb->s_blocksize_bits;
+ MSDOS_I(inode)->i_disksize = MSDOS_I(inode)->mmu_private;
err = fat_bmap(inode, iblock, &phys, &mapped_blocks, create);
if (err)
@@ -206,6 +229,13 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb,
loff_t size = offset + iov_length(iov, nr_segs);
if (MSDOS_I(inode)->mmu_private < size)
return 0;
+
+ /*
+ * In case of writing in fallocated region, return 0 and
+ * fallback to buffered write.
+ */
+ if (MSDOS_I(inode)->i_disksize > MSDOS_I(inode)->mmu_private)
+ return 0;
}
/*
@@ -226,7 +256,10 @@ static sector_t _fat_bmap(struct address_space *mapping, sector_t block)
/* fat_get_cluster() assumes the requested blocknr isn't truncated. */
down_read(&MSDOS_I(mapping->host)->truncate_lock);
+ /* To get block number beyond file size in fallocated region */
+ atomic_set(&MSDOS_I(mapping->host)->beyond_isize, 1);
blocknr = generic_block_bmap(mapping, block, fat_get_block);
+ atomic_set(&MSDOS_I(mapping->host)->beyond_isize, 0);
up_read(&MSDOS_I(mapping->host)->truncate_lock);
return blocknr;
@@ -408,6 +441,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
if (error < 0)
return error;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
set_nlink(inode, fat_subdirs(inode));
} else { /* not a directory */
@@ -423,6 +457,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_fop = &fat_file_operations;
inode->i_mapping->a_ops = &fat_aops;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
}
if (de->attr & ATTR_SYS) {
if (sbi->options.sys_immutable)
@@ -494,6 +529,25 @@ static void fat_evict_inode(struct inode *inode)
if (!inode->i_nlink) {
inode->i_size = 0;
fat_truncate_blocks(inode, 0);
+ } else {
+ /* Release unwritten fallocated blocks on inode eviction. */
+ if (MSDOS_I(inode)->mmu_private < MSDOS_I(inode)->i_disksize) {
+ int err;
+ fat_truncate_blocks(inode, MSDOS_I(inode)->mmu_private);
+ /* Fallocate results in updating the i_start/iogstart
+ * for the zero byte file. So, make it return to
+ * original state during evict and commit it
+ * synchrnously to avoid any corruption on the next
+ * access to the cluster chain for the file.
+ */
+ err = fat_sync_inode(inode);
+ if (err) {
+ fat_msg(inode->i_sb, KERN_WARNING, "Failed to "
+ "update on disk inode for unused fallocated "
+ "blocks, inode could be corrupted. Please run "
+ "fsck");
+ }
+ }
}
invalidate_inode_buffers(inode);
clear_inode(inode);
@@ -1223,6 +1277,7 @@ static int fat_read_root(struct inode *inode)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
MSDOS_I(inode)->i_logstart = 0;
MSDOS_I(inode)->mmu_private = inode->i_size;
+ MSDOS_I(inode)->i_disksize = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 37213d075f3c..3ebda928229c 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -178,64 +178,6 @@ const struct dentry_operations hfsplus_dentry_operations = {
.d_compare = hfsplus_compare_dentry,
};
-static struct dentry *hfsplus_file_lookup(struct inode *dir,
- struct dentry *dentry, unsigned int flags)
-{
- struct hfs_find_data fd;
- struct super_block *sb = dir->i_sb;
- struct inode *inode = NULL;
- struct hfsplus_inode_info *hip;
- int err;
-
- if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc"))
- goto out;
-
- inode = HFSPLUS_I(dir)->rsrc_inode;
- if (inode)
- goto out;
-
- inode = new_inode(sb);
- if (!inode)
- return ERR_PTR(-ENOMEM);
-
- hip = HFSPLUS_I(inode);
- inode->i_ino = dir->i_ino;
- INIT_LIST_HEAD(&hip->open_dir_list);
- mutex_init(&hip->extents_lock);
- hip->extent_state = 0;
- hip->flags = 0;
- hip->userflags = 0;
- set_bit(HFSPLUS_I_RSRC, &hip->flags);
-
- err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
- if (!err) {
- err = hfsplus_find_cat(sb, dir->i_ino, &fd);
- if (!err)
- err = hfsplus_cat_read_inode(inode, &fd);
- hfs_find_exit(&fd);
- }
- if (err) {
- iput(inode);
- return ERR_PTR(err);
- }
- hip->rsrc_inode = dir;
- HFSPLUS_I(dir)->rsrc_inode = inode;
- igrab(dir);
-
- /*
- * __mark_inode_dirty expects inodes to be hashed. Since we don't
- * want resource fork inodes in the regular inode space, we make them
- * appear hashed, but do not put on any lists. hlist_del()
- * will work fine and require no locking.
- */
- hlist_add_fake(&inode->i_hash);
-
- mark_inode_dirty(inode);
-out:
- d_add(dentry, inode);
- return NULL;
-}
-
static void hfsplus_get_perms(struct inode *inode,
struct hfsplus_perm *perms, int dir)
{
@@ -385,7 +327,6 @@ int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
}
static const struct inode_operations hfsplus_file_inode_operations = {
- .lookup = hfsplus_file_lookup,
.setattr = hfsplus_setattr,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 09b3ed455724..2b91675ffcab 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -456,12 +456,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
The umask is only applied if there's no default ACL */
ret = jffs2_init_acl_pre(dir_i, inode, &mode);
if (ret) {
- make_bad_inode(inode);
- iput(inode);
- return ERR_PTR(ret);
+ mutex_unlock(&f->sem);
+ make_bad_inode(inode);
+ iput(inode);
+ return ERR_PTR(ret);
}
ret = jffs2_do_new_inode (c, f, mode, ri);
if (ret) {
+ mutex_unlock(&f->sem);
make_bad_inode(inode);
iput(inode);
return ERR_PTR(ret);
@@ -478,6 +480,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
inode->i_size = 0;
if (insert_inode_locked(inode) < 0) {
+ mutex_unlock(&f->sem);
make_bad_inode(inode);
iput(inode);
return ERR_PTR(-EINVAL);
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 975a1f562c10..9a5449bc3afb 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -564,25 +564,10 @@ struct jffs2_node_frag *jffs2_lookup_node_frag(struct rb_root *fragtree, uint32_
they're killed. */
void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c)
{
- struct jffs2_node_frag *frag;
- struct jffs2_node_frag *parent;
-
- if (!root->rb_node)
- return;
+ struct jffs2_node_frag *frag, *next;
dbg_fragtree("killing\n");
-
- frag = (rb_entry(root->rb_node, struct jffs2_node_frag, rb));
- while(frag) {
- if (frag->rb.rb_left) {
- frag = frag_left(frag);
- continue;
- }
- if (frag->rb.rb_right) {
- frag = frag_right(frag);
- continue;
- }
-
+ rbtree_postorder_for_each_entry_safe(frag, next, root, rb) {
if (frag->node && !(--frag->node->frags)) {
/* Not a hole, and it's the final remaining frag
of this node. Free the node */
@@ -591,17 +576,8 @@ void jffs2_kill_fragtree(struct rb_root *root, struct jffs2_sb_info *c)
jffs2_free_full_dnode(frag->node);
}
- parent = frag_parent(frag);
- if (parent) {
- if (frag_left(parent) == frag)
- parent->rb.rb_left = NULL;
- else
- parent->rb.rb_right = NULL;
- }
jffs2_free_node_frag(frag);
- frag = parent;
-
cond_resched();
}
}
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index ae81b01e6fd7..386303dca382 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -543,33 +543,13 @@ static int jffs2_build_inode_fragtree(struct jffs2_sb_info *c,
static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
{
- struct rb_node *this;
- struct jffs2_tmp_dnode_info *tn;
-
- this = list->rb_node;
+ struct jffs2_tmp_dnode_info *tn, *next;
- /* Now at bottom of tree */
- while (this) {
- if (this->rb_left)
- this = this->rb_left;
- else if (this->rb_right)
- this = this->rb_right;
- else {
- tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
+ rbtree_postorder_for_each_entry_safe(tn, next, list, rb) {
jffs2_free_full_dnode(tn->fn);
jffs2_free_tmp_dnode_info(tn);
-
- this = rb_parent(this);
- if (!this)
- break;
-
- if (this->rb_left == &tn->rb)
- this->rb_left = NULL;
- else if (this->rb_right == &tn->rb)
- this->rb_right = NULL;
- else BUG();
- }
}
+
*list = RB_ROOT;
}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index d448a777166b..7f9b096d8d57 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -62,7 +62,8 @@ static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
page = read_cache_page(mapping, index, filler, sb);
else {
page = find_or_create_page(mapping, index, GFP_NOFS);
- unlock_page(page);
+ if (page)
+ unlock_page(page);
}
return page;
}
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index b44bdb291b84..2b34021948e4 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -37,7 +37,26 @@
#include "sufile.h"
#include "dat.h"
-
+/**
+ * nilfs_ioctl_wrap_copy - wrapping function of get/set metadata info
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @dir: set of direction flags
+ * @dofunc: concrete function of get/set metadata info
+ *
+ * Description: nilfs_ioctl_wrap_copy() gets/sets metadata info by means of
+ * calling dofunc() function on the basis of @argv argument.
+ *
+ * Return Value: On success, 0 is returned and requested metadata info
+ * is copied into userspace. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
struct nilfs_argv *argv, int dir,
ssize_t (*dofunc)(struct the_nilfs *,
@@ -57,6 +76,14 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
if (argv->v_size > PAGE_SIZE)
return -EINVAL;
+ /*
+ * Reject pairs of a start item position (argv->v_index) and a
+ * total count (argv->v_nmembs) which leads position 'pos' to
+ * overflow by the increment at the end of the loop.
+ */
+ if (argv->v_index > ~(__u64)0 - argv->v_nmembs)
+ return -EINVAL;
+
buf = (void *)__get_free_pages(GFP_NOFS, 0);
if (unlikely(!buf))
return -ENOMEM;
@@ -99,6 +126,9 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs,
return ret;
}
+/**
+ * nilfs_ioctl_getflags - ioctl to support lsattr
+ */
static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
{
unsigned int flags = NILFS_I(inode)->i_flags & FS_FL_USER_VISIBLE;
@@ -106,6 +136,9 @@ static int nilfs_ioctl_getflags(struct inode *inode, void __user *argp)
return put_user(flags, (int __user *)argp);
}
+/**
+ * nilfs_ioctl_setflags - ioctl to support chattr
+ */
static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
void __user *argp)
{
@@ -158,11 +191,33 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_getversion - get info about a file's version (generation number)
+ */
static int nilfs_ioctl_getversion(struct inode *inode, void __user *argp)
{
return put_user(inode->i_generation, (int __user *)argp);
}
+/**
+ * nilfs_ioctl_change_cpmode - change checkpoint mode (checkpoint/snapshot)
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_change_cpmode() function changes mode of
+ * given checkpoint between checkpoint and snapshot state. This ioctl
+ * is used in chcp and mkcp utilities.
+ *
+ * Return Value: On success, 0 is returned and mode of a checkpoint is
+ * changed. On error, one of the following negative error codes
+ * is returned.
+ *
+ * %-EPERM - Operation not permitted.
+ *
+ * %-EFAULT - Failure during checkpoint mode changing.
+ */
static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -198,6 +253,25 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_delete_checkpoint - remove checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_delete_checkpoint() function removes
+ * checkpoint from NILFS2 file system. This ioctl is used in rmcp
+ * utility.
+ *
+ * Return Value: On success, 0 is returned and a checkpoint is
+ * removed. On error, one of the following negative error codes
+ * is returned.
+ *
+ * %-EPERM - Operation not permitted.
+ *
+ * %-EFAULT - Failure during checkpoint removing.
+ */
static int
nilfs_ioctl_delete_checkpoint(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
@@ -229,6 +303,21 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_do_get_cpinfo - callback method getting info about checkpoints
+ * @nilfs: nilfs object
+ * @posp: pointer on array of checkpoint's numbers
+ * @flags: checkpoint mode (checkpoint or snapshot)
+ * @buf: buffer for storing checkponts' info
+ * @size: size in bytes of one checkpoint info item in array
+ * @nmembs: number of checkpoints in array (numbers and infos)
+ *
+ * Description: nilfs_ioctl_do_get_cpinfo() function returns info about
+ * requested checkpoints. The NILFS_IOCTL_GET_CPINFO ioctl is used in
+ * lscp utility and by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_cpinfo structures in output buffer.
+ */
static ssize_t
nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -242,6 +331,27 @@ nilfs_ioctl_do_get_cpinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_get_cpstat - get checkpoints statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_cpstat() returns information about checkpoints.
+ * The NILFS_IOCTL_GET_CPSTAT ioctl is used by lscp, rmcp utilities
+ * and by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and checkpoints information is
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting checkpoints statistics.
+ */
static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -260,6 +370,21 @@ static int nilfs_ioctl_get_cpstat(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_suinfo - callback method getting segment usage info
+ * @nilfs: nilfs object
+ * @posp: pointer on array of segment numbers
+ * @flags: *not used*
+ * @buf: buffer for storing suinfo array
+ * @size: size in bytes of one suinfo item in array
+ * @nmembs: count of segment numbers and suinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_suinfo() function returns segment usage
+ * info about requested segments. The NILFS_IOCTL_GET_SUINFO ioctl is used
+ * in lssu, nilfs_resize utilities and by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_suinfo structures in output buffer.
+ */
static ssize_t
nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -273,6 +398,27 @@ nilfs_ioctl_do_get_suinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_get_sustat - get segment usage statistics
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_get_sustat() returns segment usage statistics.
+ * The NILFS_IOCTL_GET_SUSTAT ioctl is used in lssu, nilfs_resize utilities
+ * and by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and segment usage information is
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting segment usage statistics.
+ */
static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -291,6 +437,21 @@ static int nilfs_ioctl_get_sustat(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_vinfo - callback method getting virtual blocks info
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_vinfo structures
+ * @size: size in bytes of one vinfo item in array
+ * @nmembs: count of vinfos in array
+ *
+ * Description: nilfs_ioctl_do_get_vinfo() function returns information
+ * on virtual block addresses. The NILFS_IOCTL_GET_VINFO ioctl is used
+ * by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_vinfo structures in output buffer.
+ */
static ssize_t
nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -303,6 +464,21 @@ nilfs_ioctl_do_get_vinfo(struct the_nilfs *nilfs, __u64 *posp, int flags,
return ret;
}
+/**
+ * nilfs_ioctl_do_get_bdescs - callback method getting disk block descriptors
+ * @nilfs: nilfs object
+ * @posp: *not used*
+ * @flags: *not used*
+ * @buf: buffer for storing array of nilfs_bdesc structures
+ * @size: size in bytes of one bdesc item in array
+ * @nmembs: count of bdescs in array
+ *
+ * Description: nilfs_ioctl_do_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon.
+ *
+ * Return value: count of nilfs_bdescs structures in output buffer.
+ */
static ssize_t
nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
void *buf, size_t size, size_t nmembs)
@@ -329,6 +505,29 @@ nilfs_ioctl_do_get_bdescs(struct the_nilfs *nilfs, __u64 *posp, int flags,
return nmembs;
}
+/**
+ * nilfs_ioctl_get_bdescs - get disk block descriptors
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_do_get_bdescs() function returns information
+ * about descriptors of disk block numbers. The NILFS_IOCTL_GET_BDESCS ioctl
+ * is used by nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned, and disk block descriptors are
+ * copied into userspace pointer @argp. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during getting disk block descriptors.
+ */
static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -352,6 +551,26 @@ static int nilfs_ioctl_get_bdescs(struct inode *inode, struct file *filp,
return ret;
}
+/**
+ * nilfs_ioctl_move_inode_block - prepare data/node block for moving by GC
+ * @inode: inode object
+ * @vdesc: descriptor of virtual block number
+ * @buffers: list of moving buffers
+ *
+ * Description: nilfs_ioctl_move_inode_block() function registers data/node
+ * buffer in the GC pagecache and submit read request.
+ *
+ * Return Value: On success, 0 is returned. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - Requested block doesn't exist.
+ *
+ * %-EEXIST - Blocks conflict is detected.
+ */
static int nilfs_ioctl_move_inode_block(struct inode *inode,
struct nilfs_vdesc *vdesc,
struct list_head *buffers)
@@ -397,6 +616,19 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode,
return 0;
}
+/**
+ * nilfs_ioctl_move_blocks - move valid inode's blocks during garbage collection
+ * @sb: superblock object
+ * @argv: vector of arguments from userspace
+ * @buf: array of nilfs_vdesc structures
+ *
+ * Description: nilfs_ioctl_move_blocks() function reads valid data/node
+ * blocks that garbage collector specified with the array of nilfs_vdesc
+ * structures and stores them into page caches of GC inodes.
+ *
+ * Return Value: Number of processed nilfs_vdesc structures or
+ * error code, otherwise.
+ */
static int nilfs_ioctl_move_blocks(struct super_block *sb,
struct nilfs_argv *argv, void *buf)
{
@@ -462,6 +694,25 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
return ret;
}
+/**
+ * nilfs_ioctl_delete_checkpoints - delete checkpoints
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of periods of checkpoints numbers
+ *
+ * Description: nilfs_ioctl_delete_checkpoints() function deletes checkpoints
+ * in the period from p_start to p_end, excluding p_end itself. The checkpoints
+ * which have been already deleted are ignored.
+ *
+ * Return Value: Number of processed nilfs_period structures or
+ * error code, otherwise.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EINVAL - invalid checkpoints.
+ */
static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
@@ -479,6 +730,24 @@ static int nilfs_ioctl_delete_checkpoints(struct the_nilfs *nilfs,
return nmembs;
}
+/**
+ * nilfs_ioctl_free_vblocknrs - free virtual block numbers
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of virtual block numbers
+ *
+ * Description: nilfs_ioctl_free_vblocknrs() function frees
+ * the virtual block numbers specified by @buf and @argv->v_nmembs.
+ *
+ * Return Value: Number of processed virtual block numbers or
+ * error code, otherwise.
+ *
+ * %-EIO - I/O error.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-ENOENT - The virtual block number have not been allocated.
+ */
static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
@@ -490,6 +759,24 @@ static int nilfs_ioctl_free_vblocknrs(struct the_nilfs *nilfs,
return (ret < 0) ? ret : nmembs;
}
+/**
+ * nilfs_ioctl_mark_blocks_dirty - mark blocks dirty
+ * @nilfs: nilfs object
+ * @argv: vector of arguments from userspace
+ * @buf: array of block descriptors
+ *
+ * Description: nilfs_ioctl_mark_blocks_dirty() function marks
+ * metadata file or data blocks as dirty.
+ *
+ * Return Value: Number of processed block descriptors or
+ * error code, otherwise.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOENT - the specified block does not exist (hole block)
+ */
static int nilfs_ioctl_mark_blocks_dirty(struct the_nilfs *nilfs,
struct nilfs_argv *argv, void *buf)
{
@@ -571,6 +858,20 @@ int nilfs_ioctl_prepare_clean_segments(struct the_nilfs *nilfs,
return ret;
}
+/**
+ * nilfs_ioctl_clean_segments - clean segments
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_clean_segments() function makes garbage
+ * collection operation in the environment of requested parameters
+ * from userspace. The NILFS_IOCTL_CLEAN_SEGMENTS ioctl is used by
+ * nilfs_cleanerd daemon.
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -682,6 +983,33 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_sync - make a checkpoint
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ *
+ * Description: nilfs_ioctl_sync() function constructs a logical segment
+ * for checkpointing. This function guarantees that all modified data
+ * and metadata are written out to the device when it successfully
+ * returned.
+ *
+ * Return Value: On success, 0 is retured. On errors, one of the following
+ * negative error code is returned.
+ *
+ * %-EROFS - Read only filesystem.
+ *
+ * %-EIO - I/O error
+ *
+ * %-ENOSPC - No space left on device (only in a panic state).
+ *
+ * %-ERESTARTSYS - Interrupted.
+ *
+ * %-ENOMEM - Insufficient memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
@@ -710,6 +1038,14 @@ static int nilfs_ioctl_sync(struct inode *inode, struct file *filp,
return 0;
}
+/**
+ * nilfs_ioctl_resize - resize NILFS2 volume
+ * @inode: inode object
+ * @filp: file object
+ * @argp: pointer on argument from userspace
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
static int nilfs_ioctl_resize(struct inode *inode, struct file *filp,
void __user *argp)
{
@@ -735,6 +1071,17 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated
+ * @inode: inode object
+ * @argp: pointer on argument from userspace
+ *
+ * Decription: nilfs_ioctl_set_alloc_range() function defines lower limit
+ * of segments in bytes and upper limit of segments in bytes.
+ * The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility.
+ *
+ * Return Value: On success, 0 is returned or error code, otherwise.
+ */
static int nilfs_ioctl_set_alloc_range(struct inode *inode, void __user *argp)
{
struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
@@ -767,6 +1114,28 @@ out:
return ret;
}
+/**
+ * nilfs_ioctl_get_info - wrapping function of get metadata info
+ * @inode: inode object
+ * @filp: file object
+ * @cmd: ioctl's request code
+ * @argp: pointer on argument from userspace
+ * @membsz: size of an item in bytes
+ * @dofunc: concrete function of getting metadata info
+ *
+ * Description: nilfs_ioctl_get_info() gets metadata info by means of
+ * calling dofunc() function.
+ *
+ * Return Value: On success, 0 is returned and requested metadata info
+ * is copied into userspace. On error, one of the following
+ * negative error codes is returned.
+ *
+ * %-EINVAL - Invalid arguments from userspace.
+ *
+ * %-ENOMEM - Insufficient amount of memory available.
+ *
+ * %-EFAULT - Failure during execution of requested operation.
+ */
static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp,
size_t membsz,
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 9f6b486b6c01..a1a191634abc 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1440,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 1fedd5f7ccc4..0b9ff4395e6a 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -82,20 +82,23 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
* events.
*/
static int dnotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmount_mark,
- struct fsnotify_event *event)
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name)
{
struct dnotify_mark *dn_mark;
- struct inode *to_tell;
struct dnotify_struct *dn;
struct dnotify_struct **prev;
struct fown_struct *fown;
- __u32 test_mask = event->mask & ~FS_EVENT_ON_CHILD;
+ __u32 test_mask = mask & ~FS_EVENT_ON_CHILD;
- BUG_ON(vfsmount_mark);
+ /* not a dir, dnotify doesn't care */
+ if (!S_ISDIR(inode->i_mode))
+ return 0;
- to_tell = event->to_tell;
+ BUG_ON(vfsmount_mark);
dn_mark = container_of(inode_mark, struct dnotify_mark, fsn_mark);
@@ -122,23 +125,6 @@ static int dnotify_handle_event(struct fsnotify_group *group,
return 0;
}
-/*
- * Given an inode and mask determine if dnotify would be interested in sending
- * userspace notification for that pair.
- */
-static bool dnotify_should_send_event(struct fsnotify_group *group,
- struct inode *inode,
- struct fsnotify_mark *inode_mark,
- struct fsnotify_mark *vfsmount_mark,
- __u32 mask, void *data, int data_type)
-{
- /* not a dir, dnotify doesn't care */
- if (!S_ISDIR(inode->i_mode))
- return false;
-
- return true;
-}
-
static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
{
struct dnotify_mark *dn_mark = container_of(fsn_mark,
@@ -152,10 +138,6 @@ static void dnotify_free_mark(struct fsnotify_mark *fsn_mark)
static struct fsnotify_ops dnotify_fsnotify_ops = {
.handle_event = dnotify_handle_event,
- .should_send_event = dnotify_should_send_event,
- .free_group_priv = NULL,
- .freeing_mark = NULL,
- .free_event_priv = NULL,
};
/*
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 0c2f9122b262..750895a801da 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -9,31 +9,27 @@
#include <linux/types.h>
#include <linux/wait.h>
-static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
+#include "fanotify.h"
+
+static bool should_merge(struct fsnotify_event *old_fsn,
+ struct fsnotify_event *new_fsn)
{
- pr_debug("%s: old=%p new=%p\n", __func__, old, new);
+ struct fanotify_event_info *old, *new;
- if (old->to_tell == new->to_tell &&
- old->data_type == new->data_type &&
- old->tgid == new->tgid) {
- switch (old->data_type) {
- case (FSNOTIFY_EVENT_PATH):
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- /* dont merge two permission events */
- if ((old->mask & FAN_ALL_PERM_EVENTS) &&
- (new->mask & FAN_ALL_PERM_EVENTS))
- return false;
+ /* dont merge two permission events */
+ if ((old_fsn->mask & FAN_ALL_PERM_EVENTS) &&
+ (new_fsn->mask & FAN_ALL_PERM_EVENTS))
+ return false;
#endif
- if ((old->path.mnt == new->path.mnt) &&
- (old->path.dentry == new->path.dentry))
- return true;
- break;
- case (FSNOTIFY_EVENT_NONE):
- return true;
- default:
- BUG();
- };
- }
+ pr_debug("%s: old=%p new=%p\n", __func__, old_fsn, new_fsn);
+ old = FANOTIFY_E(old_fsn);
+ new = FANOTIFY_E(new_fsn);
+
+ if (old_fsn->inode == new_fsn->inode && old->tgid == new->tgid &&
+ old->path.mnt == new->path.mnt &&
+ old->path.dentry == new->path.dentry)
+ return true;
return false;
}
@@ -41,59 +37,25 @@ static bool should_merge(struct fsnotify_event *old, struct fsnotify_event *new)
static struct fsnotify_event *fanotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
- struct fsnotify_event_holder *test_holder;
struct fsnotify_event *test_event = NULL;
- struct fsnotify_event *new_event;
pr_debug("%s: list=%p event=%p\n", __func__, list, event);
-
- list_for_each_entry_reverse(test_holder, list, event_list) {
- if (should_merge(test_holder->event, event)) {
- test_event = test_holder->event;
+ list_for_each_entry_reverse(test_event, list, list) {
+ if (should_merge(test_event, event))
break;
- }
}
if (!test_event)
return NULL;
- fsnotify_get_event(test_event);
-
- /* if they are exactly the same we are done */
- if (test_event->mask == event->mask)
- return test_event;
-
- /*
- * if the refcnt == 2 this is the only queue
- * for this event and so we can update the mask
- * in place.
- */
- if (atomic_read(&test_event->refcnt) == 2) {
- test_event->mask |= event->mask;
- return test_event;
- }
-
- new_event = fsnotify_clone_event(test_event);
-
- /* done with test_event */
- fsnotify_put_event(test_event);
-
- /* couldn't allocate memory, merge was not possible */
- if (unlikely(!new_event))
- return ERR_PTR(-ENOMEM);
-
- /* build new event and replace it on the list */
- new_event->mask = (test_event->mask | event->mask);
- fsnotify_replace_event(test_holder, new_event);
-
- /* we hold a reference on new_event from clone_event */
- return new_event;
+ test_event->mask |= event->mask;
+ return test_event;
}
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
static int fanotify_get_response_from_access(struct fsnotify_group *group,
- struct fsnotify_event *event)
+ struct fanotify_event_info *event)
{
int ret;
@@ -106,7 +68,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
return 0;
/* userspace responded, convert to something usable */
- spin_lock(&event->lock);
switch (event->response) {
case FAN_ALLOW:
ret = 0;
@@ -116,7 +77,6 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
ret = -EPERM;
}
event->response = 0;
- spin_unlock(&event->lock);
pr_debug("%s: group=%p event=%p about to return ret=%d\n", __func__,
group, event, ret);
@@ -125,58 +85,17 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
}
#endif
-static int fanotify_handle_event(struct fsnotify_group *group,
- struct fsnotify_mark *inode_mark,
- struct fsnotify_mark *fanotify_mark,
- struct fsnotify_event *event)
-{
- int ret = 0;
- struct fsnotify_event *notify_event = NULL;
-
- BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
- BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
- BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
- BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
- BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
- BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
- BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
- BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
- BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
- BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
-
- pr_debug("%s: group=%p event=%p\n", __func__, group, event);
-
- notify_event = fsnotify_add_notify_event(group, event, NULL, fanotify_merge);
- if (IS_ERR(notify_event))
- return PTR_ERR(notify_event);
-
-#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
- if (event->mask & FAN_ALL_PERM_EVENTS) {
- /* if we merged we need to wait on the new event */
- if (notify_event)
- event = notify_event;
- ret = fanotify_get_response_from_access(group, event);
- }
-#endif
-
- if (notify_event)
- fsnotify_put_event(notify_event);
-
- return ret;
-}
-
-static bool fanotify_should_send_event(struct fsnotify_group *group,
- struct inode *to_tell,
- struct fsnotify_mark *inode_mark,
+static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark,
struct fsnotify_mark *vfsmnt_mark,
- __u32 event_mask, void *data, int data_type)
+ u32 event_mask,
+ void *data, int data_type)
{
__u32 marks_mask, marks_ignored_mask;
struct path *path = data;
- pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
- "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
- inode_mark, vfsmnt_mark, event_mask, data, data_type);
+ pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p"
+ " data_type=%d\n", __func__, inode_mark, vfsmnt_mark,
+ event_mask, data, data_type);
/* if we don't have enough info to send an event to userspace say no */
if (data_type != FSNOTIFY_EVENT_PATH)
@@ -217,6 +136,74 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
return false;
}
+static int fanotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *fanotify_mark,
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name)
+{
+ int ret = 0;
+ struct fanotify_event_info *event;
+ struct fsnotify_event *fsn_event;
+ struct fsnotify_event *notify_fsn_event;
+
+ BUILD_BUG_ON(FAN_ACCESS != FS_ACCESS);
+ BUILD_BUG_ON(FAN_MODIFY != FS_MODIFY);
+ BUILD_BUG_ON(FAN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE);
+ BUILD_BUG_ON(FAN_CLOSE_WRITE != FS_CLOSE_WRITE);
+ BUILD_BUG_ON(FAN_OPEN != FS_OPEN);
+ BUILD_BUG_ON(FAN_EVENT_ON_CHILD != FS_EVENT_ON_CHILD);
+ BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
+ BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
+ BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+ BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
+
+ if (!fanotify_should_send_event(inode_mark, fanotify_mark, mask, data,
+ data_type))
+ return 0;
+
+ pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+ mask);
+
+ event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL);
+ if (unlikely(!event))
+ return -ENOMEM;
+
+ fsn_event = &event->fse;
+ fsnotify_init_event(fsn_event, inode, mask);
+ event->tgid = get_pid(task_tgid(current));
+ if (data_type == FSNOTIFY_EVENT_PATH) {
+ struct path *path = data;
+ event->path = *path;
+ path_get(&event->path);
+ } else {
+ event->path.mnt = NULL;
+ event->path.dentry = NULL;
+ }
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ event->response = 0;
+#endif
+
+ notify_fsn_event = fsnotify_add_notify_event(group, fsn_event,
+ fanotify_merge);
+ if (notify_fsn_event) {
+ /* Our event wasn't used in the end. Free it. */
+ fsnotify_destroy_event(group, fsn_event);
+ if (IS_ERR(notify_fsn_event))
+ return PTR_ERR(notify_fsn_event);
+ /* We need to ask about a different events after a merge... */
+ event = FANOTIFY_E(notify_fsn_event);
+ fsn_event = notify_fsn_event;
+ }
+
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ if (fsn_event->mask & FAN_ALL_PERM_EVENTS)
+ ret = fanotify_get_response_from_access(group, event);
+#endif
+ return ret;
+}
+
static void fanotify_free_group_priv(struct fsnotify_group *group)
{
struct user_struct *user;
@@ -226,10 +213,18 @@ static void fanotify_free_group_priv(struct fsnotify_group *group)
free_uid(user);
}
+static void fanotify_free_event(struct fsnotify_event *fsn_event)
+{
+ struct fanotify_event_info *event;
+
+ event = FANOTIFY_E(fsn_event);
+ path_put(&event->path);
+ put_pid(event->tgid);
+ kmem_cache_free(fanotify_event_cachep, event);
+}
+
const struct fsnotify_ops fanotify_fsnotify_ops = {
.handle_event = fanotify_handle_event,
- .should_send_event = fanotify_should_send_event,
.free_group_priv = fanotify_free_group_priv,
- .free_event_priv = NULL,
- .freeing_mark = NULL,
+ .free_event = fanotify_free_event,
};
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
new file mode 100644
index 000000000000..0e90174a116a
--- /dev/null
+++ b/fs/notify/fanotify/fanotify.h
@@ -0,0 +1,23 @@
+#include <linux/fsnotify_backend.h>
+#include <linux/path.h>
+#include <linux/slab.h>
+
+extern struct kmem_cache *fanotify_event_cachep;
+
+struct fanotify_event_info {
+ struct fsnotify_event fse;
+ /*
+ * We hold ref to this path so it may be dereferenced at any point
+ * during this object's lifetime
+ */
+ struct path path;
+ struct pid *tgid;
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+ u32 response; /* userspace answer to question */
+#endif
+};
+
+static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse)
+{
+ return container_of(fse, struct fanotify_event_info, fse);
+}
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index e44cb6427df3..57d7c083cb4b 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -19,6 +19,7 @@
#include "../../mount.h"
#include "../fdinfo.h"
+#include "fanotify.h"
#define FANOTIFY_DEFAULT_MAX_EVENTS 16384
#define FANOTIFY_DEFAULT_MAX_MARKS 8192
@@ -28,11 +29,12 @@ extern const struct fsnotify_ops fanotify_fsnotify_ops;
static struct kmem_cache *fanotify_mark_cache __read_mostly;
static struct kmem_cache *fanotify_response_event_cache __read_mostly;
+struct kmem_cache *fanotify_event_cachep __read_mostly;
struct fanotify_response_event {
struct list_head list;
__s32 fd;
- struct fsnotify_event *event;
+ struct fanotify_event_info *event;
};
/*
@@ -61,8 +63,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
}
static int create_fd(struct fsnotify_group *group,
- struct fsnotify_event *event,
- struct file **file)
+ struct fanotify_event_info *event,
+ struct file **file)
{
int client_fd;
struct file *new_file;
@@ -73,12 +75,6 @@ static int create_fd(struct fsnotify_group *group,
if (client_fd < 0)
return client_fd;
- if (event->data_type != FSNOTIFY_EVENT_PATH) {
- WARN_ON(1);
- put_unused_fd(client_fd);
- return -EINVAL;
- }
-
/*
* we need a new file handle for the userspace program so it can read even if it was
* originally opened O_WRONLY.
@@ -109,23 +105,25 @@ static int create_fd(struct fsnotify_group *group,
}
static int fill_event_metadata(struct fsnotify_group *group,
- struct fanotify_event_metadata *metadata,
- struct fsnotify_event *event,
- struct file **file)
+ struct fanotify_event_metadata *metadata,
+ struct fsnotify_event *fsn_event,
+ struct file **file)
{
int ret = 0;
+ struct fanotify_event_info *event;
pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
- group, metadata, event);
+ group, metadata, fsn_event);
*file = NULL;
+ event = container_of(fsn_event, struct fanotify_event_info, fse);
metadata->event_len = FAN_EVENT_METADATA_LEN;
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
metadata->reserved = 0;
- metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
+ metadata->mask = fsn_event->mask & FAN_ALL_OUTGOING_EVENTS;
metadata->pid = pid_vnr(event->tgid);
- if (unlikely(event->mask & FAN_Q_OVERFLOW))
+ if (unlikely(fsn_event->mask & FAN_Q_OVERFLOW))
metadata->fd = FAN_NOFD;
else {
metadata->fd = create_fd(group, event, file);
@@ -209,7 +207,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
if (!re)
return -ENOMEM;
- re->event = event;
+ re->event = FANOTIFY_E(event);
re->fd = fd;
mutex_lock(&group->fanotify_data.access_mutex);
@@ -217,7 +215,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
if (atomic_read(&group->fanotify_data.bypass_perm)) {
mutex_unlock(&group->fanotify_data.access_mutex);
kmem_cache_free(fanotify_response_event_cache, re);
- event->response = FAN_ALLOW;
+ FANOTIFY_E(event)->response = FAN_ALLOW;
return 0;
}
@@ -273,7 +271,7 @@ out_close_fd:
out:
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
if (event->mask & FAN_ALL_PERM_EVENTS) {
- event->response = FAN_DENY;
+ FANOTIFY_E(event)->response = FAN_DENY;
wake_up(&group->fanotify_data.access_waitq);
}
#endif
@@ -321,7 +319,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
- fsnotify_put_event(kevent);
+ fsnotify_destroy_event(group, kevent);
if (ret < 0)
break;
buf += ret;
@@ -409,7 +407,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct fsnotify_group *group;
- struct fsnotify_event_holder *holder;
+ struct fsnotify_event *fsn_event;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
@@ -421,7 +419,7 @@ static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long ar
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
- list_for_each_entry(holder, &group->notification_list, event_list)
+ list_for_each_entry(fsn_event, &group->notification_list, list)
send_len += FAN_EVENT_METADATA_LEN;
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
@@ -906,6 +904,7 @@ static int __init fanotify_user_setup(void)
fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event,
SLAB_PANIC);
+ fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC);
return 0;
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4bb21d67d9b1..1d4e1ea2f37c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -128,8 +128,7 @@ static int send_to_group(struct inode *to_tell,
struct fsnotify_mark *vfsmount_mark,
__u32 mask, void *data,
int data_is, u32 cookie,
- const unsigned char *file_name,
- struct fsnotify_event **event)
+ const unsigned char *file_name)
{
struct fsnotify_group *group = NULL;
__u32 inode_test_mask = 0;
@@ -170,27 +169,17 @@ static int send_to_group(struct inode *to_tell,
pr_debug("%s: group=%p to_tell=%p mask=%x inode_mark=%p"
" inode_test_mask=%x vfsmount_mark=%p vfsmount_test_mask=%x"
- " data=%p data_is=%d cookie=%d event=%p\n",
+ " data=%p data_is=%d cookie=%d\n",
__func__, group, to_tell, mask, inode_mark,
inode_test_mask, vfsmount_mark, vfsmount_test_mask, data,
- data_is, cookie, *event);
+ data_is, cookie);
if (!inode_test_mask && !vfsmount_test_mask)
return 0;
- if (group->ops->should_send_event(group, to_tell, inode_mark,
- vfsmount_mark, mask, data,
- data_is) == false)
- return 0;
-
- if (!*event) {
- *event = fsnotify_create_event(to_tell, mask, data,
- data_is, file_name,
- cookie, GFP_KERNEL);
- if (!*event)
- return -ENOMEM;
- }
- return group->ops->handle_event(group, inode_mark, vfsmount_mark, *event);
+ return group->ops->handle_event(group, to_tell, inode_mark,
+ vfsmount_mark, mask, data, data_is,
+ file_name);
}
/*
@@ -205,7 +194,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
struct hlist_node *inode_node = NULL, *vfsmount_node = NULL;
struct fsnotify_mark *inode_mark = NULL, *vfsmount_mark = NULL;
struct fsnotify_group *inode_group, *vfsmount_group;
- struct fsnotify_event *event = NULL;
struct mount *mnt;
int idx, ret = 0;
/* global tests shouldn't care about events on child only the specific event */
@@ -258,18 +246,18 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
if (inode_group > vfsmount_group) {
/* handle inode */
- ret = send_to_group(to_tell, inode_mark, NULL, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, inode_mark, NULL, mask,
+ data, data_is, cookie, file_name);
/* we didn't use the vfsmount_mark */
vfsmount_group = NULL;
} else if (vfsmount_group > inode_group) {
- ret = send_to_group(to_tell, NULL, vfsmount_mark, mask, data,
- data_is, cookie, file_name, &event);
+ ret = send_to_group(to_tell, NULL, vfsmount_mark, mask,
+ data, data_is, cookie, file_name);
inode_group = NULL;
} else {
ret = send_to_group(to_tell, inode_mark, vfsmount_mark,
- mask, data, data_is, cookie, file_name,
- &event);
+ mask, data, data_is, cookie,
+ file_name);
}
if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
@@ -285,12 +273,6 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
ret = 0;
out:
srcu_read_unlock(&fsnotify_mark_srcu, idx);
- /*
- * fsnotify_create_event() took a reference so the event can't be cleaned
- * up while we are still trying to add it to lists, drop that one.
- */
- if (event)
- fsnotify_put_event(event);
return ret;
}
diff --git a/fs/notify/group.c b/fs/notify/group.c
index bd2625bd88b4..ee674fe2cec7 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -99,6 +99,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
INIT_LIST_HEAD(&group->marks_list);
group->ops = ops;
+ fsnotify_init_event(&group->overflow_event, NULL, FS_Q_OVERFLOW);
return group;
}
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index b6642e4de4bf..485eef3f4407 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -2,11 +2,12 @@
#include <linux/inotify.h>
#include <linux/slab.h> /* struct kmem_cache */
-extern struct kmem_cache *event_priv_cachep;
-
-struct inotify_event_private_data {
- struct fsnotify_event_private_data fsnotify_event_priv_data;
+struct inotify_event_info {
+ struct fsnotify_event fse;
int wd;
+ u32 sync_cookie;
+ int name_len;
+ char name[];
};
struct inotify_inode_mark {
@@ -14,8 +15,18 @@ struct inotify_inode_mark {
int wd;
};
+static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
+{
+ return container_of(fse, struct inotify_event_info, fse);
+}
+
extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group);
-extern void inotify_free_event_priv(struct fsnotify_event_private_data *event_priv);
+extern int inotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name);
extern const struct fsnotify_ops inotify_fsnotify_ops;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 4216308b81b4..aad1a35e9af1 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -34,100 +34,87 @@
#include "inotify.h"
/*
- * Check if 2 events contain the same information. We do not compare private data
- * but at this moment that isn't a problem for any know fsnotify listeners.
+ * Check if 2 events contain the same information.
*/
-static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new)
+static bool event_compare(struct fsnotify_event *old_fsn,
+ struct fsnotify_event *new_fsn)
{
- if ((old->mask == new->mask) &&
- (old->to_tell == new->to_tell) &&
- (old->data_type == new->data_type) &&
- (old->name_len == new->name_len)) {
- switch (old->data_type) {
- case (FSNOTIFY_EVENT_INODE):
- /* remember, after old was put on the wait_q we aren't
- * allowed to look at the inode any more, only thing
- * left to check was if the file_name is the same */
- if (!old->name_len ||
- !strcmp(old->file_name, new->file_name))
- return true;
- break;
- case (FSNOTIFY_EVENT_PATH):
- if ((old->path.mnt == new->path.mnt) &&
- (old->path.dentry == new->path.dentry))
- return true;
- break;
- case (FSNOTIFY_EVENT_NONE):
- if (old->mask & FS_Q_OVERFLOW)
- return true;
- else if (old->mask & FS_IN_IGNORED)
- return false;
- return true;
- };
- }
+ struct inotify_event_info *old, *new;
+
+ if (old_fsn->mask & FS_IN_IGNORED)
+ return false;
+ old = INOTIFY_E(old_fsn);
+ new = INOTIFY_E(new_fsn);
+ if ((old_fsn->mask == new_fsn->mask) &&
+ (old_fsn->inode == new_fsn->inode) &&
+ (old->name_len == new->name_len) &&
+ (!old->name_len || !strcmp(old->name, new->name)))
+ return true;
return false;
}
static struct fsnotify_event *inotify_merge(struct list_head *list,
struct fsnotify_event *event)
{
- struct fsnotify_event_holder *last_holder;
struct fsnotify_event *last_event;
- /* and the list better be locked by something too */
- spin_lock(&event->lock);
-
- last_holder = list_entry(list->prev, struct fsnotify_event_holder, event_list);
- last_event = last_holder->event;
- if (event_compare(last_event, event))
- fsnotify_get_event(last_event);
- else
- last_event = NULL;
-
- spin_unlock(&event->lock);
-
+ last_event = list_entry(list->prev, struct fsnotify_event, list);
+ if (!event_compare(last_event, event))
+ return NULL;
return last_event;
}
-static int inotify_handle_event(struct fsnotify_group *group,
- struct fsnotify_mark *inode_mark,
- struct fsnotify_mark *vfsmount_mark,
- struct fsnotify_event *event)
+int inotify_handle_event(struct fsnotify_group *group,
+ struct inode *inode,
+ struct fsnotify_mark *inode_mark,
+ struct fsnotify_mark *vfsmount_mark,
+ u32 mask, void *data, int data_type,
+ const unsigned char *file_name)
{
struct inotify_inode_mark *i_mark;
- struct inode *to_tell;
- struct inotify_event_private_data *event_priv;
- struct fsnotify_event_private_data *fsn_event_priv;
+ struct inotify_event_info *event;
struct fsnotify_event *added_event;
- int wd, ret = 0;
+ struct fsnotify_event *fsn_event;
+ int ret = 0;
+ int len = 0;
+ int alloc_len = sizeof(struct inotify_event_info);
BUG_ON(vfsmount_mark);
- pr_debug("%s: group=%p event=%p to_tell=%p mask=%x\n", __func__, group,
- event, event->to_tell, event->mask);
+ if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+ (data_type == FSNOTIFY_EVENT_PATH)) {
+ struct path *path = data;
- to_tell = event->to_tell;
+ if (d_unlinked(path->dentry))
+ return 0;
+ }
+ if (file_name) {
+ len = strlen(file_name);
+ alloc_len += len + 1;
+ }
+
+ pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode,
+ mask);
i_mark = container_of(inode_mark, struct inotify_inode_mark,
fsn_mark);
- wd = i_mark->wd;
- event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
- if (unlikely(!event_priv))
+ event = kmalloc(alloc_len, GFP_KERNEL);
+ if (unlikely(!event))
return -ENOMEM;
- fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
- fsnotify_get_group(group);
- fsn_event_priv->group = group;
- event_priv->wd = wd;
+ fsn_event = &event->fse;
+ fsnotify_init_event(fsn_event, inode, mask);
+ event->wd = i_mark->wd;
+ event->name_len = len;
+ if (len)
+ strcpy(event->name, file_name);
- added_event = fsnotify_add_notify_event(group, event, fsn_event_priv, inotify_merge);
+ added_event = fsnotify_add_notify_event(group, fsn_event, inotify_merge);
if (added_event) {
- inotify_free_event_priv(fsn_event_priv);
- if (!IS_ERR(added_event))
- fsnotify_put_event(added_event);
- else
+ /* Our event wasn't used in the end. Free it. */
+ fsnotify_destroy_event(group, fsn_event);
+ if (IS_ERR(added_event))
ret = PTR_ERR(added_event);
}
@@ -142,22 +129,6 @@ static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify
inotify_ignored_and_remove_idr(fsn_mark, group);
}
-static bool inotify_should_send_event(struct fsnotify_group *group, struct inode *inode,
- struct fsnotify_mark *inode_mark,
- struct fsnotify_mark *vfsmount_mark,
- __u32 mask, void *data, int data_type)
-{
- if ((inode_mark->mask & FS_EXCL_UNLINK) &&
- (data_type == FSNOTIFY_EVENT_PATH)) {
- struct path *path = data;
-
- if (d_unlinked(path->dentry))
- return false;
- }
-
- return true;
-}
-
/*
* This is NEVER supposed to be called. Inotify marks should either have been
* removed from the idr when the watch was removed or in the
@@ -202,22 +173,14 @@ static void inotify_free_group_priv(struct fsnotify_group *group)
free_uid(group->inotify_data.user);
}
-void inotify_free_event_priv(struct fsnotify_event_private_data *fsn_event_priv)
+static void inotify_free_event(struct fsnotify_event *fsn_event)
{
- struct inotify_event_private_data *event_priv;
-
-
- event_priv = container_of(fsn_event_priv, struct inotify_event_private_data,
- fsnotify_event_priv_data);
-
- fsnotify_put_group(fsn_event_priv->group);
- kmem_cache_free(event_priv_cachep, event_priv);
+ kfree(INOTIFY_E(fsn_event));
}
const struct fsnotify_ops inotify_fsnotify_ops = {
.handle_event = inotify_handle_event,
- .should_send_event = inotify_should_send_event,
.free_group_priv = inotify_free_group_priv,
- .free_event_priv = inotify_free_event_priv,
+ .free_event = inotify_free_event,
.freeing_mark = inotify_freeing_mark,
};
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 60f954a891ab..497395c8274b 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -50,7 +50,6 @@ static int inotify_max_queued_events __read_mostly;
static int inotify_max_user_watches __read_mostly;
static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
-struct kmem_cache *event_priv_cachep __read_mostly;
#ifdef CONFIG_SYSCTL
@@ -124,6 +123,16 @@ static unsigned int inotify_poll(struct file *file, poll_table *wait)
return ret;
}
+static int round_event_name_len(struct fsnotify_event *fsn_event)
+{
+ struct inotify_event_info *event;
+
+ event = INOTIFY_E(fsn_event);
+ if (!event->name_len)
+ return 0;
+ return roundup(event->name_len + 1, sizeof(struct inotify_event));
+}
+
/*
* Get an inotify_kernel_event if one exists and is small
* enough to fit in "count". Return an error pointer if
@@ -144,9 +153,7 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- if (event->name_len)
- event_size += roundup(event->name_len + 1, event_size);
-
+ event_size += round_event_name_len(event);
if (event_size > count)
return ERR_PTR(-EINVAL);
@@ -164,40 +171,27 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
* buffer we had in "get_one_event()" above.
*/
static ssize_t copy_event_to_user(struct fsnotify_group *group,
- struct fsnotify_event *event,
+ struct fsnotify_event *fsn_event,
char __user *buf)
{
struct inotify_event inotify_event;
- struct fsnotify_event_private_data *fsn_priv;
- struct inotify_event_private_data *priv;
+ struct inotify_event_info *event;
size_t event_size = sizeof(struct inotify_event);
- size_t name_len = 0;
-
- pr_debug("%s: group=%p event=%p\n", __func__, group, event);
+ size_t name_len;
+ size_t pad_name_len;
- /* we get the inotify watch descriptor from the event private data */
- spin_lock(&event->lock);
- fsn_priv = fsnotify_remove_priv_from_event(group, event);
- spin_unlock(&event->lock);
-
- if (!fsn_priv)
- inotify_event.wd = -1;
- else {
- priv = container_of(fsn_priv, struct inotify_event_private_data,
- fsnotify_event_priv_data);
- inotify_event.wd = priv->wd;
- inotify_free_event_priv(fsn_priv);
- }
+ pr_debug("%s: group=%p event=%p\n", __func__, group, fsn_event);
+ event = INOTIFY_E(fsn_event);
+ name_len = event->name_len;
/*
- * round up event->name_len so it is a multiple of event_size
+ * round up name length so it is a multiple of event_size
* plus an extra byte for the terminating '\0'.
*/
- if (event->name_len)
- name_len = roundup(event->name_len + 1, event_size);
- inotify_event.len = name_len;
-
- inotify_event.mask = inotify_mask_to_arg(event->mask);
+ pad_name_len = round_event_name_len(fsn_event);
+ inotify_event.len = pad_name_len;
+ inotify_event.mask = inotify_mask_to_arg(fsn_event->mask);
+ inotify_event.wd = event->wd;
inotify_event.cookie = event->sync_cookie;
/* send the main event */
@@ -209,20 +203,18 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
/*
* fsnotify only stores the pathname, so here we have to send the pathname
* and then pad that pathname out to a multiple of sizeof(inotify_event)
- * with zeros. I get my zeros from the nul_inotify_event.
+ * with zeros.
*/
- if (name_len) {
- unsigned int len_to_zero = name_len - event->name_len;
+ if (pad_name_len) {
/* copy the path name */
- if (copy_to_user(buf, event->file_name, event->name_len))
+ if (copy_to_user(buf, event->name, name_len))
return -EFAULT;
- buf += event->name_len;
+ buf += name_len;
/* fill userspace with 0's */
- if (clear_user(buf, len_to_zero))
+ if (clear_user(buf, pad_name_len - name_len))
return -EFAULT;
- buf += len_to_zero;
- event_size += name_len;
+ event_size += pad_name_len;
}
return event_size;
@@ -254,7 +246,7 @@ static ssize_t inotify_read(struct file *file, char __user *buf,
if (IS_ERR(kevent))
break;
ret = copy_event_to_user(group, kevent, buf);
- fsnotify_put_event(kevent);
+ fsnotify_destroy_event(group, kevent);
if (ret < 0)
break;
buf += ret;
@@ -297,8 +289,7 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct fsnotify_group *group;
- struct fsnotify_event_holder *holder;
- struct fsnotify_event *event;
+ struct fsnotify_event *fsn_event;
void __user *p;
int ret = -ENOTTY;
size_t send_len = 0;
@@ -311,12 +302,10 @@ static long inotify_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case FIONREAD:
mutex_lock(&group->notification_mutex);
- list_for_each_entry(holder, &group->notification_list, event_list) {
- event = holder->event;
+ list_for_each_entry(fsn_event, &group->notification_list,
+ list) {
send_len += sizeof(struct inotify_event);
- if (event->name_len)
- send_len += roundup(event->name_len + 1,
- sizeof(struct inotify_event));
+ send_len += round_event_name_len(fsn_event);
}
mutex_unlock(&group->notification_mutex);
ret = put_user(send_len, (int __user *) p);
@@ -503,43 +492,12 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
struct fsnotify_group *group)
{
struct inotify_inode_mark *i_mark;
- struct fsnotify_event *ignored_event, *notify_event;
- struct inotify_event_private_data *event_priv;
- struct fsnotify_event_private_data *fsn_event_priv;
- int ret;
-
- i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
-
- ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
- FSNOTIFY_EVENT_NONE, NULL, 0,
- GFP_NOFS);
- if (!ignored_event)
- goto skip_send_ignore;
-
- event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
- if (unlikely(!event_priv))
- goto skip_send_ignore;
-
- fsn_event_priv = &event_priv->fsnotify_event_priv_data;
-
- fsnotify_get_group(group);
- fsn_event_priv->group = group;
- event_priv->wd = i_mark->wd;
-
- notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL);
- if (notify_event) {
- if (IS_ERR(notify_event))
- ret = PTR_ERR(notify_event);
- else
- fsnotify_put_event(notify_event);
- inotify_free_event_priv(fsn_event_priv);
- }
-skip_send_ignore:
- /* matches the reference taken when the event was created */
- if (ignored_event)
- fsnotify_put_event(ignored_event);
+ /* Queue ignore event for the watch */
+ inotify_handle_event(group, NULL, fsn_mark, NULL, FS_IN_IGNORED,
+ NULL, FSNOTIFY_EVENT_NONE, NULL);
+ i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
/* remove this mark from the idr */
inotify_remove_from_idr(group, i_mark);
@@ -836,7 +794,6 @@ static int __init inotify_user_setup(void)
BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC);
- event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
inotify_max_queued_events = 16384;
inotify_max_user_instances = 128;
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index 7b51b05f160c..952237b8e2d2 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -48,15 +48,6 @@
#include <linux/fsnotify_backend.h>
#include "fsnotify.h"
-static struct kmem_cache *fsnotify_event_cachep;
-static struct kmem_cache *fsnotify_event_holder_cachep;
-/*
- * This is a magic event we send when the q is too full. Since it doesn't
- * hold real event information we just keep one system wide and use it any time
- * it is needed. It's refcnt is set 1 at kernel init time and will never
- * get set to 0 so it will never get 'freed'
- */
-static struct fsnotify_event *q_overflow_event;
static atomic_t fsnotify_sync_cookie = ATOMIC_INIT(0);
/**
@@ -76,60 +67,14 @@ bool fsnotify_notify_queue_is_empty(struct fsnotify_group *group)
return list_empty(&group->notification_list) ? true : false;
}
-void fsnotify_get_event(struct fsnotify_event *event)
+void fsnotify_destroy_event(struct fsnotify_group *group,
+ struct fsnotify_event *event)
{
- atomic_inc(&event->refcnt);
-}
-
-void fsnotify_put_event(struct fsnotify_event *event)
-{
- if (!event)
+ /* Overflow events are per-group and we don't want to free them */
+ if (!event || event->mask == FS_Q_OVERFLOW)
return;
- if (atomic_dec_and_test(&event->refcnt)) {
- pr_debug("%s: event=%p\n", __func__, event);
-
- if (event->data_type == FSNOTIFY_EVENT_PATH)
- path_put(&event->path);
-
- BUG_ON(!list_empty(&event->private_data_list));
-
- kfree(event->file_name);
- put_pid(event->tgid);
- kmem_cache_free(fsnotify_event_cachep, event);
- }
-}
-
-struct fsnotify_event_holder *fsnotify_alloc_event_holder(void)
-{
- return kmem_cache_alloc(fsnotify_event_holder_cachep, GFP_KERNEL);
-}
-
-void fsnotify_destroy_event_holder(struct fsnotify_event_holder *holder)
-{
- if (holder)
- kmem_cache_free(fsnotify_event_holder_cachep, holder);
-}
-
-/*
- * Find the private data that the group previously attached to this event when
- * the group added the event to the notification queue (fsnotify_add_notify_event)
- */
-struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnotify_group *group, struct fsnotify_event *event)
-{
- struct fsnotify_event_private_data *lpriv;
- struct fsnotify_event_private_data *priv = NULL;
-
- assert_spin_locked(&event->lock);
-
- list_for_each_entry(lpriv, &event->private_data_list, event_list) {
- if (lpriv->group == group) {
- priv = lpriv;
- list_del(&priv->event_list);
- break;
- }
- }
- return priv;
+ group->ops->free_event(event);
}
/*
@@ -137,91 +82,35 @@ struct fsnotify_event_private_data *fsnotify_remove_priv_from_event(struct fsnot
* event off the queue to deal with. If the event is successfully added to the
* group's notification queue, a reference is taken on event.
*/
-struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group, struct fsnotify_event *event,
- struct fsnotify_event_private_data *priv,
+struct fsnotify_event *fsnotify_add_notify_event(struct fsnotify_group *group,
+ struct fsnotify_event *event,
struct fsnotify_event *(*merge)(struct list_head *,
struct fsnotify_event *))
{
struct fsnotify_event *return_event = NULL;
- struct fsnotify_event_holder *holder = NULL;
struct list_head *list = &group->notification_list;
- pr_debug("%s: group=%p event=%p priv=%p\n", __func__, group, event, priv);
-
- /*
- * There is one fsnotify_event_holder embedded inside each fsnotify_event.
- * Check if we expect to be able to use that holder. If not alloc a new
- * holder.
- * For the overflow event it's possible that something will use the in
- * event holder before we get the lock so we may need to jump back and
- * alloc a new holder, this can't happen for most events...
- */
- if (!list_empty(&event->holder.event_list)) {
-alloc_holder:
- holder = fsnotify_alloc_event_holder();
- if (!holder)
- return ERR_PTR(-ENOMEM);
- }
+ pr_debug("%s: group=%p event=%p\n", __func__, group, event);
mutex_lock(&group->notification_mutex);
if (group->q_len >= group->max_events) {
- event = q_overflow_event;
-
- /*
- * we need to return the overflow event
- * which means we need a ref
- */
- fsnotify_get_event(event);
+ /* Queue overflow event only if it isn't already queued */
+ if (list_empty(&group->overflow_event.list))
+ event = &group->overflow_event;
return_event = event;
-
- /* sorry, no private data on the overflow event */
- priv = NULL;
}
if (!list_empty(list) && merge) {
- struct fsnotify_event *tmp;
-
- tmp = merge(list, event);
- if (tmp) {
- mutex_unlock(&group->notification_mutex);
-
- if (return_event)
- fsnotify_put_event(return_event);
- if (holder != &event->holder)
- fsnotify_destroy_event_holder(holder);
- return tmp;
- }
- }
-
- spin_lock(&event->lock);
-
- if (list_empty(&event->holder.event_list)) {
- if (unlikely(holder))
- fsnotify_destroy_event_holder(holder);
- holder = &event->holder;
- } else if (unlikely(!holder)) {
- /* between the time we checked above and got the lock the in
- * event holder was used, go back and get a new one */
- spin_unlock(&event->lock);
- mutex_unlock(&group->notification_mutex);
-
+ return_event = merge(list, event);
if (return_event) {
- fsnotify_put_event(return_event);
- return_event = NULL;
+ mutex_unlock(&group->notification_mutex);
+ return return_event;
}
-
- goto alloc_holder;
}
group->q_len++;
- holder->event = event;
-
- fsnotify_get_event(event);
- list_add_tail(&holder->event_list, list);
- if (priv)
- list_add_tail(&priv->event_list, &event->private_data_list);
- spin_unlock(&event->lock);
+ list_add_tail(&event->list, list);
mutex_unlock(&group->notification_mutex);
wake_up(&group->notification_waitq);
@@ -230,32 +119,20 @@ alloc_holder:
}
/*
- * Remove and return the first event from the notification list. There is a
- * reference held on this event since it was on the list. It is the responsibility
- * of the caller to drop this reference.
+ * Remove and return the first event from the notification list. It is the
+ * responsibility of the caller to destroy the obtained event
*/
struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group)
{
struct fsnotify_event *event;
- struct fsnotify_event_holder *holder;
BUG_ON(!mutex_is_locked(&group->notification_mutex));
pr_debug("%s: group=%p\n", __func__, group);
- holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
-
- event = holder->event;
-
- spin_lock(&event->lock);
- holder->event = NULL;
- list_del_init(&holder->event_list);
- spin_unlock(&event->lock);
-
- /* event == holder means we are referenced through the in event holder */
- if (holder != &event->holder)
- fsnotify_destroy_event_holder(holder);
-
+ event = list_first_entry(&group->notification_list,
+ struct fsnotify_event, list);
+ list_del(&event->list);
group->q_len--;
return event;
@@ -266,15 +143,10 @@ struct fsnotify_event *fsnotify_remove_notify_event(struct fsnotify_group *group
*/
struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
{
- struct fsnotify_event *event;
- struct fsnotify_event_holder *holder;
-
BUG_ON(!mutex_is_locked(&group->notification_mutex));
- holder = list_first_entry(&group->notification_list, struct fsnotify_event_holder, event_list);
- event = holder->event;
-
- return event;
+ return list_first_entry(&group->notification_list,
+ struct fsnotify_event, list);
}
/*
@@ -284,181 +156,31 @@ struct fsnotify_event *fsnotify_peek_notify_event(struct fsnotify_group *group)
void fsnotify_flush_notify(struct fsnotify_group *group)
{
struct fsnotify_event *event;
- struct fsnotify_event_private_data *priv;
mutex_lock(&group->notification_mutex);
while (!fsnotify_notify_queue_is_empty(group)) {
event = fsnotify_remove_notify_event(group);
- /* if they don't implement free_event_priv they better not have attached any */
- if (group->ops->free_event_priv) {
- spin_lock(&event->lock);
- priv = fsnotify_remove_priv_from_event(group, event);
- spin_unlock(&event->lock);
- if (priv)
- group->ops->free_event_priv(priv);
- }
- fsnotify_put_event(event); /* matches fsnotify_add_notify_event */
+ fsnotify_destroy_event(group, event);
}
mutex_unlock(&group->notification_mutex);
}
-static void initialize_event(struct fsnotify_event *event)
-{
- INIT_LIST_HEAD(&event->holder.event_list);
- atomic_set(&event->refcnt, 1);
-
- spin_lock_init(&event->lock);
-
- INIT_LIST_HEAD(&event->private_data_list);
-}
-
-/*
- * Caller damn well better be holding whatever mutex is protecting the
- * old_holder->event_list and the new_event must be a clean event which
- * cannot be found anywhere else in the kernel.
- */
-int fsnotify_replace_event(struct fsnotify_event_holder *old_holder,
- struct fsnotify_event *new_event)
-{
- struct fsnotify_event *old_event = old_holder->event;
- struct fsnotify_event_holder *new_holder = &new_event->holder;
-
- enum event_spinlock_class {
- SPINLOCK_OLD,
- SPINLOCK_NEW,
- };
-
- pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, new_event);
-
- /*
- * if the new_event's embedded holder is in use someone
- * screwed up and didn't give us a clean new event.
- */
- BUG_ON(!list_empty(&new_holder->event_list));
-
- spin_lock_nested(&old_event->lock, SPINLOCK_OLD);
- spin_lock_nested(&new_event->lock, SPINLOCK_NEW);
-
- new_holder->event = new_event;
- list_replace_init(&old_holder->event_list, &new_holder->event_list);
-
- spin_unlock(&new_event->lock);
- spin_unlock(&old_event->lock);
-
- /* event == holder means we are referenced through the in event holder */
- if (old_holder != &old_event->holder)
- fsnotify_destroy_event_holder(old_holder);
-
- fsnotify_get_event(new_event); /* on the list take reference */
- fsnotify_put_event(old_event); /* off the list, drop reference */
-
- return 0;
-}
-
-struct fsnotify_event *fsnotify_clone_event(struct fsnotify_event *old_event)
-{
- struct fsnotify_event *event;
-
- event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
- if (!event)
- return NULL;
-
- pr_debug("%s: old_event=%p new_event=%p\n", __func__, old_event, event);
-
- memcpy(event, old_event, sizeof(*event));
- initialize_event(event);
-
- if (event->name_len) {
- event->file_name = kstrdup(old_event->file_name, GFP_KERNEL);
- if (!event->file_name) {
- kmem_cache_free(fsnotify_event_cachep, event);
- return NULL;
- }
- }
- event->tgid = get_pid(old_event->tgid);
- if (event->data_type == FSNOTIFY_EVENT_PATH)
- path_get(&event->path);
-
- return event;
-}
-
/*
* fsnotify_create_event - Allocate a new event which will be sent to each
* group's handle_event function if the group was interested in this
* particular event.
*
- * @to_tell the inode which is supposed to receive the event (sometimes a
+ * @inode the inode which is supposed to receive the event (sometimes a
* parent of the inode to which the event happened.
* @mask what actually happened.
* @data pointer to the object which was actually affected
* @data_type flag indication if the data is a file, path, inode, nothing...
* @name the filename, if available
*/
-struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
- int data_type, const unsigned char *name,
- u32 cookie, gfp_t gfp)
+void fsnotify_init_event(struct fsnotify_event *event, struct inode *inode,
+ u32 mask)
{
- struct fsnotify_event *event;
-
- event = kmem_cache_zalloc(fsnotify_event_cachep, gfp);
- if (!event)
- return NULL;
-
- pr_debug("%s: event=%p to_tell=%p mask=%x data=%p data_type=%d\n",
- __func__, event, to_tell, mask, data, data_type);
-
- initialize_event(event);
-
- if (name) {
- event->file_name = kstrdup(name, gfp);
- if (!event->file_name) {
- kmem_cache_free(fsnotify_event_cachep, event);
- return NULL;
- }
- event->name_len = strlen(event->file_name);
- }
-
- event->tgid = get_pid(task_tgid(current));
- event->sync_cookie = cookie;
- event->to_tell = to_tell;
- event->data_type = data_type;
-
- switch (data_type) {
- case FSNOTIFY_EVENT_PATH: {
- struct path *path = data;
- event->path.dentry = path->dentry;
- event->path.mnt = path->mnt;
- path_get(&event->path);
- break;
- }
- case FSNOTIFY_EVENT_INODE:
- event->inode = data;
- break;
- case FSNOTIFY_EVENT_NONE:
- event->inode = NULL;
- event->path.dentry = NULL;
- event->path.mnt = NULL;
- break;
- default:
- BUG();
- }
-
+ INIT_LIST_HEAD(&event->list);
+ event->inode = inode;
event->mask = mask;
-
- return event;
-}
-
-static __init int fsnotify_notification_init(void)
-{
- fsnotify_event_cachep = KMEM_CACHE(fsnotify_event, SLAB_PANIC);
- fsnotify_event_holder_cachep = KMEM_CACHE(fsnotify_event_holder, SLAB_PANIC);
-
- q_overflow_event = fsnotify_create_event(NULL, FS_Q_OVERFLOW, NULL,
- FSNOTIFY_EVENT_NONE, NULL, 0,
- GFP_KERNEL);
- if (!q_overflow_event)
- panic("unable to allocate fsnotify q_overflow_event\n");
-
- return 0;
}
-subsys_initcall(fsnotify_notification_init);
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index f17e58b32989..ce210d4951a1 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -38,7 +38,6 @@ ocfs2-objs := \
symlink.o \
sysfile.o \
uptodate.o \
- ver.o \
quota_local.o \
quota_global.o \
xattr.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index dc7411fe185d..e2edff38be52 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -4742,6 +4742,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
enum ocfs2_alloc_restarted *reason_ret)
{
int status = 0, err = 0;
+ int need_free = 0;
int free_extents;
enum ocfs2_alloc_restarted reason = RESTART_NONE;
u32 bit_off, num_bits;
@@ -4796,7 +4797,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ need_free = 1;
+ goto bail;
}
block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
@@ -4807,7 +4809,8 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
num_bits, flags, meta_ac);
if (status < 0) {
mlog_errno(status);
- goto leave;
+ need_free = 1;
+ goto bail;
}
ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4821,6 +4824,19 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
reason = RESTART_TRANS;
}
+bail:
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num_bits);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num_bits);
+ }
+
leave:
if (reason_ret)
*reason_ret = reason;
@@ -6805,6 +6821,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
struct buffer_head *di_bh)
{
int ret, i, has_data, num_pages = 0;
+ int need_free = 0;
+ u32 bit_off, num;
handle_t *handle;
u64 uninitialized_var(block);
struct ocfs2_inode_info *oi = OCFS2_I(inode);
@@ -6850,7 +6868,6 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
}
if (has_data) {
- u32 bit_off, num;
unsigned int page_end;
u64 phys;
@@ -6886,6 +6903,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6896,6 +6914,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6927,6 +6946,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
if (ret) {
mlog_errno(ret);
+ need_free = 1;
goto out_commit;
}
@@ -6938,6 +6958,18 @@ out_commit:
dquot_free_space_nodirty(inode,
ocfs2_clusters_to_bytes(osb->sb, 1));
+ if (need_free) {
+ if (data_ac->ac_which == OCFS2_AC_USE_LOCAL)
+ ocfs2_free_local_alloc_bits(osb, handle, data_ac,
+ bit_off, num);
+ else
+ ocfs2_free_clusters(handle,
+ data_ac->ac_inode,
+ data_ac->ac_bh,
+ ocfs2_clusters_to_blocks(osb->sb, bit_off),
+ num);
+ }
+
ocfs2_commit_trans(osb, handle);
out_unlock:
@@ -7126,7 +7158,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
if (end > i_size_read(inode))
end = i_size_read(inode);
- BUG_ON(start >= end);
+ BUG_ON(start > end);
if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
@@ -7260,14 +7292,8 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
start = range->start >> osb->s_clustersize_bits;
len = range->len >> osb->s_clustersize_bits;
minlen = range->minlen >> osb->s_clustersize_bits;
- trimmed = 0;
- if (!len) {
- range->len = 0;
- return 0;
- }
-
- if (minlen >= osb->bitmap_cpg)
+ if (minlen >= osb->bitmap_cpg || range->len < sb->s_blocksize)
return -EINVAL;
main_bm_inode = ocfs2_get_system_file_inode(osb,
@@ -7293,6 +7319,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
goto out_unlock;
}
+ len = range->len >> osb->s_clustersize_bits;
if (start + len > le32_to_cpu(main_bm->i_clusters))
len = le32_to_cpu(main_bm->i_clusters) - start;
@@ -7307,6 +7334,7 @@ int ocfs2_trim_fs(struct super_block *sb, struct fstrim_range *range)
last_group = ocfs2_which_cluster_group(main_bm_inode, start + len - 1);
last_bit = osb->bitmap_cpg;
+ trimmed = 0;
for (group = first_group; group <= last_group;) {
if (first_bit + len >= osb->bitmap_cpg)
last_bit = osb->bitmap_cpg;
diff --git a/fs/ocfs2/cluster/Makefile b/fs/ocfs2/cluster/Makefile
index bc8c5e7d8608..1aefc0350ec3 100644
--- a/fs/ocfs2/cluster/Makefile
+++ b/fs/ocfs2/cluster/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_OCFS2_FS) += ocfs2_nodemanager.o
ocfs2_nodemanager-objs := heartbeat.o masklog.o sys.o nodemanager.o \
- quorum.o tcp.o netdebug.o ver.o
+ quorum.o tcp.o netdebug.o
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index bb240647ca5f..441c84e169e6 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -29,7 +29,6 @@
#include "heartbeat.h"
#include "masklog.h"
#include "sys.h"
-#include "ver.h"
/* for now we operate under the assertion that there can be only one
* cluster active at a time. Changing this will require trickling
@@ -945,8 +944,6 @@ static int __init init_o2nm(void)
{
int ret = -1;
- cluster_print_version();
-
ret = o2hb_init();
if (ret)
goto out;
@@ -984,6 +981,7 @@ out:
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster management");
module_init(init_o2nm)
module_exit(exit_o2nm)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2cd2406b4140..1828201bc901 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1826,7 +1826,7 @@ int o2net_register_hb_callbacks(void)
/* ------------------------------------------------------------ */
-static int o2net_accept_one(struct socket *sock)
+static int o2net_accept_one(struct socket *sock, int *more)
{
int ret, slen;
struct sockaddr_in sin;
@@ -1837,6 +1837,7 @@ static int o2net_accept_one(struct socket *sock)
struct o2net_node *nn;
BUG_ON(sock == NULL);
+ *more = 0;
ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type,
sock->sk->sk_protocol, &new_sock);
if (ret)
@@ -1848,6 +1849,7 @@ static int o2net_accept_one(struct socket *sock)
if (ret < 0)
goto out;
+ *more = 1;
new_sock->sk->sk_allocation = GFP_ATOMIC;
ret = o2net_set_nodelay(new_sock);
@@ -1949,8 +1951,15 @@ out:
static void o2net_accept_many(struct work_struct *work)
{
struct socket *sock = o2net_listen_sock;
- while (o2net_accept_one(sock) == 0)
+ int more;
+ int err;
+
+ for (;;) {
+ err = o2net_accept_one(sock, &more);
+ if (!more)
+ break;
cond_resched();
+ }
}
static void o2net_listen_data_ready(struct sock *sk, int bytes)
@@ -1964,18 +1973,30 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes)
goto out;
}
- /* ->sk_data_ready is also called for a newly established child socket
- * before it has been accepted and the acceptor has set up their
- * data_ready.. we only want to queue listen work for our listening
- * socket */
+ /* This callback may called twice when a new connection
+ * is being established as a child socket inherits everything
+ * from a parent LISTEN socket, including the data_ready cb of
+ * the parent. This leads to a hazard. In o2net_accept_one()
+ * we are still initializing the child socket but have not
+ * changed the inherited data_ready callback yet when
+ * data starts arriving.
+ * We avoid this hazard by checking the state.
+ * For the listening socket, the state will be TCP_LISTEN; for the new
+ * socket, will be TCP_ESTABLISHED. Also, in this case,
+ * sk->sk_user_data is not a valid function pointer.
+ */
+
if (sk->sk_state == TCP_LISTEN) {
mlog(ML_TCP, "bytes: %d\n", bytes);
queue_work(o2net_wq, &o2net_listen_work);
+ } else {
+ ready = NULL;
}
out:
read_unlock(&sk->sk_callback_lock);
- ready(sk, bytes);
+ if (ready != NULL)
+ ready(sk, bytes);
}
static int o2net_open_listening_sock(__be32 addr, __be16 port)
diff --git a/fs/ocfs2/cluster/ver.c b/fs/ocfs2/cluster/ver.c
deleted file mode 100644
index a56eee6abad3..000000000000
--- a/fs/ocfs2/cluster/ver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define CLUSTER_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 Node Manager " CLUSTER_BUILD_VERSION
-
-void cluster_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(CLUSTER_BUILD_VERSION);
diff --git a/fs/ocfs2/cluster/ver.h b/fs/ocfs2/cluster/ver.h
deleted file mode 100644
index 32554c3382c2..000000000000
--- a/fs/ocfs2/cluster/ver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef O2CLUSTER_VER_H
-#define O2CLUSTER_VER_H
-
-void cluster_print_version(void);
-
-#endif /* O2CLUSTER_VER_H */
diff --git a/fs/ocfs2/dlm/Makefile b/fs/ocfs2/dlm/Makefile
index c8a044efbb15..bd1aab1f49a4 100644
--- a/fs/ocfs2/dlm/Makefile
+++ b/fs/ocfs2/dlm/Makefile
@@ -3,5 +3,5 @@ ccflags-y := -Ifs/ocfs2
obj-$(CONFIG_OCFS2_FS_O2CB) += ocfs2_dlm.o
ocfs2_dlm-objs := dlmdomain.o dlmdebug.o dlmthread.o dlmrecovery.o \
- dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o dlmver.o
+ dlmmaster.o dlmast.o dlmconvert.o dlmlock.o dlmunlock.o
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 8b3382abf840..33660a4a52fa 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -43,8 +43,6 @@
#include "dlmdomain.h"
#include "dlmdebug.h"
-#include "dlmver.h"
-
#define MLOG_MASK_PREFIX (ML_DLM|ML_DLM_DOMAIN)
#include "cluster/masklog.h"
@@ -2328,8 +2326,6 @@ static int __init dlm_init(void)
{
int status;
- dlm_print_version();
-
status = dlm_init_mle_cache();
if (status) {
mlog(ML_ERROR, "Could not create o2dlm_mle slabcache\n");
@@ -2379,6 +2375,7 @@ static void __exit dlm_exit (void)
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 Distributed Lock Management");
module_init(dlm_init);
module_exit(dlm_exit);
diff --git a/fs/ocfs2/dlm/dlmver.c b/fs/ocfs2/dlm/dlmver.c
deleted file mode 100644
index dfc0da4d158d..000000000000
--- a/fs/ocfs2/dlm/dlmver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLM " DLM_BUILD_VERSION
-
-void dlm_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlm/dlmver.h b/fs/ocfs2/dlm/dlmver.h
deleted file mode 100644
index f674aee77a16..000000000000
--- a/fs/ocfs2/dlm/dlmver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLM_VER_H
-#define DLM_VER_H
-
-void dlm_print_version(void);
-
-#endif /* DLM_VER_H */
diff --git a/fs/ocfs2/dlmfs/Makefile b/fs/ocfs2/dlmfs/Makefile
index f14be89a6701..eed3db8c5b49 100644
--- a/fs/ocfs2/dlmfs/Makefile
+++ b/fs/ocfs2/dlmfs/Makefile
@@ -2,4 +2,4 @@ ccflags-y := -Ifs/ocfs2
obj-$(CONFIG_OCFS2_FS) += ocfs2_dlmfs.o
-ocfs2_dlmfs-objs := userdlm.o dlmfs.o dlmfsver.o
+ocfs2_dlmfs-objs := userdlm.o dlmfs.o
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index efa2b3d339e3..09b7d9dac71d 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -49,7 +49,6 @@
#include "stackglue.h"
#include "userdlm.h"
-#include "dlmfsver.h"
#define MLOG_MASK_PREFIX ML_DLMFS
#include "cluster/masklog.h"
@@ -644,8 +643,6 @@ static int __init init_dlmfs_fs(void)
int status;
int cleanup_inode = 0, cleanup_worker = 0;
- dlmfs_print_version();
-
status = bdi_init(&dlmfs_backing_dev_info);
if (status)
return status;
@@ -701,6 +698,7 @@ static void __exit exit_dlmfs_fs(void)
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 DLM-Filesystem");
module_init(init_dlmfs_fs)
module_exit(exit_dlmfs_fs)
diff --git a/fs/ocfs2/dlmfs/dlmfsver.c b/fs/ocfs2/dlmfs/dlmfsver.c
deleted file mode 100644
index a733b3321f83..000000000000
--- a/fs/ocfs2/dlmfs/dlmfsver.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmfsver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-
-#include "dlmfsver.h"
-
-#define DLM_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 DLMFS " DLM_BUILD_VERSION
-
-void dlmfs_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(DLM_BUILD_VERSION);
diff --git a/fs/ocfs2/dlmfs/dlmfsver.h b/fs/ocfs2/dlmfs/dlmfsver.h
deleted file mode 100644
index f35eadbed25c..000000000000
--- a/fs/ocfs2/dlmfs/dlmfsver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * dlmver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef DLMFS_VER_H
-#define DLMFS_VER_H
-
-void dlmfs_print_version(void);
-
-#endif /* DLMFS_VER_H */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 3407b2c62b21..19986959d149 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2996,6 +2996,8 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
/* for now, uuid == domain */
status = ocfs2_cluster_connect(osb->osb_cluster_stack,
+ osb->osb_cluster_name,
+ strlen(osb->osb_cluster_name),
osb->uuid_str,
strlen(osb->uuid_str),
&lproto, ocfs2_do_node_down, osb,
@@ -3005,7 +3007,7 @@ int ocfs2_dlm_init(struct ocfs2_super *osb)
goto bail;
}
- status = ocfs2_cluster_this_node(&osb->node_num);
+ status = ocfs2_cluster_this_node(conn, &osb->node_num);
if (status < 0) {
mlog_errno(status);
mlog(ML_ERROR,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6fff128cad16..a2d20c58ef07 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -185,6 +185,9 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end,
file->f_path.dentry->d_name.name,
(unsigned long long)datasync);
+ if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb))
+ return 0;
+
err = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (err)
return err;
@@ -474,11 +477,6 @@ static int ocfs2_truncate_file(struct inode *inode,
goto bail;
}
- /* lets handle the simple truncate cases before doing any more
- * cluster locking. */
- if (new_i_size == le64_to_cpu(fe->i_size))
- goto bail;
-
down_write(&OCFS2_I(inode)->ip_alloc_sem);
ocfs2_resv_discard(&osb->osb_la_resmap,
@@ -718,7 +716,8 @@ leave:
* While a write will already be ordering the data, a truncate will not.
* Thus, we need to explicitly order the zeroed pages.
*/
-static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode,
+ struct buffer_head *di_bh)
{
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
handle_t *handle = NULL;
@@ -735,7 +734,14 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
}
ret = ocfs2_jbd2_file_inode(handle, inode);
- if (ret < 0)
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret)
mlog_errno(ret);
out:
@@ -751,7 +757,7 @@ out:
* to be too fragile to do exactly what we need without us having to
* worry about recursive locking in ->write_begin() and ->write_end(). */
static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
- u64 abs_to)
+ u64 abs_to, struct buffer_head *di_bh)
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -759,6 +765,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
handle_t *handle = NULL;
int ret = 0;
unsigned zero_from, zero_to, block_start, block_end;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
BUG_ON(abs_from >= abs_to);
BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
@@ -801,7 +808,8 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
if (!handle) {
- handle = ocfs2_zero_start_ordered_transaction(inode);
+ handle = ocfs2_zero_start_ordered_transaction(inode,
+ di_bh);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
handle = NULL;
@@ -818,8 +826,22 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
ret = 0;
}
- if (handle)
+ if (handle) {
+ /*
+ * fs-writeback will release the dirty pages without page lock
+ * whose offset are over inode size, the release happens at
+ * block_write_full_page_endio().
+ */
+ i_size_write(inode, abs_to);
+ inode->i_blocks = ocfs2_inode_sector_count(inode);
+ di->i_size = cpu_to_le64((u64)i_size_read(inode));
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ di->i_mtime_nsec = di->i_ctime_nsec;
+ ocfs2_journal_dirty(handle, di_bh);
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
+ }
out_unlock:
unlock_page(page);
@@ -915,7 +937,7 @@ out:
* has made sure that the entire range needs zeroing.
*/
static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
- u64 range_end)
+ u64 range_end, struct buffer_head *di_bh)
{
int rc = 0;
u64 next_pos;
@@ -931,7 +953,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
if (next_pos > range_end)
next_pos = range_end;
- rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+ rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
if (rc < 0) {
mlog_errno(rc);
break;
@@ -977,7 +999,7 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
range_end = zero_to_size;
ret = ocfs2_zero_extend_range(inode, range_start,
- range_end);
+ range_end, di_bh);
if (ret) {
mlog_errno(ret);
break;
@@ -1145,14 +1167,14 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
goto bail_unlock_rw;
}
- if (size_change && attr->ia_size != i_size_read(inode)) {
+ if (size_change) {
status = inode_newsize_ok(inode, attr->ia_size);
if (status)
goto bail_unlock;
inode_dio_wait(inode);
- if (i_size_read(inode) > attr->ia_size) {
+ if (i_size_read(inode) >= attr->ia_size) {
if (ocfs2_should_order_data(inode)) {
status = ocfs2_begin_ordered_truncate(inode,
attr->ia_size);
@@ -1869,7 +1891,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
}
size = sr->l_start + sr->l_len;
- if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) {
+ if (cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64 ||
+ cmd == OCFS2_IOC_UNRESVSP || cmd == OCFS2_IOC_UNRESVSP64) {
if (sr->l_len <= 0) {
ret = -EINVAL;
goto out_inode_unlock;
@@ -2622,7 +2645,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence)
case SEEK_SET:
break;
case SEEK_END:
- offset += inode->i_size;
+ /* SEEK_END requires the OCFS2 inode lock for the file
+ * because it references the file's size.
+ */
+ ret = ocfs2_inode_lock(inode, NULL, 0);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ offset += i_size_read(inode);
+ ocfs2_inode_unlock(inode, 0);
break;
case SEEK_CUR:
if (offset == 0) {
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index fa32ce9b455d..8ca3c29accbf 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/blkdev.h>
#include <linux/compat.h>
#include <cluster/masklog.h>
@@ -966,15 +967,21 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FITRIM:
{
struct super_block *sb = inode->i_sb;
+ struct request_queue *q = bdev_get_queue(sb->s_bdev);
struct fstrim_range range;
int ret = 0;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
+ if (!blk_queue_discard(q))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&range, argp, sizeof(range)))
return -EFAULT;
+ range.minlen = max_t(u64, q->limits.discard_granularity,
+ range.minlen);
ret = ocfs2_trim_fs(sb, &range);
if (ret < 0)
return ret;
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index cd5496b7a0a3..25ec3b712d5f 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -781,6 +781,46 @@ bail:
return status;
}
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 bit_off,
+ u32 num_bits)
+{
+ int status, start;
+ struct inode *local_alloc_inode;
+ void *bitmap;
+ struct ocfs2_dinode *alloc;
+ struct ocfs2_local_alloc *la;
+
+ BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL);
+
+ local_alloc_inode = ac->ac_inode;
+ alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
+ la = OCFS2_LOCAL_ALLOC(alloc);
+
+ bitmap = la->la_bitmap;
+ start = bit_off - le32_to_cpu(la->la_bm_off);
+
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(local_alloc_inode),
+ osb->local_alloc_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+
+ while (num_bits--)
+ ocfs2_clear_bit(start++, bitmap);
+
+ le32_add_cpu(&alloc->id1.bitmap1.i_used, -num_bits);
+ ocfs2_journal_dirty(handle, osb->local_alloc_bh);
+
+bail:
+ return status;
+}
+
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
{
u32 count;
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 1be9b5864460..44a7d1fb2dec 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -55,6 +55,12 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
u32 *bit_off,
u32 *num_bits);
+int ocfs2_free_local_alloc_bits(struct ocfs2_super *osb,
+ handle_t *handle,
+ struct ocfs2_alloc_context *ac,
+ u32 bit_off,
+ u32 num_bits);
+
void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
unsigned int num_clusters);
void ocfs2_la_enable_worker(struct work_struct *work);
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 631a98213474..64c304d668f0 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -561,83 +561,6 @@ static void ocfs2_probe_alloc_group(struct inode *inode, struct buffer_head *bh,
mlog(0, "found phys_cpos: %u to fit the wanted moving.\n", *phys_cpos);
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
- handle_t *handle,
- struct buffer_head *di_bh,
- u32 num_bits,
- u16 chain)
-{
- int ret;
- u32 tmp_used;
- struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
- struct ocfs2_chain_list *cl =
- (struct ocfs2_chain_list *) &di->id2.i_chain;
-
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
- di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
- le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
- ocfs2_journal_dirty(handle, di_bh);
-
-out:
- return ret;
-}
-
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits)
-{
- int status;
- void *bitmap = bg->bg_bitmap;
- int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
-
- /* All callers get the descriptor via
- * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
- BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
- BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
-
- mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
- num_bits);
-
- if (ocfs2_is_cluster_bitmap(alloc_inode))
- journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
-
- status = ocfs2_journal_access_gd(handle,
- INODE_CACHE(alloc_inode),
- group_bh,
- journal_type);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
- le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
- if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) {
- ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit"
- " count %u but claims %u are freed. num_bits %d",
- (unsigned long long)le64_to_cpu(bg->bg_blkno),
- le16_to_cpu(bg->bg_bits),
- le16_to_cpu(bg->bg_free_bits_count), num_bits);
- return -EROFS;
- }
- while (num_bits--)
- ocfs2_set_bit(bit_off++, bitmap);
-
- ocfs2_journal_dirty(handle, group_bh);
-
-bail:
- return status;
-}
-
static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
u32 cpos, u32 phys_cpos, u32 *new_phys_cpos,
u32 len, int ext_flags)
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4f791f6d27d0..179661a21b61 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -644,6 +644,7 @@ static int ocfs2_link(struct dentry *old_dentry,
struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
+ u64 old_de_ino;
trace_ocfs2_link((unsigned long long)OCFS2_I(inode)->ip_blkno,
old_dentry->d_name.len, old_dentry->d_name.name,
@@ -666,6 +667,18 @@ static int ocfs2_link(struct dentry *old_dentry,
goto out;
}
+ err = ocfs2_lookup_ino_from_name(dir, old_dentry->d_name.name,
+ old_dentry->d_name.len, &old_de_ino);
+ if (err) {
+ err = -ENOENT;
+ goto out;
+ }
+
+ if (old_de_ino != OCFS2_I(inode)->ip_blkno) {
+ err = -ENOENT;
+ goto out;
+ }
+
err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
dentry->d_name.len);
if (err)
@@ -954,6 +967,65 @@ leave:
return status;
}
+static int ocfs2_check_if_ancestor(struct ocfs2_super *osb,
+ u64 src_inode_no, u64 dest_inode_no)
+{
+ int ret = 0, i = 0;
+ u64 parent_inode_no = 0;
+ u64 child_inode_no = src_inode_no;
+ struct inode *child_inode;
+
+#define MAX_LOOKUP_TIMES 32
+ while (1) {
+ child_inode = ocfs2_iget(osb, child_inode_no, 0, 0);
+ if (IS_ERR(child_inode)) {
+ ret = PTR_ERR(child_inode);
+ break;
+ }
+
+ ret = ocfs2_inode_lock(child_inode, NULL, 0);
+ if (ret < 0) {
+ iput(child_inode);
+ if (ret != -ENOENT)
+ mlog_errno(ret);
+ break;
+ }
+
+ ret = ocfs2_lookup_ino_from_name(child_inode, "..", 2,
+ &parent_inode_no);
+ ocfs2_inode_unlock(child_inode, 0);
+ iput(child_inode);
+ if (ret < 0) {
+ ret = -ENOENT;
+ break;
+ }
+
+ if (parent_inode_no == dest_inode_no) {
+ ret = 1;
+ break;
+ }
+
+ if (parent_inode_no == osb->root_inode->i_ino) {
+ ret = 0;
+ break;
+ }
+
+ child_inode_no = parent_inode_no;
+
+ if (++i >= MAX_LOOKUP_TIMES) {
+ mlog(ML_NOTICE, "max lookup times reached, filesystem "
+ "may have nested directories, "
+ "src inode: %llu, dest inode: %llu.\n",
+ (unsigned long long)src_inode_no,
+ (unsigned long long)dest_inode_no);
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
+
/*
* The only place this should be used is rename!
* if they have the same id, then the 1st one is the only one locked.
@@ -965,6 +1037,7 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
struct inode *inode2)
{
int status;
+ int inode1_is_ancestor, inode2_is_ancestor;
struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
struct buffer_head **tmpbh;
@@ -978,9 +1051,26 @@ static int ocfs2_double_lock(struct ocfs2_super *osb,
if (*bh2)
*bh2 = NULL;
- /* we always want to lock the one with the lower lockid first. */
+ /* we always want to lock the one with the lower lockid first.
+ * and if they are nested, we lock ancestor first */
if (oi1->ip_blkno != oi2->ip_blkno) {
- if (oi1->ip_blkno < oi2->ip_blkno) {
+ inode1_is_ancestor = ocfs2_check_if_ancestor(osb, oi2->ip_blkno,
+ oi1->ip_blkno);
+ if (inode1_is_ancestor < 0) {
+ status = inode1_is_ancestor;
+ goto bail;
+ }
+
+ inode2_is_ancestor = ocfs2_check_if_ancestor(osb, oi1->ip_blkno,
+ oi2->ip_blkno);
+ if (inode2_is_ancestor < 0) {
+ status = inode2_is_ancestor;
+ goto bail;
+ }
+
+ if ((inode1_is_ancestor == 1) ||
+ (oi1->ip_blkno < oi2->ip_blkno &&
+ inode2_is_ancestor == 0)) {
/* switch id1 and id2 around */
tmpbh = bh2;
bh2 = bh1;
@@ -1097,6 +1187,22 @@ static int ocfs2_rename(struct inode *old_dir,
goto bail;
}
rename_lock = 1;
+
+ /* here we cannot guarantee the inodes haven't just been
+ * changed, so check if they are nested again */
+ status = ocfs2_check_if_ancestor(osb, new_dir->i_ino,
+ old_inode->i_ino);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ } else if (status == 1) {
+ status = -EPERM;
+ mlog(ML_ERROR, "src inode %llu should not be ancestor "
+ "of new dir inode %llu\n",
+ (unsigned long long)old_inode->i_ino,
+ (unsigned long long)new_dir->i_ino);
+ goto bail;
+ }
}
/* if old and new are the same, this'll just do one lock. */
@@ -2101,17 +2207,17 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
goto leave;
}
- /* remove it from the orphan directory */
- status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
+ status = ocfs2_journal_access_di(handle,
+ INODE_CACHE(orphan_dir_inode),
+ orphan_dir_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
if (status < 0) {
mlog_errno(status);
goto leave;
}
- status = ocfs2_journal_access_di(handle,
- INODE_CACHE(orphan_dir_inode),
- orphan_dir_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
+ /* remove it from the orphan directory */
+ status = ocfs2_delete_entry(handle, orphan_dir_inode, &lookup);
if (status < 0) {
mlog_errno(status);
goto leave;
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 3a903470c794..553f53cc73ae 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -387,6 +387,7 @@ struct ocfs2_super
u8 osb_stackflags;
char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
+ char osb_cluster_name[OCFS2_CLUSTER_NAME_LEN + 1];
struct ocfs2_cluster_connection *cconn;
struct ocfs2_lock_res osb_super_lockres;
struct ocfs2_lock_res osb_rename_lockres;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index bf1f8930456f..1724d43d3da1 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -398,7 +398,8 @@ static int o2cb_cluster_disconnect(struct ocfs2_cluster_connection *conn)
return 0;
}
-static int o2cb_cluster_this_node(unsigned int *node)
+static int o2cb_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node)
{
int node_num;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 286edf1e231f..13a8537d8e8b 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -23,6 +23,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/reboot.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include "stackglue.h"
@@ -102,6 +103,12 @@
#define OCFS2_TEXT_UUID_LEN 32
#define OCFS2_CONTROL_MESSAGE_VERNUM_LEN 2
#define OCFS2_CONTROL_MESSAGE_NODENUM_LEN 8
+#define VERSION_LOCK "version_lock"
+
+enum ocfs2_connection_type {
+ WITH_CONTROLD,
+ NO_CONTROLD
+};
/*
* ocfs2_live_connection is refcounted because the filesystem and
@@ -110,6 +117,13 @@
struct ocfs2_live_connection {
struct list_head oc_list;
struct ocfs2_cluster_connection *oc_conn;
+ enum ocfs2_connection_type oc_type;
+ atomic_t oc_this_node;
+ int oc_our_slot;
+ struct dlm_lksb oc_version_lksb;
+ char oc_lvb[DLM_LVB_LEN];
+ struct completion oc_sync_wait;
+ wait_queue_head_t oc_wait;
};
struct ocfs2_control_private {
@@ -198,20 +212,15 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
* mount path. Since the VFS prevents multiple calls to
* fill_super(), we can't get dupes here.
*/
-static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
- struct ocfs2_live_connection **c_ret)
+static int ocfs2_live_connection_attach(struct ocfs2_cluster_connection *conn,
+ struct ocfs2_live_connection *c)
{
int rc = 0;
- struct ocfs2_live_connection *c;
-
- c = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
- if (!c)
- return -ENOMEM;
mutex_lock(&ocfs2_control_lock);
c->oc_conn = conn;
- if (atomic_read(&ocfs2_control_opened))
+ if ((c->oc_type == NO_CONTROLD) || atomic_read(&ocfs2_control_opened))
list_add(&c->oc_list, &ocfs2_live_connection_list);
else {
printk(KERN_ERR
@@ -220,12 +229,6 @@ static int ocfs2_live_connection_new(struct ocfs2_cluster_connection *conn,
}
mutex_unlock(&ocfs2_control_lock);
-
- if (!rc)
- *c_ret = c;
- else
- kfree(c);
-
return rc;
}
@@ -799,18 +802,251 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
return 0;
}
+static void lvb_to_version(char *lvb, struct ocfs2_protocol_version *ver)
+{
+ struct ocfs2_protocol_version *pv =
+ (struct ocfs2_protocol_version *)lvb;
+ /*
+ * ocfs2_protocol_version has two u8 variables, so we don't
+ * need any endian conversion.
+ */
+ ver->pv_major = pv->pv_major;
+ ver->pv_minor = pv->pv_minor;
+}
+
+static void version_to_lvb(struct ocfs2_protocol_version *ver, char *lvb)
+{
+ struct ocfs2_protocol_version *pv =
+ (struct ocfs2_protocol_version *)lvb;
+ /*
+ * ocfs2_protocol_version has two u8 variables, so we don't
+ * need any endian conversion.
+ */
+ pv->pv_major = ver->pv_major;
+ pv->pv_minor = ver->pv_minor;
+}
+
+static void sync_wait_cb(void *arg)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ complete(&lc->oc_sync_wait);
+}
+
+static int sync_unlock(struct ocfs2_cluster_connection *conn,
+ struct dlm_lksb *lksb, char *name)
+{
+ int error;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ error = dlm_unlock(conn->cc_lockspace, lksb->sb_lkid, 0, lksb, conn);
+ if (error) {
+ printk(KERN_ERR "%s lkid %x error %d\n",
+ name, lksb->sb_lkid, error);
+ return error;
+ }
+
+ wait_for_completion(&lc->oc_sync_wait);
+
+ if (lksb->sb_status != -DLM_EUNLOCK) {
+ printk(KERN_ERR "%s lkid %x status %d\n",
+ name, lksb->sb_lkid, lksb->sb_status);
+ return -1;
+ }
+ return 0;
+}
+
+static int sync_lock(struct ocfs2_cluster_connection *conn,
+ int mode, uint32_t flags,
+ struct dlm_lksb *lksb, char *name)
+{
+ int error, status;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ error = dlm_lock(conn->cc_lockspace, mode, lksb, flags,
+ name, strlen(name),
+ 0, sync_wait_cb, conn, NULL);
+ if (error) {
+ printk(KERN_ERR "%s lkid %x flags %x mode %d error %d\n",
+ name, lksb->sb_lkid, flags, mode, error);
+ return error;
+ }
+
+ wait_for_completion(&lc->oc_sync_wait);
+
+ status = lksb->sb_status;
+
+ if (status && status != -EAGAIN) {
+ printk(KERN_ERR "%s lkid %x flags %x mode %d status %d\n",
+ name, lksb->sb_lkid, flags, mode, status);
+ }
+
+ return status;
+}
+
+
+static int version_lock(struct ocfs2_cluster_connection *conn, int mode,
+ int flags)
+{
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ return sync_lock(conn, mode, flags,
+ &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+static int version_unlock(struct ocfs2_cluster_connection *conn)
+{
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ return sync_unlock(conn, &lc->oc_version_lksb, VERSION_LOCK);
+}
+
+/* get_protocol_version()
+ *
+ * To exchange ocfs2 versioning, we use the LVB of the version dlm lock.
+ * The algorithm is:
+ * 1. Attempt to take the lock in EX mode (non-blocking).
+ * 2. If successful (which means it is the first mount), write the
+ * version number and downconvert to PR lock.
+ * 3. If unsuccessful (returns -EAGAIN), read the version from the LVB after
+ * taking the PR lock.
+ */
+
+static int get_protocol_version(struct ocfs2_cluster_connection *conn)
+{
+ int ret;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ struct ocfs2_protocol_version pv;
+
+ running_proto.pv_major =
+ ocfs2_user_plugin.sp_max_proto.pv_major;
+ running_proto.pv_minor =
+ ocfs2_user_plugin.sp_max_proto.pv_minor;
+
+ lc->oc_version_lksb.sb_lvbptr = lc->oc_lvb;
+ ret = version_lock(conn, DLM_LOCK_EX,
+ DLM_LKF_VALBLK|DLM_LKF_NOQUEUE);
+ if (!ret) {
+ conn->cc_version.pv_major = running_proto.pv_major;
+ conn->cc_version.pv_minor = running_proto.pv_minor;
+ version_to_lvb(&running_proto, lc->oc_lvb);
+ version_lock(conn, DLM_LOCK_PR, DLM_LKF_CONVERT|DLM_LKF_VALBLK);
+ } else if (ret == -EAGAIN) {
+ ret = version_lock(conn, DLM_LOCK_PR, DLM_LKF_VALBLK);
+ if (ret)
+ goto out;
+ lvb_to_version(lc->oc_lvb, &pv);
+
+ if ((pv.pv_major != running_proto.pv_major) ||
+ (pv.pv_minor > running_proto.pv_minor)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ conn->cc_version.pv_major = pv.pv_major;
+ conn->cc_version.pv_minor = pv.pv_minor;
+ }
+out:
+ return ret;
+}
+
+static void user_recover_prep(void *arg)
+{
+}
+
+static void user_recover_slot(void *arg, struct dlm_slot *slot)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ printk(KERN_INFO "ocfs2: Node %d/%d down. Initiating recovery.\n",
+ slot->nodeid, slot->slot);
+ conn->cc_recovery_handler(slot->nodeid, conn->cc_recovery_data);
+
+}
+
+static void user_recover_done(void *arg, struct dlm_slot *slots,
+ int num_slots, int our_slot,
+ uint32_t generation)
+{
+ struct ocfs2_cluster_connection *conn = arg;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+ int i;
+
+ for (i = 0; i < num_slots; i++)
+ if (slots[i].slot == our_slot) {
+ atomic_set(&lc->oc_this_node, slots[i].nodeid);
+ break;
+ }
+
+ lc->oc_our_slot = our_slot;
+ wake_up(&lc->oc_wait);
+}
+
+static const struct dlm_lockspace_ops ocfs2_ls_ops = {
+ .recover_prep = user_recover_prep,
+ .recover_slot = user_recover_slot,
+ .recover_done = user_recover_done,
+};
+
+static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
+{
+ version_unlock(conn);
+ dlm_release_lockspace(conn->cc_lockspace, 2);
+ conn->cc_lockspace = NULL;
+ ocfs2_live_connection_drop(conn->cc_private);
+ conn->cc_private = NULL;
+ return 0;
+}
+
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
- struct ocfs2_live_connection *uninitialized_var(control);
- int rc = 0;
+ struct ocfs2_live_connection *lc;
+ int rc, ops_rv;
BUG_ON(conn == NULL);
- rc = ocfs2_live_connection_new(conn, &control);
+ lc = kzalloc(sizeof(struct ocfs2_live_connection), GFP_KERNEL);
+ if (!lc) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ init_waitqueue_head(&lc->oc_wait);
+ init_completion(&lc->oc_sync_wait);
+ atomic_set(&lc->oc_this_node, 0);
+ conn->cc_private = lc;
+ lc->oc_type = NO_CONTROLD;
+
+ rc = dlm_new_lockspace(conn->cc_name, conn->cc_cluster_name,
+ DLM_LSFL_FS, DLM_LVB_LEN,
+ &ocfs2_ls_ops, conn, &ops_rv, &fsdlm);
+ if (rc)
+ goto out;
+
+ if (ops_rv == -EOPNOTSUPP) {
+ lc->oc_type = WITH_CONTROLD;
+ printk(KERN_NOTICE "ocfs2: You seem to be using an older "
+ "version of dlm_controld and/or ocfs2-tools."
+ " Please consider upgrading.\n");
+ } else if (ops_rv) {
+ rc = ops_rv;
+ goto out;
+ }
+ conn->cc_lockspace = fsdlm;
+
+ rc = ocfs2_live_connection_attach(conn, lc);
if (rc)
goto out;
+ if (lc->oc_type == NO_CONTROLD) {
+ rc = get_protocol_version(conn);
+ if (rc) {
+ printk(KERN_ERR "ocfs2: Could not determine"
+ " locking version\n");
+ user_cluster_disconnect(conn);
+ goto out;
+ }
+ wait_event(lc->oc_wait, (atomic_read(&lc->oc_this_node) > 0));
+ }
+
/*
* running_proto must have been set before we allowed any mounts
* to proceed.
@@ -818,42 +1054,34 @@ static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
if (fs_protocol_compare(&running_proto, &conn->cc_version)) {
printk(KERN_ERR
"Unable to mount with fs locking protocol version "
- "%u.%u because the userspace control daemon has "
- "negotiated %u.%u\n",
+ "%u.%u because negotiated protocol is %u.%u\n",
conn->cc_version.pv_major, conn->cc_version.pv_minor,
running_proto.pv_major, running_proto.pv_minor);
rc = -EPROTO;
- ocfs2_live_connection_drop(control);
- goto out;
- }
-
- rc = dlm_new_lockspace(conn->cc_name, NULL, DLM_LSFL_FS, DLM_LVB_LEN,
- NULL, NULL, NULL, &fsdlm);
- if (rc) {
- ocfs2_live_connection_drop(control);
- goto out;
+ ocfs2_live_connection_drop(lc);
+ lc = NULL;
}
- conn->cc_private = control;
- conn->cc_lockspace = fsdlm;
out:
+ if (rc && lc)
+ kfree(lc);
return rc;
}
-static int user_cluster_disconnect(struct ocfs2_cluster_connection *conn)
-{
- dlm_release_lockspace(conn->cc_lockspace, 2);
- conn->cc_lockspace = NULL;
- ocfs2_live_connection_drop(conn->cc_private);
- conn->cc_private = NULL;
- return 0;
-}
-static int user_cluster_this_node(unsigned int *this_node)
+static int user_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *this_node)
{
int rc;
+ struct ocfs2_live_connection *lc = conn->cc_private;
+
+ if (lc->oc_type == WITH_CONTROLD)
+ rc = ocfs2_control_get_this_node();
+ else if (lc->oc_type == NO_CONTROLD)
+ rc = atomic_read(&lc->oc_this_node);
+ else
+ rc = -EINVAL;
- rc = ocfs2_control_get_this_node();
if (rc < 0)
return rc;
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index cb7ec0b63ddc..1324e6600e57 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -309,6 +309,8 @@ int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
EXPORT_SYMBOL_GPL(ocfs2_plock);
int ocfs2_cluster_connect(const char *stack_name,
+ const char *cluster_name,
+ int cluster_name_len,
const char *group,
int grouplen,
struct ocfs2_locking_protocol *lproto,
@@ -342,8 +344,10 @@ int ocfs2_cluster_connect(const char *stack_name,
goto out;
}
- memcpy(new_conn->cc_name, group, grouplen);
+ strlcpy(new_conn->cc_name, group, GROUP_NAME_MAX + 1);
new_conn->cc_namelen = grouplen;
+ strlcpy(new_conn->cc_cluster_name, cluster_name, CLUSTER_NAME_MAX + 1);
+ new_conn->cc_cluster_name_len = cluster_name_len;
new_conn->cc_recovery_handler = recovery_handler;
new_conn->cc_recovery_data = recovery_data;
@@ -386,8 +390,9 @@ int ocfs2_cluster_connect_agnostic(const char *group,
if (cluster_stack_name[0])
stack_name = cluster_stack_name;
- return ocfs2_cluster_connect(stack_name, group, grouplen, lproto,
- recovery_handler, recovery_data, conn);
+ return ocfs2_cluster_connect(stack_name, NULL, 0, group, grouplen,
+ lproto, recovery_handler, recovery_data,
+ conn);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_connect_agnostic);
@@ -460,9 +465,10 @@ void ocfs2_cluster_hangup(const char *group, int grouplen)
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_hangup);
-int ocfs2_cluster_this_node(unsigned int *node)
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node)
{
- return active_stack->sp_ops->this_node(node);
+ return active_stack->sp_ops->this_node(conn, node);
}
EXPORT_SYMBOL_GPL(ocfs2_cluster_this_node);
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 1ec56fdb8d0d..66334a30cea8 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -45,6 +45,9 @@ struct file_lock;
*/
#define GROUP_NAME_MAX 64
+/* This shadows OCFS2_CLUSTER_NAME_LEN */
+#define CLUSTER_NAME_MAX 16
+
/*
* ocfs2_protocol_version changes when ocfs2 does something different in
@@ -97,8 +100,10 @@ struct ocfs2_locking_protocol {
* locking compatibility.
*/
struct ocfs2_cluster_connection {
- char cc_name[GROUP_NAME_MAX];
+ char cc_name[GROUP_NAME_MAX + 1];
int cc_namelen;
+ char cc_cluster_name[CLUSTER_NAME_MAX + 1];
+ int cc_cluster_name_len;
struct ocfs2_protocol_version cc_version;
struct ocfs2_locking_protocol *cc_proto;
void (*cc_recovery_handler)(int node_num, void *recovery_data);
@@ -152,7 +157,8 @@ struct ocfs2_stack_operations {
* ->this_node() returns the cluster's unique identifier for the
* local node.
*/
- int (*this_node)(unsigned int *node);
+ int (*this_node)(struct ocfs2_cluster_connection *conn,
+ unsigned int *node);
/*
* Call the underlying dlm lock function. The ->dlm_lock()
@@ -239,6 +245,8 @@ struct ocfs2_stack_plugin {
/* Used by the filesystem */
int ocfs2_cluster_connect(const char *stack_name,
+ const char *cluster_name,
+ int cluster_name_len,
const char *group,
int grouplen,
struct ocfs2_locking_protocol *lproto,
@@ -260,7 +268,8 @@ int ocfs2_cluster_connect_agnostic(const char *group,
int ocfs2_cluster_disconnect(struct ocfs2_cluster_connection *conn,
int hangup_pending);
void ocfs2_cluster_hangup(const char *group, int grouplen);
-int ocfs2_cluster_this_node(unsigned int *node);
+int ocfs2_cluster_this_node(struct ocfs2_cluster_connection *conn,
+ unsigned int *node);
struct ocfs2_lock_res;
int ocfs2_dlm_lock(struct ocfs2_cluster_connection *conn,
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 2c91452c4047..47ae2663a6f5 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -113,12 +113,6 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
struct ocfs2_suballoc_result *res);
static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
int nr);
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
- struct inode *alloc_inode,
- struct ocfs2_group_desc *bg,
- struct buffer_head *group_bh,
- unsigned int bit_off,
- unsigned int num_bits);
static int ocfs2_relink_block_group(handle_t *handle,
struct inode *alloc_inode,
struct buffer_head *fe_bh,
@@ -1343,7 +1337,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
return status;
}
-static inline int ocfs2_block_group_set_bits(handle_t *handle,
+int ocfs2_block_group_set_bits(handle_t *handle,
struct inode *alloc_inode,
struct ocfs2_group_desc *bg,
struct buffer_head *group_bh,
@@ -1388,8 +1382,6 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
ocfs2_journal_dirty(handle, group_bh);
bail:
- if (status)
- mlog_errno(status);
return status;
}
@@ -1588,7 +1580,7 @@ static int ocfs2_block_group_search(struct inode *inode,
return ret;
}
-static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
handle_t *handle,
struct buffer_head *di_bh,
u32 num_bits,
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index a36d0aa50911..218d8036b3e7 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -86,6 +86,18 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
u32 bits_wanted,
struct ocfs2_alloc_context **ac);
+int ocfs2_alloc_dinode_update_counts(struct inode *inode,
+ handle_t *handle,
+ struct buffer_head *di_bh,
+ u32 num_bits,
+ u16 chain);
+int ocfs2_block_group_set_bits(handle_t *handle,
+ struct inode *alloc_inode,
+ struct ocfs2_group_desc *bg,
+ struct buffer_head *group_bh,
+ unsigned int bit_off,
+ unsigned int num_bits);
+
int ocfs2_claim_metadata(handle_t *handle,
struct ocfs2_alloc_context *ac,
u32 bits_wanted,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c41492957aa5..49d84f80f36c 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -68,7 +68,6 @@
#include "super.h"
#include "sysfile.h"
#include "uptodate.h"
-#include "ver.h"
#include "xattr.h"
#include "quota.h"
#include "refcounttree.h"
@@ -90,6 +89,7 @@ static struct dentry *ocfs2_debugfs_root = NULL;
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("OCFS2 cluster file system");
struct mount_options
{
@@ -1618,8 +1618,6 @@ static int __init ocfs2_init(void)
{
int status, i;
- ocfs2_print_version();
-
for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++)
init_waitqueue_head(&ocfs2__ioend_wq[i]);
@@ -1947,11 +1945,15 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
ocfs2_shutdown_local_alloc(osb);
- ocfs2_truncate_log_shutdown(osb);
-
/* This will disable recovery and flush any recovery work. */
ocfs2_recovery_exit(osb);
+ /*
+ * During dismount, when it recovers another node it will call
+ * ocfs2_recover_orphans and queue delayed work osb_truncate_log_wq.
+ */
+ ocfs2_truncate_log_shutdown(osb);
+
ocfs2_journal_shutdown(osb);
ocfs2_sync_blockdev(sb);
@@ -2225,10 +2227,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
if (ocfs2_clusterinfo_valid(osb)) {
osb->osb_stackflags =
OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags;
- memcpy(osb->osb_cluster_stack,
+ strlcpy(osb->osb_cluster_stack,
OCFS2_RAW_SB(di)->s_cluster_info.ci_stack,
- OCFS2_STACK_LABEL_LEN);
- osb->osb_cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
+ OCFS2_STACK_LABEL_LEN + 1);
if (strlen(osb->osb_cluster_stack) != OCFS2_STACK_LABEL_LEN) {
mlog(ML_ERROR,
"couldn't mount because of an invalid "
@@ -2237,6 +2238,9 @@ static int ocfs2_initialize_super(struct super_block *sb,
status = -EINVAL;
goto bail;
}
+ strlcpy(osb->osb_cluster_name,
+ OCFS2_RAW_SB(di)->s_cluster_info.ci_cluster,
+ OCFS2_CLUSTER_NAME_LEN + 1);
} else {
/* The empty string is identical with classic tools that
* don't know about s_cluster_info. */
diff --git a/fs/ocfs2/ver.c b/fs/ocfs2/ver.c
deleted file mode 100644
index e2488f4128a2..000000000000
--- a/fs/ocfs2/ver.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.c
- *
- * version string
- *
- * Copyright (C) 2002, 2005 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-#include "ver.h"
-
-#define OCFS2_BUILD_VERSION "1.5.0"
-
-#define VERSION_STR "OCFS2 " OCFS2_BUILD_VERSION
-
-void ocfs2_print_version(void)
-{
- printk(KERN_INFO "%s\n", VERSION_STR);
-}
-
-MODULE_DESCRIPTION(VERSION_STR);
-
-MODULE_VERSION(OCFS2_BUILD_VERSION);
diff --git a/fs/ocfs2/ver.h b/fs/ocfs2/ver.h
deleted file mode 100644
index d7395cb91d2f..000000000000
--- a/fs/ocfs2/ver.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
- * ver.h
- *
- * Function prototypes
- *
- * Copyright (C) 2002, 2004 Oracle. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public
- * License along with this program; if not, write to the
- * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- * Boston, MA 021110-1307, USA.
- */
-
-#ifndef OCFS2_VER_H
-#define OCFS2_VER_H
-
-void ocfs2_print_version(void);
-
-#endif /* OCFS2_VER_H */
diff --git a/fs/pipe.c b/fs/pipe.c
index 0e0752ef2715..78fd0d0788db 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -663,10 +663,11 @@ out:
wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
}
- if (ret > 0) {
+ if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
int err = file_update_time(filp);
if (err)
ret = err;
+ sb_end_write(file_inode(filp)->i_sb);
}
return ret;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8bd2135b7f82..551e61ba15b6 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -22,11 +22,80 @@
#include <linux/errno.h>
-EXPORT_SYMBOL(posix_acl_init);
-EXPORT_SYMBOL(posix_acl_alloc);
-EXPORT_SYMBOL(posix_acl_valid);
-EXPORT_SYMBOL(posix_acl_equiv_mode);
-EXPORT_SYMBOL(posix_acl_from_mode);
+struct posix_acl **acl_by_type(struct inode *inode, int type)
+{
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ return &inode->i_acl;
+ case ACL_TYPE_DEFAULT:
+ return &inode->i_default_acl;
+ default:
+ BUG();
+ }
+}
+EXPORT_SYMBOL(acl_by_type);
+
+struct posix_acl *get_cached_acl(struct inode *inode, int type)
+{
+ struct posix_acl **p = acl_by_type(inode, type);
+ struct posix_acl *acl = ACCESS_ONCE(*p);
+ if (acl) {
+ spin_lock(&inode->i_lock);
+ acl = *p;
+ if (acl != ACL_NOT_CACHED)
+ acl = posix_acl_dup(acl);
+ spin_unlock(&inode->i_lock);
+ }
+ return acl;
+}
+EXPORT_SYMBOL(get_cached_acl);
+
+struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type)
+{
+ return rcu_dereference(*acl_by_type(inode, type));
+}
+EXPORT_SYMBOL(get_cached_acl_rcu);
+
+void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+ struct posix_acl **p = acl_by_type(inode, type);
+ struct posix_acl *old;
+ spin_lock(&inode->i_lock);
+ old = *p;
+ rcu_assign_pointer(*p, posix_acl_dup(acl));
+ spin_unlock(&inode->i_lock);
+ if (old != ACL_NOT_CACHED)
+ posix_acl_release(old);
+}
+EXPORT_SYMBOL(set_cached_acl);
+
+void forget_cached_acl(struct inode *inode, int type)
+{
+ struct posix_acl **p = acl_by_type(inode, type);
+ struct posix_acl *old;
+ spin_lock(&inode->i_lock);
+ old = *p;
+ *p = ACL_NOT_CACHED;
+ spin_unlock(&inode->i_lock);
+ if (old != ACL_NOT_CACHED)
+ posix_acl_release(old);
+}
+EXPORT_SYMBOL(forget_cached_acl);
+
+void forget_all_cached_acls(struct inode *inode)
+{
+ struct posix_acl *old_access, *old_default;
+ spin_lock(&inode->i_lock);
+ old_access = inode->i_acl;
+ old_default = inode->i_default_acl;
+ inode->i_acl = inode->i_default_acl = ACL_NOT_CACHED;
+ spin_unlock(&inode->i_lock);
+ if (old_access != ACL_NOT_CACHED)
+ posix_acl_release(old_access);
+ if (old_default != ACL_NOT_CACHED)
+ posix_acl_release(old_default);
+}
+EXPORT_SYMBOL(forget_all_cached_acls);
/*
* Init a fresh posix_acl
@@ -37,6 +106,7 @@ posix_acl_init(struct posix_acl *acl, int count)
atomic_set(&acl->a_refcount, 1);
acl->a_count = count;
}
+EXPORT_SYMBOL(posix_acl_init);
/*
* Allocate a new ACL with the specified number of entries.
@@ -51,6 +121,7 @@ posix_acl_alloc(int count, gfp_t flags)
posix_acl_init(acl, count);
return acl;
}
+EXPORT_SYMBOL(posix_acl_alloc);
/*
* Clone an ACL.
@@ -78,8 +149,6 @@ posix_acl_valid(const struct posix_acl *acl)
{
const struct posix_acl_entry *pa, *pe;
int state = ACL_USER_OBJ;
- kuid_t prev_uid = INVALID_UID;
- kgid_t prev_gid = INVALID_GID;
int needs_mask = 0;
FOREACH_ACL_ENTRY(pa, acl, pe) {
@@ -98,10 +167,6 @@ posix_acl_valid(const struct posix_acl *acl)
return -EINVAL;
if (!uid_valid(pa->e_uid))
return -EINVAL;
- if (uid_valid(prev_uid) &&
- uid_lte(pa->e_uid, prev_uid))
- return -EINVAL;
- prev_uid = pa->e_uid;
needs_mask = 1;
break;
@@ -117,10 +182,6 @@ posix_acl_valid(const struct posix_acl *acl)
return -EINVAL;
if (!gid_valid(pa->e_gid))
return -EINVAL;
- if (gid_valid(prev_gid) &&
- gid_lte(pa->e_gid, prev_gid))
- return -EINVAL;
- prev_gid = pa->e_gid;
needs_mask = 1;
break;
@@ -146,6 +207,7 @@ posix_acl_valid(const struct posix_acl *acl)
return 0;
return -EINVAL;
}
+EXPORT_SYMBOL(posix_acl_valid);
/*
* Returns 0 if the acl can be exactly represented in the traditional
@@ -186,6 +248,7 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
*mode_p = (*mode_p & ~S_IRWXUGO) | mode;
return not_equiv;
}
+EXPORT_SYMBOL(posix_acl_equiv_mode);
/*
* Create an ACL representing the file mode permission bits of an inode.
@@ -207,6 +270,7 @@ posix_acl_from_mode(umode_t mode, gfp_t flags)
acl->a_entries[2].e_perm = (mode & S_IRWXO);
return acl;
}
+EXPORT_SYMBOL(posix_acl_from_mode);
/*
* Return 0 if current is granted want access to the inode
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 1bd2077187fd..656e401794de 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -140,24 +140,15 @@ static const char * const task_state_array[] = {
"t (tracing stop)", /* 8 */
"Z (zombie)", /* 16 */
"X (dead)", /* 32 */
- "x (dead)", /* 64 */
- "K (wakekill)", /* 128 */
- "W (waking)", /* 256 */
- "P (parked)", /* 512 */
};
static inline const char *get_task_state(struct task_struct *tsk)
{
- unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
- const char * const *p = &task_state_array[0];
+ unsigned int state = (tsk->state | tsk->exit_state) & TASK_REPORT;
- BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+ BUILD_BUG_ON(1 + ilog2(TASK_REPORT) != ARRAY_SIZE(task_state_array)-1);
- while (state) {
- p++;
- state >>= 1;
- }
- return *p;
+ return task_state_array[fls(state)];
}
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
@@ -453,8 +444,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
min_flt += t->min_flt;
maj_flt += t->maj_flt;
gtime += task_gtime(t);
- t = next_thread(t);
- } while (t != task);
+ } while_each_thread(task, t);
min_flt += sig->min_flt;
maj_flt += sig->maj_flt;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 03c8d747be48..51507065263b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1658,13 +1658,18 @@ int pid_revalidate(struct dentry *dentry, unsigned int flags)
return 0;
}
+static inline bool proc_inode_is_dead(struct inode *inode)
+{
+ return !proc_pid(inode)->tasks[PIDTYPE_PID].first;
+}
+
int pid_delete_dentry(const struct dentry *dentry)
{
/* Is the task we represent dead?
* If so, then don't put the dentry on the lru list,
* kill it immediately.
*/
- return !proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first;
+ return proc_inode_is_dead(dentry->d_inode);
}
const struct dentry_operations pid_dentry_operations =
@@ -3092,34 +3097,42 @@ out_no_task:
* In the case of a seek we start with the leader and walk nr
* threads past it.
*/
-static struct task_struct *first_tid(struct task_struct *leader,
- int tid, int nr, struct pid_namespace *ns)
+static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos,
+ struct pid_namespace *ns)
{
- struct task_struct *pos;
+ struct task_struct *pos, *task;
+ unsigned long nr = f_pos;
+
+ if (nr != f_pos) /* 32bit overflow? */
+ return NULL;
rcu_read_lock();
- /* Attempt to start with the pid of a thread */
- if (tid && (nr > 0)) {
+ task = pid_task(pid, PIDTYPE_PID);
+ if (!task)
+ goto fail;
+
+ /* Attempt to start with the tid of a thread */
+ if (tid && nr) {
pos = find_task_by_pid_ns(tid, ns);
- if (pos && (pos->group_leader == leader))
+ if (pos && same_thread_group(pos, task))
goto found;
}
/* If nr exceeds the number of threads there is nothing todo */
- pos = NULL;
- if (nr && nr >= get_nr_threads(leader))
- goto out;
+ if (nr >= get_nr_threads(task))
+ goto fail;
/* If we haven't found our starting place yet start
* with the leader and walk nr threads forward.
*/
- for (pos = leader; nr > 0; --nr) {
- pos = next_thread(pos);
- if (pos == leader) {
- pos = NULL;
- goto out;
- }
- }
+ pos = task = task->group_leader;
+ do {
+ if (!nr--)
+ goto found;
+ } while_each_thread(task, pos);
+fail:
+ pos = NULL;
+ goto out;
found:
get_task_struct(pos);
out:
@@ -3152,25 +3165,16 @@ static struct task_struct *next_tid(struct task_struct *start)
/* for the /proc/TGID/task/ directories */
static int proc_task_readdir(struct file *file, struct dir_context *ctx)
{
- struct task_struct *leader = NULL;
- struct task_struct *task = get_proc_task(file_inode(file));
+ struct inode *inode = file_inode(file);
+ struct task_struct *task;
struct pid_namespace *ns;
int tid;
- if (!task)
- return -ENOENT;
- rcu_read_lock();
- if (pid_alive(task)) {
- leader = task->group_leader;
- get_task_struct(leader);
- }
- rcu_read_unlock();
- put_task_struct(task);
- if (!leader)
+ if (proc_inode_is_dead(inode))
return -ENOENT;
if (!dir_emit_dots(file, ctx))
- goto out;
+ return 0;
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
@@ -3178,7 +3182,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
ns = file->f_dentry->d_sb->s_fs_info;
tid = (int)file->f_version;
file->f_version = 0;
- for (task = first_tid(leader, tid, ctx->pos - 2, ns);
+ for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns);
task;
task = next_tid(task), ctx->pos++) {
char name[PROC_NUMBUF];
@@ -3194,8 +3198,7 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
break;
}
}
-out:
- put_task_struct(leader);
+
return 0;
}
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index cca93b6fb9a9..b7f268eb5f45 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -49,8 +49,7 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
setattr_copy(inode, iattr);
mark_inode_dirty(inode);
- de->uid = inode->i_uid;
- de->gid = inode->i_gid;
+ proc_set_user(de, inode->i_uid, inode->i_gid);
de->mode = inode->i_mode;
return 0;
}
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index a77d2b299199..24270eceddbf 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -26,7 +26,11 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
unsigned long committed;
struct vmalloc_info vmi;
long cached;
+ long available;
+ unsigned long pagecache;
+ unsigned long wmark_low = 0;
unsigned long pages[NR_LRU_LISTS];
+ struct zone *zone;
int lru;
/*
@@ -47,12 +51,44 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
pages[lru] = global_page_state(NR_LRU_BASE + lru);
+ for_each_zone(zone)
+ wmark_low += zone->watermark[WMARK_LOW];
+
+ /*
+ * Estimate the amount of memory available for userspace allocations,
+ * without causing swapping.
+ *
+ * Free memory cannot be taken below the low watermark, before the
+ * system starts swapping.
+ */
+ available = i.freeram - wmark_low;
+
+ /*
+ * Not all the page cache can be freed, otherwise the system will
+ * start swapping. Assume at least half of the page cache, or the
+ * low watermark worth of cache, needs to stay.
+ */
+ pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
+ pagecache -= min(pagecache / 2, wmark_low);
+ available += pagecache;
+
+ /*
+ * Part of the reclaimable swap consists of items that are in use,
+ * and cannot be freed. Cap this estimate at the low watermark.
+ */
+ available += global_page_state(NR_SLAB_RECLAIMABLE) -
+ min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
+
+ if (available < 0)
+ available = 0;
+
/*
* Tagged format, for easy grepping and expansion.
*/
seq_printf(m,
"MemTotal: %8lu kB\n"
"MemFree: %8lu kB\n"
+ "MemAvailable: %8lu kB\n"
"Buffers: %8lu kB\n"
"Cached: %8lu kB\n"
"SwapCached: %8lu kB\n"
@@ -105,6 +141,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
,
K(i.totalram),
K(i.freeram),
+ K(available),
K(i.bufferram),
K(cached),
K(total_swapcache_pages()),
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b8730d9ebaee..cab84b6272ed 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -118,10 +118,12 @@ u64 stable_page_flags(struct page *page)
/*
* PageTransCompound can be true for non-huge compound pages (slab
* pages or pages allocated by drivers with __GFP_COMP) because it
- * just checks PG_head/PG_tail, so we need to check PageLRU to make
- * sure a given page is a thp, not a non-huge compound page.
+ * just checks PG_head/PG_tail, so we need to check PageLRU/PageAnon
+ * to make sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+ else if (PageTransCompound(page) &&
+ (PageLRU(compound_trans_head(page)) ||
+ PageAnon(compound_trans_head(page))))
u |= 1 << KPF_THP;
/*
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
index 70779b2fc209..c82dd5147845 100644
--- a/fs/proc/proc_devtree.c
+++ b/fs/proc/proc_devtree.c
@@ -74,9 +74,9 @@ __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
return NULL;
if (!strncmp(name, "security-", 9))
- ent->size = 0; /* don't leak number of password chars */
+ proc_set_size(ent, 0); /* don't leak number of password chars */
else
- ent->size = pp->length;
+ proc_set_size(ent, pp->length);
return ent;
}
@@ -232,6 +232,7 @@ void __init proc_device_tree_init(void)
return;
root = of_find_node_by_path("/");
if (root == NULL) {
+ remove_proc_entry("device-tree", NULL);
pr_debug("/proc/device-tree: can't find root\n");
return;
}
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 4884ac5ae9be..1e56a4e8cf7c 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -30,13 +30,6 @@
#include "internal.h"
-const struct address_space_operations ramfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
- .set_page_dirty = __set_page_dirty_no_writeback,
-};
-
const struct file_operations ramfs_file_operations = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 8d5b438cc188..0b3d8e4cb2fa 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -27,13 +27,12 @@
#include "internal.h"
static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
-
-const struct address_space_operations ramfs_aops = {
- .readpage = simple_readpage,
- .write_begin = simple_write_begin,
- .write_end = simple_write_end,
- .set_page_dirty = __set_page_dirty_no_writeback,
-};
+static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+ unsigned long addr,
+ unsigned long len,
+ unsigned long pgoff,
+ unsigned long flags);
+static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
const struct file_operations ramfs_file_operations = {
.mmap = ramfs_nommu_mmap,
@@ -197,7 +196,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
* - the pages to be mapped must exist
* - the pages be physically contiguous in sequence
*/
-unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
@@ -256,7 +255,7 @@ out:
/*
* set up a mapping for shared memory segments
*/
-int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
+static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
{
if (!(vma->vm_flags & VM_SHARED))
return -ENOSYS;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 39d14659a8d3..03b8016e5bbc 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -43,6 +43,13 @@
static const struct super_operations ramfs_ops;
static const struct inode_operations ramfs_dir_inode_operations;
+static const struct address_space_operations ramfs_aops = {
+ .readpage = simple_readpage,
+ .write_begin = simple_write_begin,
+ .write_end = simple_write_end,
+ .set_page_dirty = __set_page_dirty_no_writeback,
+};
+
static struct backing_dev_info ramfs_backing_dev_info = {
.name = "ramfs",
.ra_pages = 0, /* No readahead */
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
index 6b330639b51d..a9d8ae88fa15 100644
--- a/fs/ramfs/internal.h
+++ b/fs/ramfs/internal.h
@@ -10,5 +10,4 @@
*/
-extern const struct address_space_operations ramfs_aops;
extern const struct inode_operations ramfs_file_inode_operations;
diff --git a/fs/read_write.c b/fs/read_write.c
index 58e440df1bc6..1193ffd03565 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -901,10 +901,6 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
io_fn_t fn;
iov_fn_t fnv;
- ret = -EFAULT;
- if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
- goto out;
-
ret = compat_rw_copy_check_uvector(type, uvector, nr_segs,
UIO_FASTIOV, iovstack, &iov);
if (ret <= 0)
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index f8adaee537c2..dfb617b2bad2 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1958,8 +1958,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
#define MAX_US_INT 0xffff
// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
-#define U32_MAX (~(__u32)0)
-
static inline loff_t max_reiserfs_offset(struct inode *inode)
{
if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index ff1d3d42e72a..d8418782862b 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -533,16 +533,14 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
root = romfs_iget(sb, pos);
if (IS_ERR(root))
- goto error;
+ return PTR_ERR(root);
sb->s_root = d_make_root(root);
if (!sb->s_root)
- goto error;
+ return -ENOMEM;
return 0;
-error:
- return -EINVAL;
error_rsb_inval:
ret = -EINVAL;
error_rsb:
diff --git a/fs/super.c b/fs/super.c
index e5f6c2cfac38..cecd780e0f44 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -166,6 +166,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
if (!s)
return NULL;
+ INIT_LIST_HEAD(&s->s_mounts);
+
if (security_sb_alloc(s))
goto fail;
@@ -188,7 +190,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
if (list_lru_init(&s->s_inode_lru))
goto fail;
- INIT_LIST_HEAD(&s->s_mounts);
init_rwsem(&s->s_umount);
lockdep_set_class(&s->s_umount, &type->s_umount_key);
/*
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index cc1febd8fadf..5157b866a853 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -2118,26 +2118,10 @@ out_free:
*/
static void free_inodes(struct fsck_data *fsckd)
{
- struct rb_node *this = fsckd->inodes.rb_node;
- struct fsck_inode *fscki;
+ struct fsck_inode *fscki, *n;
- while (this) {
- if (this->rb_left)
- this = this->rb_left;
- else if (this->rb_right)
- this = this->rb_right;
- else {
- fscki = rb_entry(this, struct fsck_inode, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &fscki->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- kfree(fscki);
- }
- }
+ rbtree_postorder_for_each_entry_safe(fscki, n, &fsckd->inodes, rb)
+ kfree(fscki);
}
/**
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36bd4efd0819..a902c5919e42 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -574,27 +574,10 @@ static int done_already(struct rb_root *done_tree, int lnum)
*/
static void destroy_done_tree(struct rb_root *done_tree)
{
- struct rb_node *this = done_tree->rb_node;
- struct done_ref *dr;
+ struct done_ref *dr, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- dr = rb_entry(this, struct done_ref, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &dr->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(dr, n, done_tree, rb)
kfree(dr);
- }
}
/**
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index ba32da3fe08a..f1c3e5a1b315 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -815,27 +815,10 @@ static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
static void dbg_free_check_tree(struct rb_root *root)
{
- struct rb_node *this = root->rb_node;
- struct check_orphan *o;
+ struct check_orphan *o, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- o = rb_entry(this, struct check_orphan, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &o->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(o, n, root, rb)
kfree(o);
- }
}
static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 065096e36ed9..c14adb2f420c 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -1335,29 +1335,14 @@ static void remove_ino(struct ubifs_info *c, ino_t inum)
*/
void ubifs_destroy_size_tree(struct ubifs_info *c)
{
- struct rb_node *this = c->size_tree.rb_node;
- struct size_entry *e;
+ struct size_entry *e, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- e = rb_entry(this, struct size_entry, rb);
+ rbtree_postorder_for_each_entry_safe(e, n, &c->size_tree, rb) {
if (e->inode)
iput(e->inode);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &e->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
kfree(e);
}
+
c->size_tree = RB_ROOT;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f69daa514a57..5ded8490c0c6 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -873,26 +873,10 @@ static void free_orphans(struct ubifs_info *c)
*/
static void free_buds(struct ubifs_info *c)
{
- struct rb_node *this = c->buds.rb_node;
- struct ubifs_bud *bud;
-
- while (this) {
- if (this->rb_left)
- this = this->rb_left;
- else if (this->rb_right)
- this = this->rb_right;
- else {
- bud = rb_entry(this, struct ubifs_bud, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &bud->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
- kfree(bud);
- }
- }
+ struct ubifs_bud *bud, *n;
+
+ rbtree_postorder_for_each_entry_safe(bud, n, &c->buds, rb)
+ kfree(bud);
}
/**
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 349f31a30f40..9083bc7ed4ae 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -178,27 +178,11 @@ static int ins_clr_old_idx_znode(struct ubifs_info *c,
*/
void destroy_old_idx(struct ubifs_info *c)
{
- struct rb_node *this = c->old_idx.rb_node;
- struct ubifs_old_idx *old_idx;
+ struct ubifs_old_idx *old_idx, *n;
- while (this) {
- if (this->rb_left) {
- this = this->rb_left;
- continue;
- } else if (this->rb_right) {
- this = this->rb_right;
- continue;
- }
- old_idx = rb_entry(this, struct ubifs_old_idx, rb);
- this = rb_parent(this);
- if (this) {
- if (this->rb_left == &old_idx->rb)
- this->rb_left = NULL;
- else
- this->rb_right = NULL;
- }
+ rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb)
kfree(old_idx);
- }
+
c->old_idx = RB_ROOT;
}
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index a7ea492ae660..59aa24dc0cdd 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -54,7 +54,7 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
if (ufs_fragnum(fragment) + count > uspi->s_fpg)
ufs_error (sb, "ufs_free_fragments", "internal error");
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
cgno = ufs_dtog(uspi, fragment);
bit = ufs_dtogd(uspi, fragment);
@@ -118,12 +118,12 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
return;
failed:
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return;
}
@@ -155,7 +155,7 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count)
goto failed;
}
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
do_more:
overflow = 0;
@@ -215,12 +215,12 @@ do_more:
}
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
return;
failed_unlock:
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
failed:
UFSD("EXIT (FAILED)\n");
return;
@@ -361,7 +361,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
usb1 = ubh_get_usb_first(uspi);
*err = -ENOSPC;
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
tmp = ufs_data_ptr_to_cpu(sb, p);
if (count + ufs_fragnum(fragment) > uspi->s_fpb) {
@@ -382,19 +382,19 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
"fragment %llu, tmp %llu\n",
(unsigned long long)fragment,
(unsigned long long)tmp);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return INVBLOCK;
}
if (fragment < UFS_I(inode)->i_lastfrag) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return 0;
}
}
else {
if (tmp) {
UFSD("EXIT (ALREADY ALLOCATED)\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return 0;
}
}
@@ -403,7 +403,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
* There is not enough space for user on the device
*/
if (!capable(CAP_SYS_RESOURCE) && ufs_freespace(uspi, UFS_MINFREE) <= 0) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return 0;
}
@@ -428,7 +428,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
ufs_clear_frags(inode, result + oldcount,
newcount - oldcount, locked_page != NULL);
}
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -443,7 +443,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
fragment + count);
ufs_clear_frags(inode, result + oldcount, newcount - oldcount,
locked_page != NULL);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT, result %llu\n", (unsigned long long)result);
return result;
}
@@ -481,7 +481,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
*err = 0;
UFS_I(inode)->i_lastfrag = max(UFS_I(inode)->i_lastfrag,
fragment + count);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
if (newcount < request)
ufs_free_fragments (inode, result + newcount, request - newcount);
ufs_free_fragments (inode, tmp, oldcount);
@@ -489,7 +489,7 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
return result;
}
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT (FAILED)\n");
return 0;
}
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index d0426d74817b..da5e52551850 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -71,11 +71,11 @@ void ufs_free_inode (struct inode * inode)
ino = inode->i_ino;
- mutex_lock(&UFS_SB(sb)->s_lock);
+ lock_ufs(sb);
if (!((ino > 1) && (ino < (uspi->s_ncg * uspi->s_ipg )))) {
ufs_warning(sb, "ufs_free_inode", "reserved inode or nonexistent inode %u\n", ino);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return;
}
@@ -83,7 +83,7 @@ void ufs_free_inode (struct inode * inode)
bit = ufs_inotocgoff (ino);
ucpi = ufs_load_cylinder (sb, cg);
if (!ucpi) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
return;
}
ucg = ubh_get_ucg(UCPI_UBH(ucpi));
@@ -117,7 +117,7 @@ void ufs_free_inode (struct inode * inode)
ubh_sync_block(UCPI_UBH(ucpi));
ufs_mark_sb_dirty(sb);
- mutex_unlock(&UFS_SB(sb)->s_lock);
+ unlock_ufs(sb);
UFSD("EXIT\n");
}
@@ -197,7 +197,7 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode)
uspi = sbi->s_uspi;
usb1 = ubh_get_usb_first(uspi);
- mutex_lock(&sbi->s_lock);
+ lock_ufs(sb);
/*
* Try to place the inode in its parent directory
@@ -332,21 +332,20 @@ cg_found:
sync_dirty_buffer(bh);
brelse(bh);
}
-
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
UFSD("allocating inode %lu\n", inode->i_ino);
UFSD("EXIT\n");
return inode;
fail_remove_inode:
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
clear_nlink(inode);
iput(inode);
UFSD("EXIT (FAILED): err %d\n", err);
return ERR_PTR(err);
failed:
- mutex_unlock(&sbi->s_lock);
+ unlock_ufs(sb);
make_bad_inode(inode);
iput (inode);
UFSD("EXIT (FAILED): err %d\n", err);
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 329f2f53b7ed..e5a993416fec 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -699,7 +699,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
unsigned flags;
lock_ufs(sb);
- mutex_lock(&UFS_SB(sb)->s_lock);
UFSD("ENTER\n");
@@ -717,7 +716,6 @@ static int ufs_sync_fs(struct super_block *sb, int wait)
ufs_put_cstotal(sb);
UFSD("EXIT\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
@@ -762,6 +760,7 @@ static void ufs_put_super(struct super_block *sb)
ubh_brelse_uspi (sbi->s_uspi);
kfree (sbi->s_uspi);
+ mutex_destroy(&sbi->mutex);
kfree (sbi);
sb->s_fs_info = NULL;
UFSD("EXIT\n");
@@ -805,7 +804,6 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent)
}
#endif
mutex_init(&sbi->mutex);
- mutex_init(&sbi->s_lock);
spin_lock_init(&sbi->work_lock);
INIT_DELAYED_WORK(&sbi->sync_work, delayed_sync_fs);
/*
@@ -1262,6 +1260,7 @@ failed:
if (ubh)
ubh_brelse_uspi (uspi);
kfree (uspi);
+ mutex_destroy(&sbi->mutex);
kfree(sbi);
sb->s_fs_info = NULL;
UFSD("EXIT (FAILED)\n");
@@ -1281,7 +1280,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
unsigned flags;
lock_ufs(sb);
- mutex_lock(&UFS_SB(sb)->s_lock);
uspi = UFS_SB(sb)->s_uspi;
flags = UFS_SB(sb)->s_flags;
usb1 = ubh_get_usb_first(uspi);
@@ -1295,7 +1293,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
new_mount_opt = 0;
ufs_set_opt (new_mount_opt, ONERROR_LOCK);
if (!ufs_parse_options (data, &new_mount_opt)) {
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
@@ -1303,14 +1300,12 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
new_mount_opt |= ufstype;
} else if ((new_mount_opt & UFS_MOUNT_UFSTYPE) != ufstype) {
printk("ufstype can't be changed during remount\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
if ((*mount_flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
UFS_SB(sb)->s_mount_opt = new_mount_opt;
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
}
@@ -1335,7 +1330,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#ifndef CONFIG_UFS_FS_WRITE
printk("ufs was compiled with read-only support, "
"can't be mounted as read-write\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
#else
@@ -1345,13 +1339,11 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
ufstype != UFS_MOUNT_UFSTYPE_SUNx86 &&
ufstype != UFS_MOUNT_UFSTYPE_UFS2) {
printk("this ufstype is read-only supported\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EINVAL;
}
if (!ufs_read_cylinder_structures(sb)) {
printk("failed during remounting\n");
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return -EPERM;
}
@@ -1359,7 +1351,6 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data)
#endif
}
UFS_SB(sb)->s_mount_opt = new_mount_opt;
- mutex_unlock(&UFS_SB(sb)->s_lock);
unlock_ufs(sb);
return 0;
}
diff --git a/fs/ufs/ufs.h b/fs/ufs/ufs.h
index ff2c15ab81aa..343e6fc571e5 100644
--- a/fs/ufs/ufs.h
+++ b/fs/ufs/ufs.h
@@ -24,7 +24,6 @@ struct ufs_sb_info {
int work_queued; /* non-zero if the delayed work is queued */
struct delayed_work sync_work; /* FS sync delayed work */
spinlock_t work_lock; /* protects sync_work and work_queued */
- struct mutex s_lock;
};
struct ufs_inode_info {