From ebbefc011e56bd85b4745d01e5b8d7d05d95ed5d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:54:41 +0100 Subject: [PATCH] clean up blkdev_get a little bit The way the bd_claim for the FMODE_EXCL case is implemented is rather confusing. Clean it up to the most logical style. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/block_dev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index db831efbdbbd..7c727523bc54 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1135,12 +1135,15 @@ static int blkdev_open(struct inode * inode, struct file * filp) if (res) return res; - if (!(filp->f_mode & FMODE_EXCL)) - return 0; + if (filp->f_mode & FMODE_EXCL) { + res = bd_claim(bdev, filp); + if (res) + goto out_blkdev_put; + } - if (!(res = bd_claim(bdev, filp))) - return 0; + return 0; + out_blkdev_put: blkdev_put(bdev, filp->f_mode); return res; } -- cgit v1.2.3 From fd4ce1acd0f8558033b1a6968001552bd7671e6d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Nov 2008 14:58:42 +0100 Subject: [PATCH 1/2] kill FMODE_NDELAY_NOW Update FMODE_NDELAY before each ioctl call so that we can kill the magic FMODE_NDELAY_NOW. It would be even better to do this directly in setfl(), but for that we'd need to have FMODE_NDELAY for all files, not just block special files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- block/compat_ioctl.c | 8 +++++++- drivers/scsi/sd.c | 2 +- drivers/scsi/sr.c | 2 +- fs/block_dev.c | 10 +++++++++- include/linux/fs.h | 1 - 5 files changed, 18 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index d43e6087badc..67eb93cff699 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -722,8 +722,14 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct backing_dev_info *bdi; loff_t size; + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. + */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; switch (cmd) { case HDIO_GETGEO: diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index c9e1242eaf25..5081b3981d3c 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -757,7 +757,7 @@ static int sd_ioctl(struct block_device *bdev, fmode_t mode, * access to the device is prohibited. */ error = scsi_nonblockable_ioctl(sdp, cmd, p, - (mode & FMODE_NDELAY_NOW) != 0); + (mode & FMODE_NDELAY) != 0); if (!scsi_block_when_processing_errors(sdp) || !error) return error; diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index 62b6633e3a97..45b66b98a516 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -521,7 +521,7 @@ static int sr_block_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, * if it doesn't recognise the ioctl */ ret = scsi_nonblockable_ioctl(sdev, cmd, argp, - (mode & FMODE_NDELAY_NOW) != 0); + (mode & FMODE_NDELAY) != 0); if (ret != -ENODEV) return ret; return scsi_ioctl(sdev, cmd, argp); diff --git a/fs/block_dev.c b/fs/block_dev.c index 7c727523bc54..99e0ae1a4c78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1206,8 +1206,16 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct block_device *bdev = I_BDEV(file->f_mapping->host); fmode_t mode = file->f_mode; + + /* + * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have + * to updated it before every ioctl. 
+ */ if (file->f_flags & O_NDELAY) - mode |= FMODE_NDELAY_NOW; + mode |= FMODE_NDELAY; + else + mode &= ~FMODE_NDELAY; + return blkdev_ioctl(bdev, mode, cmd, arg); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 0dcdd9458f4b..b3345a90e11a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -79,7 +79,6 @@ extern int dir_notify_enable; #define FMODE_NDELAY ((__force fmode_t)32) #define FMODE_EXCL ((__force fmode_t)64) #define FMODE_WRITE_IOCTL ((__force fmode_t)128) -#define FMODE_NDELAY_NOW ((__force fmode_t)256) #define RW_MASK 1 #define RWA_MASK 2 -- cgit v1.2.3 From 576a488a27f267af203f3ea69c700a1612335e9f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 4 Dec 2008 09:09:34 +1100 Subject: [XFS] Fix hang after disallowed rename across directory quota domains When project quota is active and is being used for directory tree quota control, we disallow rename outside the current directory tree. This requires a check to be made after all the inodes involved in the rename are locked. We fail to unlock the inodes correctly if we disallow the rename when the target is outside the current directory tree. This results in a hang on the next access to the inodes involved in failed rename. Reported-by: Arkadiusz Miskiewicz Signed-off-by: Dave Chinner Tested-by: Arkadiusz Miskiewicz Signed-off-by: Lachlan McIlroy --- fs/xfs/xfs_rename.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index d700dacdb10e..c903130be7fd 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c @@ -212,7 +212,7 @@ xfs_rename( if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { error = XFS_ERROR(EXDEV); - xfs_rename_unlock4(inodes, XFS_ILOCK_SHARED); + xfs_rename_unlock4(inodes, XFS_ILOCK_EXCL); xfs_trans_cancel(tp, cancel_flags); goto std_return; } -- cgit v1.2.3 From 218d11a8b071b23b76c484fd5f72a4fe3306801e Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 5 Dec 2008 16:12:48 -0700 Subject: Fix a race condition in FASYNC handling Changeset a238b790d5f99c7832f9b73ac8847025815b85f7 (Call fasync() functions without the BKL) introduced a race which could leave file->f_flags in a state inconsistent with what the underlying driver/filesystem believes. Revert that change, and also fix the same races in ioctl_fioasync() and ioctl_fionbio(). This is a minimal, short-term fix; the real fix will not involve the BKL. Reported-by: Oleg Nesterov Cc: Andi Kleen Cc: Al Viro Cc: stable@kernel.org Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- fs/fcntl.c | 7 +++++++ fs/ioctl.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/fcntl.c b/fs/fcntl.c index ac4f7db9f134..549daf8005fb 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -175,6 +176,11 @@ static int setfl(int fd, struct file * filp, unsigned long arg) if (error) return error; + /* + * We still need a lock here for now to keep multiple FASYNC calls + * from racing with each other. 
+ */ + lock_kernel(); if ((arg ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) { error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); @@ -185,6 +191,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg) filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); out: + unlock_kernel(); return error; } diff --git a/fs/ioctl.c b/fs/ioctl.c index d152856c371b..43e8b2c0664b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -400,11 +400,9 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - lock_kernel(); + if (filp->f_op && filp->f_op->fasync) error = filp->f_op->fasync(fd, filp, on); - unlock_kernel(); - } else + else error = -ENOTTY; } if (error) @@ -440,11 +438,17 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIONBIO: + /* BKL needed to avoid races tweaking f_flags */ + lock_kernel(); error = ioctl_fionbio(filp, argp); + unlock_kernel(); break; case FIOASYNC: + /* BKL needed to avoid races tweaking f_flags */ + lock_kernel(); error = ioctl_fioasync(fd, filp, argp); + unlock_kernel(); break; case FIOQSIZE: -- cgit v1.2.3 From a4f4d6df537368297a84e6b9444f403f99bf59f6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Mon, 8 Dec 2008 18:24:18 -0500 Subject: EXPORTFS: handle NULL returns from fh_to_dentry()/fh_to_parent() While 440037287c5 "[PATCH] switch all filesystems over to d_obtain_alias" removed some cases where fh_to_dentry() and fh_to_parent() could return NULL, there are still a few NULL returns left in individual filesystems. Thus it was a mistake for that commit to remove the handling of NULL returns in the callers. Revert those parts of 440037287c5 which removed the NULL handling. (We could, alternatively, modify all implementations to return -ESTALE instead of NULL, but that proves to require fixing a number of filesystems, and in some cases it's arguably more natural to return NULL.) Thanks to David for original patch and Linus, Christoph, and Hugh for review. Signed-off-by: J. Bruce Fields Cc: David Howells Cc: Christoph Hellwig Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- fs/exportfs/expfs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 80246bad1b7f..890e01828817 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -367,6 +367,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, * Try to get any dentry for the given file handle from the filesystem. */ result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type); + if (!result) + result = ERR_PTR(-ESTALE); if (IS_ERR(result)) return result; @@ -420,6 +422,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid, target_dir = nop->fh_to_parent(mnt->mnt_sb, fid, fh_len, fileid_type); + if (!target_dir) + goto err_result; err = PTR_ERR(target_dir); if (IS_ERR(target_dir)) goto err_result; -- cgit v1.2.3 From 85f334666a771680472722eee43ae0fc8730a619 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Tue, 9 Dec 2008 19:36:38 -0800 Subject: tracehook: exec double-reporting fix The patch 6341c39 "tracehook: exec" introduced a small regression in 2.6.27 regarding binfmt_misc exec event reporting. 
Since the reporting is now done in the common search_binary_handler() function, an exec of a misc binary will result in two (or possibly multiple) exec events being reported, instead of just a single one, because the misc handler contains a recursive call to search_binary_handler. To add to the confusion, if PTRACE_O_TRACEEXEC is not active, the multiple SIGTRAP signals will in fact cause only a single ptrace intercept, as the signals are not queued. However, if PTRACE_O_TRACEEXEC is on, the debugger will actually see multiple ptrace intercepts (PTRACE_EVENT_EXEC). The test program included below demonstrates the problem. This change fixes the bug by calling tracehook_report_exec() only in the outermost search_binary_handler() call (bprm->recursion_depth == 0). The additional change to restore bprm->recursion_depth after each binfmt load_binary call is actually superfluous for this bug, since we test the value saved on entry to search_binary_handler(). But it keeps the use of of the depth count to its most obvious expected meaning. Depending on what binfmt handlers do in certain cases, there could have been false-positive tests for recursion limits before this change. /* Test program using PTRACE_O_TRACEEXEC. This forks and exec's the first argument with the rest of the arguments, while ptrace'ing. It expects to see one PTRACE_EVENT_EXEC stop and then a successful exit, with no other signals or events in between. Test for kernel doing two PTRACE_EVENT_EXEC stops for a binfmt_misc exec: $ gcc -g traceexec.c -o traceexec $ sudo sh -c 'echo :test:M::foobar::/bin/cat: > /proc/sys/fs/binfmt_misc/register' $ echo 'foobar test' > ./foobar $ chmod +x ./foobar $ ./traceexec ./foobar; echo $? ==> good <== foobar test 0 $ ==> bad <== foobar test unexpected status 0x4057f != 0 3 $ */ #include #include #include #include #include #include #include static void wait_for (pid_t child, int expect) { int status; pid_t p = wait (&status); if (p != child) { perror ("wait"); exit (2); } if (status != expect) { fprintf (stderr, "unexpected status %#x != %#x\n", status, expect); exit (3); } } int main (int argc, char **argv) { pid_t child = fork (); if (child < 0) { perror ("fork"); return 127; } else if (child == 0) { ptrace (PTRACE_TRACEME); raise (SIGUSR1); execv (argv[1], &argv[1]); perror ("execve"); _exit (127); } wait_for (child, W_STOPCODE (SIGUSR1)); if (ptrace (PTRACE_SETOPTIONS, child, 0L, (void *) (long) PTRACE_O_TRACEEXEC) != 0) { perror ("PTRACE_SETOPTIONS"); return 4; } if (ptrace (PTRACE_CONT, child, 0L, 0L) != 0) { perror ("PTRACE_CONT"); return 5; } wait_for (child, W_STOPCODE (SIGTRAP | (PTRACE_EVENT_EXEC << 8))); if (ptrace (PTRACE_CONT, child, 0L, 0L) != 0) { perror ("PTRACE_CONT"); return 6; } wait_for (child, W_EXITCODE (0, 0)); return 0; } Reported-by: Arnd Bergmann CC: Ulrich Weigand Signed-off-by: Roland McGrath --- fs/exec.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/exec.c b/fs/exec.c index 4e834f16d9da..ec5df9a38313 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1159,6 +1159,7 @@ EXPORT_SYMBOL(remove_arg_zero); */ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) { + unsigned int depth = bprm->recursion_depth; int try,retval; struct linux_binfmt *fmt; #ifdef __alpha__ @@ -1219,8 +1220,15 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) continue; read_unlock(&binfmt_lock); retval = fn(bprm, regs); + /* + * Restore the depth counter to its starting value + * in this call, so we don't 
have to rely on every + * load_binary function to restore it on return. + */ + bprm->recursion_depth = depth; if (retval >= 0) { - tracehook_report_exec(fmt, bprm, regs); + if (depth == 0) + tracehook_report_exec(fmt, bprm, regs); put_binfmt(fmt); allow_write_access(bprm->file); if (bprm->file) -- cgit v1.2.3 From 71c5576fbd809f2015f4eddf72e501e298720cf3 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:13 -0800 Subject: revert "percpu counter: clean up percpu_counter_sum_and_set()" Revert commit 1f7c14c62ce63805f9574664a6c6de3633d4a354 Author: Mingming Cao Date: Thu Oct 9 12:50:59 2008 -0400 percpu counter: clean up percpu_counter_sum_and_set() Before this patch we had the following: percpu_counter_sum(): return the percpu_counter's value percpu_counter_sum_and_set(): return the percpu_counter's value, copying that value into the central value and zeroing the per-cpu counters before returning. After this patch, percpu_counter_sum_and_set() has gone, and percpu_counter_sum() gets the old percpu_counter_sum_and_set() functionality. Problem is, as Eric points out, the old percpu_counter_sum_and_set() functionality was racy and wrong. It zeroes out counters on "other" cpus, without holding any locks which will prevent races agaist updates from those other CPUS. This patch reverts 1f7c14c62ce63805f9574664a6c6de3633d4a354. This means that percpu_counter_sum_and_set() still has the race, but percpu_counter_sum() does not. Note that this is not a simple revert - ext4 has since started using percpu_counter_sum() for its dirty_blocks counter as well. Note that this revert patch changes percpu_counter_sum() semantics. Before the patch, a call to percpu_counter_sum() will bring the counter's central counter mostly up-to-date, so a following percpu_counter_read() will return a close value. After this patch, a call to percpu_counter_sum() will leave the counter's central accumulator unaltered, so a subsequent call to percpu_counter_read() can now return a significantly inaccurate result. If there is any code in the tree which was introduced after e8ced39d5e8911c662d4d69a342b9d053eaaac4e was merged, and which depends upon the new percpu_counter_sum() semantics, that code will break. Reported-by: Eric Dumazet Cc: "David S. 
Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/balloc.c | 4 ++-- include/linux/percpu_counter.h | 12 +++++++++--- lib/percpu_counter.c | 8 +++++--- 3 files changed, 16 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d2003cdc36aa..c17f69bcd7dd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum(fbc); - dirty_blocks = percpu_counter_sum(dbc); + free_blocks = percpu_counter_sum_and_set(fbc); + dirty_blocks = percpu_counter_sum_and_set(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 9007ccdfc112..208388835357 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc); +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,13 +44,19 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc); + s64 ret = __percpu_counter_sum(fbc, 0); return ret < 0 ? 0 : ret; } +static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) +{ + return __percpu_counter_sum(fbc, 1); +} + + static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc); + return __percpu_counter_sum(fbc, 0); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 71b265c330ce..dba1530a5b29 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc) +s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) { s64 ret; int cpu; @@ -62,9 +62,11 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - *pcount = 0; + if (set) + *pcount = 0; } - fbc->count = ret; + if (set) + fbc->count = ret; spin_unlock(&fbc->lock); return ret; -- cgit v1.2.3 From 02d211688727ad02bb4555b1aa8ae2de16b21b39 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 9 Dec 2008 13:14:14 -0800 Subject: revert "percpu_counter: new function percpu_counter_sum_and_set" Revert commit e8ced39d5e8911c662d4d69a342b9d053eaaac4e Author: Mingming Cao Date: Fri Jul 11 19:27:31 2008 -0400 percpu_counter: new function percpu_counter_sum_and_set As described in revert "percpu counter: clean up percpu_counter_sum_and_set()" the new percpu_counter_sum_and_set() is racy against updates to the cpu-local accumulators on other CPUs. Revert that change. 
This means that ext4 will be slow again. But correct. Reported-by: Eric Dumazet Cc: "David S. Miller" Cc: Peter Zijlstra Cc: Mingming Cao Cc: Cc: [2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext4/balloc.c | 4 ++-- include/linux/percpu_counter.h | 12 +++--------- lib/percpu_counter.c | 7 +------ 3 files changed, 6 insertions(+), 17 deletions(-) (limited to 'fs') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index c17f69bcd7dd..db35cfdb3c8b 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -609,8 +609,8 @@ int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks) if (free_blocks - (nblocks + root_blocks + dirty_blocks) < EXT4_FREEBLOCKS_WATERMARK) { - free_blocks = percpu_counter_sum_and_set(fbc); - dirty_blocks = percpu_counter_sum_and_set(dbc); + free_blocks = percpu_counter_sum_positive(fbc); + dirty_blocks = percpu_counter_sum_positive(dbc); if (dirty_blocks < 0) { printk(KERN_CRIT "Dirty block accounting " "went wrong %lld\n", diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h index 208388835357..9007ccdfc112 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount); void percpu_counter_destroy(struct percpu_counter *fbc); void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch); -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set); +s64 __percpu_counter_sum(struct percpu_counter *fbc); static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { @@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { - s64 ret = __percpu_counter_sum(fbc, 0); + s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } -static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc) -{ - return __percpu_counter_sum(fbc, 1); -} - - static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { - return __percpu_counter_sum(fbc, 0); + return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index dba1530a5b29..b255b939bc1b 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add); * Add up all the per-cpu counts, return the result. This is a more accurate * but much slower version of percpu_counter_read_positive() */ -s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) +s64 __percpu_counter_sum(struct percpu_counter *fbc) { s64 ret; int cpu; @@ -62,12 +62,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set) for_each_online_cpu(cpu) { s32 *pcount = per_cpu_ptr(fbc->counters, cpu); ret += *pcount; - if (set) - *pcount = 0; } - if (set) - fbc->count = ret; - spin_unlock(&fbc->lock); return ret; } -- cgit v1.2.3 From 49c50342c728344b79c8f9e8293637fe80ef5ad5 Mon Sep 17 00:00:00 2001 From: Matt Mackall Date: Tue, 9 Dec 2008 13:14:21 -0800 Subject: pagemap: fix 32-bit pagemap regression The large pages fix from bcf8039ed45 broke 32-bit pagemap by pulling the pagemap entry code out into a function with the wrong return type. Pagemap entries are 64 bits on all systems and unsigned long is only 32 bits on 32-bit systems. 
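A stand-alone user-space sketch of the truncation (not part of the patch; the flag bit and helper names below are invented and do not reflect the real PM_* layout):

#include <stdint.h>
#include <stdio.h>

static uint32_t entry_as_ulong32(uint64_t pme)          /* models a 32-bit "unsigned long" return */
{
        return (uint32_t)pme;                           /* the high 32 bits are silently dropped */
}

static uint64_t entry_as_u64(uint64_t pme)              /* models the fixed u64 return type */
{
        return pme;
}

int main(void)
{
        uint64_t pme = (1ULL << 55) | 0x1234;           /* a flag above bit 31 plus a fake PFN */

        printf("truncated: %#llx\n", (unsigned long long)entry_as_ulong32(pme));
        printf("preserved: %#llx\n", (unsigned long long)entry_as_u64(pme));
        return 0;
}

On a 64-bit build both helpers return the same value, which is why the regression only bites 32-bit pagemap users.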
Signed-off-by: Matt Mackall Reported-by: Doug Graham Cc: Alexey Dobriyan Cc: Dave Hansen Cc: [2.6.26.x, 2.6.27.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b770c095e45c..3a8bdd7f5756 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -557,9 +557,9 @@ static u64 swap_pte_to_pagemap_entry(pte_t pte) return swp_type(e) | (swp_offset(e) << MAX_SWAPFILES_SHIFT); } -static unsigned long pte_to_pagemap_entry(pte_t pte) +static u64 pte_to_pagemap_entry(pte_t pte) { - unsigned long pme = 0; + u64 pme = 0; if (is_swap_pte(pte)) pme = PM_PFRAME(swap_pte_to_pagemap_entry(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_SWAP; -- cgit v1.2.3 From 6ee5a399d6a92a52646836a6e10faf255c16393e Mon Sep 17 00:00:00 2001 From: Dmitri Monakhov Date: Tue, 9 Dec 2008 13:14:26 -0800 Subject: inotify: fix IN_ONESHOT unmount event watcher On umount two event will be dispatched to watcher: 1: inotify_dev_queue_event(.., IN_UNMOUNT,..) 2: remove_watch(watch, dev) ->inotify_dev_queue_event(.., IN_IGNORED, ..) But if watcher has IN_ONESHOT bit set then the watcher will be released inside first event. Which result in accessing invalid object later. IMHO it is not pure regression. This bug wasn't triggered while initial inotify interface testing phase because of another bug in IN_ONESHOT handling logic :) commit ac74c00e499ed276a965e5b5600667d5dc04a84a Author: Ulisses Furquim Date: Fri Feb 8 04:18:16 2008 -0800 inotify: fix check for one-shot watches before destroying them As the IN_ONESHOT bit is never set when an event is sent we must check it in the watch's mask and not in the event's mask. TESTCASE: mkdir mnt mount -ttmpfs none mnt mkdir mnt/d ./inotify mnt/d& umount mnt ## << lockup or crash here TESTSOURCE: /* gcc -oinotify inotify.c */ #include #include #include int main(int argc, char **argv) { char buf[1024]; struct inotify_event *ie; char *p; int i; ssize_t l; p = argv[1]; i = inotify_init(); inotify_add_watch(i, p, ~0); l = read(i, buf, sizeof(buf)); printf("read %d bytes\n", l); ie = (struct inotify_event *) buf; printf("event mask: %d\n", ie->mask); return 0; } Signed-off-by: Dmitri Monakhov Cc: John McCutchan Cc: Al Viro Cc: Robert Love Cc: Ulisses Furquim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/inotify.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/inotify.c b/fs/inotify.c index 7bbed1b89825..dae3f28f30d4 100644 --- a/fs/inotify.c +++ b/fs/inotify.c @@ -428,11 +428,13 @@ void inotify_unmount_inodes(struct list_head *list) watches = &inode->inotify_watches; list_for_each_entry_safe(watch, next_w, watches, i_list) { struct inotify_handle *ih= watch->ih; + get_inotify_watch(watch); mutex_lock(&ih->mutex); ih->in_ops->handle_event(watch, watch->wd, IN_UNMOUNT, 0, NULL, NULL); inotify_remove_watch_locked(ih, watch); mutex_unlock(&ih->mutex); + put_inotify_watch(watch); } mutex_unlock(&inode->inotify_mutex); iput(inode); -- cgit v1.2.3 From 9c24624727f6d6c460e45762a408ca5f5b9b8ef2 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 9 Dec 2008 13:14:27 -0800 Subject: KSYM_SYMBOL_LEN fixes Miles Lane tailing /sys files hit a BUG which Pekka Enberg has tracked to my 966c8c12dc9e77f931e2281ba25d2f0244b06949 sprint_symbol(): use less stack exposing a bug in slub's list_locations() - kallsyms_lookup() writes a 0 to namebuf[KSYM_NAME_LEN-1], but that was beyond the end of page provided. 
The 100 slop which list_locations() allows at end of page looks roughly enough for all the other stuff it might print after the symbol before it checks again: break out KSYM_SYMBOL_LEN earlier than before. Latencytop and ftrace and are using KSYM_NAME_LEN buffers where they need KSYM_SYMBOL_LEN buffers, and vmallocinfo a 2*KSYM_NAME_LEN buffer where it wants a KSYM_SYMBOL_LEN buffer: fix those before anyone copies them. [akpm@linux-foundation.org: ftrace.h needs module.h] Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc Miles Lane Acked-by: Pekka Enberg Acked-by: Steven Rostedt Acked-by: Frederic Weisbecker Cc: Rusty Russell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 2 +- include/linux/ftrace.h | 3 ++- kernel/latencytop.c | 2 +- mm/slub.c | 2 +- mm/vmalloc.c | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..d4677603c889 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -371,7 +371,7 @@ static int lstats_show_proc(struct seq_file *m, void *v) task->latency_record[i].time, task->latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!task->latency_record[i].backtrace[q]) break; diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 703eb53cfa2b..9c5bc6be2b09 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #ifdef CONFIG_FUNCTION_TRACER @@ -231,7 +232,7 @@ ftrace_init_module(unsigned long *start, unsigned long *end) { } struct boot_trace { pid_t caller; - char func[KSYM_NAME_LEN]; + char func[KSYM_SYMBOL_LEN]; int result; unsigned long long duration; /* usecs */ ktime_t calltime; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 5e7b45c56923..449db466bdbc 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -191,7 +191,7 @@ static int lstats_show(struct seq_file *m, void *v) latency_record[i].time, latency_record[i].max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { - char sym[KSYM_NAME_LEN]; + char sym[KSYM_SYMBOL_LEN]; char *c; if (!latency_record[i].backtrace[q]) break; diff --git a/mm/slub.c b/mm/slub.c index 749588a50a5a..a2cd47d89e0a 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3597,7 +3597,7 @@ static int list_locations(struct kmem_cache *s, char *buf, for (i = 0; i < t.count; i++) { struct location *l = &t.loc[i]; - if (len > PAGE_SIZE - 100) + if (len > PAGE_SIZE - KSYM_SYMBOL_LEN - 100) break; len += sprintf(buf + len, "%7ld ", l->count); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f3f6e0758562..1ddb77ba3995 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1717,7 +1717,7 @@ static int s_show(struct seq_file *m, void *p) v->addr, v->addr + v->size, v->size); if (v->caller) { - char buff[2 * KSYM_NAME_LEN]; + char buff[KSYM_SYMBOL_LEN]; seq_putc(m, ' '); sprint_symbol(buff, (unsigned long)v->caller); -- cgit v1.2.3 From 83099bc647688d816c2f7fac8e51921bdfe8db73 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 5 Dec 2008 09:14:10 +0800 Subject: ocfs2: Always update xattr search when creating bucket. When we create xattr bucket during the process of xattr set, we always need to update the ocfs2_xattr_search since even if the bucket size is the same as block size, the offset will change because of the removal of the ocfs2_xattr_block header. 
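The pattern behind the hunk, as a stand-alone sketch (generic user-space C, not ocfs2 code; the struct and function names are invented):

#include <stddef.h>
#include <stdio.h>

struct entry {
        int key;
};

struct search {
        struct entry *entries;                  /* base of the current entry array */
        struct entry *here;                     /* cursor into that array */
};

/* Re-point the cursor after the entries have been copied to a new buffer. */
static void rebase_cursor(struct search *s, struct entry *new_entries)
{
        size_t i = s->here - s->entries;        /* the index survives the move */

        s->entries = new_entries;
        s->here = &new_entries[i];              /* must happen on every relocation */
}

int main(void)
{
        struct entry old[4] = { {1}, {2}, {3}, {4} }, copy[4];
        struct search s = { old, &old[2] };
        int i;

        for (i = 0; i < 4; i++)
                copy[i] = old[i];               /* the entries move, e.g. into a new bucket */
        rebase_cursor(&s, copy);
        printf("cursor now points at key %d in the new buffer\n", s.here->key);
        return 0;
}

Moving the recomputation out of the conditional, as the hunk above does, keeps the cursor valid on every path instead of only when the old and new layouts happen to match.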
Signed-off-by: Tao Ma Signed-off-by: Mark Fasheh --- fs/ocfs2/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 054e2efb0b7e..74d7367ade13 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -2645,9 +2645,9 @@ static int ocfs2_xattr_update_xattr_search(struct inode *inode, return ret; } - i = xs->here - old_xh->xh_entries; - xs->here = &xs->header->xh_entries[i]; } + i = xs->here - old_xh->xh_entries; + xs->here = &xs->header->xh_entries[i]; } return ret; -- cgit v1.2.3 From a97721894a6dc0f9ebfe1dbaa4bb112eaf399273 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Tue, 16 Dec 2008 18:10:18 -0800 Subject: ocfs2: Add JBD2 compat feature bit. Define the OCFS2_FEATURE_COMPAT_JBD2 bit in the filesystem header. Signed-off-by: Joel Becker Signed-off-by: Mark Fasheh --- fs/ocfs2/ocfs2_fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 5f180cf7abbd..5e0c0d0aef7d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -86,7 +86,8 @@ #define OCFS2_CLEAR_INCOMPAT_FEATURE(sb,mask) \ OCFS2_SB(sb)->s_feature_incompat &= ~(mask) -#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB +#define OCFS2_FEATURE_COMPAT_SUPP (OCFS2_FEATURE_COMPAT_BACKUP_SB \ + | OCFS2_FEATURE_COMPAT_JBD2_SB) #define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ @@ -152,6 +153,11 @@ */ #define OCFS2_FEATURE_COMPAT_BACKUP_SB 0x0001 +/* + * The filesystem will correctly handle journal feature bits. + */ +#define OCFS2_FEATURE_COMPAT_JBD2_SB 0x0002 + /* * Unwritten extents support. */ -- cgit v1.2.3 From 331c31351044888916805c9cb32d8bb9e40c12e9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 17 Dec 2008 06:31:53 -0500 Subject: cifs: fix buffer overrun in parse_DFS_referrals While testing a kernel with memory poisoning enabled, I saw some warnings about the redzone getting clobbered when chasing DFS referrals. The buffer allocation for the unicode converted version of the searchName is too small and needs to take null termination into account. Signed-off-by: Jeff Layton Acked-by: Steve French Signed-off-by: Linus Torvalds --- fs/cifs/cifssmb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 2af8626ced43..6d51696dc762 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3983,7 +3983,8 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, node->flags = le16_to_cpu(pSMBr->DFSFlags); if (is_unicode) { - __le16 *tmp = kmalloc(strlen(searchName)*2, GFP_KERNEL); + __le16 *tmp = kmalloc(strlen(searchName)*2 + 2, + GFP_KERNEL); cifsConvertToUCS((__le16 *) tmp, searchName, PATH_MAX, nls_codepage, remap); node->path_consumed = hostlen_fromUCS(tmp, -- cgit v1.2.3
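A closing stand-alone sketch of the sizing rule behind the cifs fix above (user-space C, not CIFS code; the helper is a naive ASCII-only stand-in for cifsConvertToUCS):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Convert a NUL-terminated ASCII name to UCS-2; real CIFS code handles codepages. */
static uint16_t *ascii_to_ucs2(const char *name)
{
        size_t len = strlen(name);
        uint16_t *out = malloc(len * 2 + 2);    /* characters plus the 16-bit terminator */
        size_t i;

        if (!out)
                return NULL;
        for (i = 0; i < len; i++)
                out[i] = (uint16_t)(unsigned char)name[i];
        out[len] = 0;                           /* this write overruns a len * 2 buffer */
        return out;
}

int main(void)
{
        const char *name = "\\server\\share\\dir";
        uint16_t *path = ascii_to_ucs2(name);

        printf("allocated %zu bytes for %zu characters\n",
               strlen(name) * 2 + 2, strlen(name));
        free(path);
        return 0;
}

With the original strlen(searchName)*2 allocation, the 16-bit terminator lands one element past the end of the buffer, which matches the redzone warnings described above.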