diff options
author | David Woodhouse <dwmw2@infradead.org> | 2007-10-13 14:43:54 +0100 |
---|---|---|
committer | David Woodhouse <dwmw2@infradead.org> | 2007-10-13 14:43:54 +0100 |
commit | b160292cc216a50fd0cd386b0bda2cd48352c73b (patch) | |
tree | ef07cf98f91353ee4c9ec1e1ca7a2a5d9d4b538a /fs | |
parent | b37bde147890c8fea8369a5a4e230dabdea4ebfb (diff) | |
parent | bbf25010f1a6b761914430f5fca081ec8c7accd1 (diff) |
Merge Linux 2.6.23
Diffstat (limited to 'fs')
61 files changed, 698 insertions, 521 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c index 08fa320b7e6d..15e05a15b575 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -92,23 +92,6 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry) return fid; } -struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry) -{ - struct p9_fid *fid; - struct v9fs_dentry *dent; - - dent = dentry->d_fsdata; - fid = v9fs_fid_lookup(dentry); - if (!IS_ERR(fid)) { - spin_lock(&dent->lock); - list_del(&fid->dlist); - spin_unlock(&dent->lock); - } - - return fid; -} - - /** * v9fs_fid_clone - lookup the fid for a dentry, clone a private copy and * release it diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 47a0ba742872..26e07df783b9 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -28,6 +28,5 @@ struct v9fs_dentry { }; struct p9_fid *v9fs_fid_lookup(struct dentry *dentry); -struct p9_fid *v9fs_fid_lookup_remove(struct dentry *dentry); struct p9_fid *v9fs_fid_clone(struct dentry *dentry); int v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid); diff --git a/fs/Kconfig b/fs/Kconfig index 84fb8428c023..bb02b39380a3 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -441,9 +441,6 @@ config OCFS2_FS Note: Features which OCFS2 does not support yet: - extended attributes - - shared writeable mmap - - loopback is supported, but data written will not - be cluster coherent. 
- quotas - cluster aware flock - Directory change notification (F_NOTIFY) diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index a3684dcc76e7..6f8c96fb29eb 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -235,8 +235,8 @@ static void *afs_mntpt_follow_link(struct dentry *dentry, struct nameidata *nd) err = do_add_mount(newmnt, nd, MNT_SHRINKABLE, &afs_vfsmounts); switch (err) { case 0: - mntput(nd->mnt); dput(nd->dentry); + mntput(nd->mnt); nd->mnt = newmnt; nd->dentry = dget(newmnt->mnt_root); schedule_delayed_work(&afs_mntpt_expiry_timer, @@ -1562,6 +1562,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, fput(file); return -EAGAIN; } + req->ki_filp = file; if (iocb->aio_flags & IOCB_FLAG_RESFD) { /* * If the IOCB_FLAG_RESFD flag of aio_flags is set, get an @@ -1576,7 +1577,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, } } - req->ki_filp = file; ret = put_user(req->ki_key, &user_iocb->aio_key); if (unlikely(ret)) { dprintk("EFAULT: aio_key\n"); diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 861141b4f6d6..fcb3405bb14e 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -742,6 +742,7 @@ static int load_flat_file(struct linux_binprm * bprm, * __start to address 4 so that is okay). */ if (rev > OLD_FLAT_VERSION) { + unsigned long persistent = 0; for (i=0; i < relocs; i++) { unsigned long addr, relval; @@ -749,6 +750,8 @@ static int load_flat_file(struct linux_binprm * bprm, relocated (of course, the address has to be relocated first). */ relval = ntohl(reloc[i]); + if (flat_set_persistent (relval, &persistent)) + continue; addr = flat_get_relocate_addr(relval); rp = (unsigned long *) calc_reloc(addr, libinfo, id, 1); if (rp == (unsigned long *)RELOC_FAILED) { @@ -757,7 +760,8 @@ static int load_flat_file(struct linux_binprm * bprm, } /* Get the pointer's value. 
*/ - addr = flat_get_addr_from_rp(rp, relval, flags); + addr = flat_get_addr_from_rp(rp, relval, flags, + &persistent); if (addr != 0) { /* * Do the relocation. PIC relocs in the data section are diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index a6c9078af124..37310b0e8107 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2311,8 +2311,10 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar struct iwreq __user *iwr_u; struct iw_point __user *iwp; struct compat_iw_point __user *iwp_u; - compat_caddr_t pointer; + compat_caddr_t pointer_u; + void __user *pointer; __u16 length, flags; + int ret; iwr_u = compat_ptr(arg); iwp_u = (struct compat_iw_point __user *) &iwr_u->u.data; @@ -2330,17 +2332,29 @@ static int do_wireless_ioctl(unsigned int fd, unsigned int cmd, unsigned long ar sizeof(iwr->ifr_ifrn.ifrn_name))) return -EFAULT; - if (__get_user(pointer, &iwp_u->pointer) || + if (__get_user(pointer_u, &iwp_u->pointer) || __get_user(length, &iwp_u->length) || __get_user(flags, &iwp_u->flags)) return -EFAULT; - if (__put_user(compat_ptr(pointer), &iwp->pointer) || + if (__put_user(compat_ptr(pointer_u), &iwp->pointer) || __put_user(length, &iwp->length) || __put_user(flags, &iwp->flags)) return -EFAULT; - return sys_ioctl(fd, cmd, (unsigned long) iwr); + ret = sys_ioctl(fd, cmd, (unsigned long) iwr); + + if (__get_user(pointer, &iwp->pointer) || + __get_user(length, &iwp->length) || + __get_user(flags, &iwp->flags)) + return -EFAULT; + + if (__put_user(ptr_to_compat(pointer), &iwp_u->pointer) || + __put_user(length, &iwp_u->length) || + __put_user(flags, &iwp_u->flags)) + return -EFAULT; + + return ret; } /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE @@ -3176,6 +3190,8 @@ COMPATIBLE_IOCTL(SIOCSIWRETRY) COMPATIBLE_IOCTL(SIOCGIWRETRY) COMPATIBLE_IOCTL(SIOCSIWPOWER) COMPATIBLE_IOCTL(SIOCGIWPOWER) +COMPATIBLE_IOCTL(SIOCSIWAUTH) +COMPATIBLE_IOCTL(SIOCGIWAUTH) /* hiddev */ COMPATIBLE_IOCTL(HIDIOCGVERSION) 
COMPATIBLE_IOCTL(HIDIOCAPPLICATION) diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index 5d40ad13ab5c..131954b3fb98 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -357,10 +357,6 @@ static struct dentry *ecryptfs_lookup(struct inode *dir, struct dentry *dentry, ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n"); goto out; } - if (special_file(lower_inode->i_mode)) { - ecryptfs_printk(KERN_DEBUG, "Is a special file; returning\n"); - goto out; - } if (!nd) { ecryptfs_printk(KERN_DEBUG, "We have a NULL nd, just leave" "as we *think* we are about to unlink\n"); diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index e4ab7bc14efe..fd3f94d4a668 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c @@ -834,7 +834,8 @@ static void ecryptfs_sync_page(struct page *page) ecryptfs_printk(KERN_DEBUG, "find_lock_page failed\n"); return; } - lower_page->mapping->a_ops->sync_page(lower_page); + if (lower_page->mapping->a_ops->sync_page) + lower_page->mapping->a_ops->sync_page(lower_page); ecryptfs_printk(KERN_DEBUG, "Unlocking page with index = [0x%.16x]\n", lower_page->index); unlock_page(lower_page); diff --git a/fs/exec.c b/fs/exec.c index c21a8cc06277..073b0b8c6d05 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -50,7 +50,6 @@ #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> -#include <linux/signalfd.h> #include <asm/uaccess.h> #include <asm/mmu_context.h> @@ -784,7 +783,6 @@ static int de_thread(struct task_struct *tsk) * and we can just re-use it all. 
*/ if (atomic_read(&oldsighand->count) <= 1) { - signalfd_detach(tsk); exit_itimers(sig); return 0; } @@ -923,7 +921,6 @@ static int de_thread(struct task_struct *tsk) sig->flags = 0; no_thread_group: - signalfd_detach(tsk); exit_itimers(sig); if (leader) release_task(leader); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 1586807b8177..c1fa1908dba0 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT3_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext3_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext3_warning(dir->i_sb, __FUNCTION__, + 
"Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, ext3fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1091,6 +1124,10 @@ static inline void ext3_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT3_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext3_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1109,6 +1146,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext3_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; @@ -1131,6 +1172,11 @@ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. + * Allocate a new block, and move entries so that they are approx. 
equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1142,7 +1188,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext3_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1170,8 +1216,19 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext3_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? */ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 22cfdd61c060..9537316a0714 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2578,8 +2578,11 @@ static int ext3_release_dquot(struct dquot *dquot) handle = ext3_journal_start(dquot_to_inode(dquot), EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + /* Release dquot anyway to avoid endless cycle in dqput() */ + dquot_release(dquot); return PTR_ERR(handle); + } ret = dquot_release(dquot); err = ext3_journal_stop(handle); if (!ret) @@ -2712,6 +2715,12 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); + if (!handle) { + printk(KERN_WARNING 
"EXT3-fs: Quota write (off=%Lu, len=%Lu)" + " cancelled because transaction is not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index da224974af78..5fdb862e71c4 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -140,7 +140,8 @@ struct dx_frame struct dx_map_entry { u32 hash; - u32 offs; + u16 offs; + u16 size; }; #ifdef CONFIG_EXT4_INDEX @@ -379,13 +380,28 @@ dx_probe(struct dentry *dentry, struct inode *dir, entries = (struct dx_entry *) (((char *)&root->info) + root->info.info_length); - assert(dx_get_limit(entries) == dx_root_limit(dir, - root->info.info_length)); + + if (dx_get_limit(entries) != dx_root_limit(dir, + root->info.info_length)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != root limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + dxtrace (printk("Look up %x", hash)); while (1) { count = dx_get_count(entries); - assert (count && count <= dx_get_limit(entries)); + if (!count || count > dx_get_limit(entries)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: no count or count > limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } + p = entries + 1; q = entries + count - 1; while (p <= q) @@ -423,8 +439,15 @@ dx_probe(struct dentry *dentry, struct inode *dir, if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; - assert (dx_get_limit(entries) == dx_node_limit (dir)); + if (dx_get_limit(entries) != dx_node_limit (dir)) { + ext4_warning(dir->i_sb, __FUNCTION__, + "dx entry: limit != node limit"); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail2; + } frame++; + frame->bh = NULL; } fail2: while (frame >= frame_in) { @@ -432,6 +455,10 @@ fail2: frame--; } fail: + if (*err == ERR_BAD_DX_DIR) + ext4_warning(dir->i_sb, 
__FUNCTION__, + "Corrupt dir inode %ld, running e2fsck is " + "recommended.", dir->i_ino); return NULL; } @@ -671,6 +698,10 @@ errout: * Directory block splitting, compacting */ +/* + * Create map of hash values, offsets, and sizes, stored at end of block. + * Returns number of entries mapped. + */ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { @@ -684,7 +715,8 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; - map_tail->offs = (u32) ((char *) de - base); + map_tail->offs = (u16) ((char *) de - base); + map_tail->size = le16_to_cpu(de->rec_len); count++; cond_resched(); } @@ -694,6 +726,7 @@ static int dx_make_map (struct ext4_dir_entry_2 *de, int size, return count; } +/* Sort map by hash value */ static void dx_sort_map (struct dx_map_entry *map, unsigned count) { struct dx_map_entry *p, *q, *top = map + count - 1; @@ -1089,6 +1122,10 @@ static inline void ext4_set_de_type(struct super_block *sb, } #ifdef CONFIG_EXT4_INDEX +/* + * Move count entries from end of map between two memory locations. + * Returns pointer to last entry moved. + */ static struct ext4_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { @@ -1107,6 +1144,10 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) return (struct ext4_dir_entry_2 *) (to - rec_len); } +/* + * Compact each dir entry in the range to the minimal rec_len. + * Returns pointer to last entry in range. + */ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) { struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; @@ -1129,6 +1170,11 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) return prev; } +/* + * Split a full leaf block to make room for a new dir entry. 
+ * Allocate a new block, and move entries so that they are approx. equally full. + * Returns pointer to de in block into which the new entry will be inserted. + */ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) @@ -1140,7 +1186,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, u32 hash2; struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; - unsigned split; + unsigned split, move, size, i; struct ext4_dir_entry_2 *de = NULL, *de2; int err = 0; @@ -1168,8 +1214,19 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, count = dx_make_map ((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; - split = count/2; // need to adjust to actual middle dx_sort_map (map, count); + /* Split the existing block in the middle, size-wise */ + size = 0; + move = 0; + for (i = count-1; i >= 0; i--) { + /* is more than half of this entry in 2nd half of the block? 
*/ + if (size + map[i].size/2 > blocksize/2) + break; + size += map[i].size; + move++; + } + /* map index at which we will split */ + split = count - move; hash2 = map[split].hash; continued = hash2 == map[split - 1].hash; dxtrace(printk("Split block %i at %x, %i/%i\n", diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4550b83ab1c9..3c1397fa83df 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2698,8 +2698,11 @@ static int ext4_release_dquot(struct dquot *dquot) handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (IS_ERR(handle)) + if (IS_ERR(handle)) { + /* Release dquot anyway to avoid endless cycle in dqput() */ + dquot_release(dquot); return PTR_ERR(handle); + } ret = dquot_release(dquot); err = ext4_journal_stop(handle); if (!ret) @@ -2832,6 +2835,12 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, struct buffer_head *bh; handle_t *handle = journal_current_handle(); + if (!handle) { + printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" + " cancelled because transaction is not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index c848a191525d..950c2fbb815b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -82,14 +82,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) int ret; /* - * vma alignment has already been checked by prepare_hugepage_range. - * If you add any error returns here, do so after setting VM_HUGETLB, - * so is_vm_hugetlb_page tests below unmap_region go the right way - * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). + * vma address alignment (but not the pgoff alignment) has + * already been checked by prepare_hugepage_range. 
If you add + * any error returns here, do so after setting VM_HUGETLB, so + * is_vm_hugetlb_page tests below unmap_region go the right + * way when do_mmap_pgoff unwinds (may be important on powerpc + * and ia64). */ vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; + if (vma->vm_pgoff & ~(HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; + vma_len = (loff_t)(vma->vm_end - vma->vm_start); mutex_lock(&inode->i_mutex); @@ -132,7 +137,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -ENOMEM; if (flags & MAP_FIXED) { - if (prepare_hugepage_range(addr, len, pgoff)) + if (prepare_hugepage_range(addr, len)) return -EINVAL; return addr; } diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index dd64ddc11d43..ed85f9afdbc8 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -647,7 +647,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, struct inode *inode = OFNI_EDONI_2SFFJ(f); struct page *pg; - pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, + pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, (void *)jffs2_do_readpage_unlock, inode); if (IS_ERR(pg)) return (void *)pg; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index a21e4bc5444b..d120ec39bcb0 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -171,19 +171,14 @@ found: * GRANTED_RES message by cookie, without having to rely on the client's IP * address. 
--okir */ -static inline struct nlm_block * -nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_file *file, - struct nlm_lock *lock, struct nlm_cookie *cookie) +static struct nlm_block * +nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, + struct nlm_file *file, struct nlm_lock *lock, + struct nlm_cookie *cookie) { struct nlm_block *block; - struct nlm_host *host; struct nlm_rqst *call = NULL; - /* Create host handle for callback */ - host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); - if (host == NULL) - return NULL; - call = nlm_alloc_call(host); if (call == NULL) return NULL; @@ -366,6 +361,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) { struct nlm_block *block = NULL; + struct nlm_host *host; int error; __be32 ret; @@ -377,6 +373,10 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_end, wait); + /* Create host handle for callback */ + host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); + if (host == NULL) + return nlm_lck_denied_nolocks; /* Lock file against concurrent access */ mutex_lock(&file->f_mutex); @@ -385,7 +385,8 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, */ block = nlmsvc_lookup_block(file, lock); if (block == NULL) { - block = nlmsvc_create_block(rqstp, file, lock, cookie); + block = nlmsvc_create_block(rqstp, nlm_get_host(host), file, + lock, cookie); ret = nlm_lck_denied_nolocks; if (block == NULL) goto out; @@ -449,6 +450,7 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, out: mutex_unlock(&file->f_mutex); nlmsvc_release_block(block); + nlm_release_host(host); dprintk("lockd: nlmsvc_lock returned %u\n", ret); return ret; } @@ -477,10 +479,17 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, if (block == NULL) { struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); + struct nlm_host *host; if (conf == NULL) return nlm_granted; - block = 
nlmsvc_create_block(rqstp, file, lock, cookie); + /* Create host handle for callback */ + host = nlmsvc_lookup_host(rqstp, lock->caller, lock->len); + if (host == NULL) { + kfree(conf); + return nlm_lck_denied_nolocks; + } + block = nlmsvc_create_block(rqstp, host, file, lock, cookie); if (block == NULL) { kfree(conf); return nlm_granted; diff --git a/fs/locks.c b/fs/locks.c index 50857d2d3404..c795eaaf6c4c 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -782,7 +782,7 @@ find_conflict: if (request->fl_flags & FL_ACCESS) goto out; locks_copy_lock(new_fl, request); - locks_insert_lock(&inode->i_flock, new_fl); + locks_insert_lock(before, new_fl); new_fl = NULL; error = 0; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index a49f9feff776..a204484072f3 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -588,16 +588,6 @@ static int nfs_init_server(struct nfs_server *server, const struct nfs_mount_dat server->namelen = data->namlen; /* Create a client RPC handle for the NFSv3 ACL management interface */ nfs_init_server_aclclient(server); - if (clp->cl_nfsversion == 3) { - if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) - server->namelen = NFS3_MAXNAMLEN; - if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) - server->caps |= NFS_CAP_READDIRPLUS; - } else { - if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) - server->namelen = NFS2_MAXNAMLEN; - } - dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp); return 0; @@ -794,6 +784,16 @@ struct nfs_server *nfs_create_server(const struct nfs_mount_data *data, error = nfs_probe_fsinfo(server, mntfh, &fattr); if (error < 0) goto error; + if (server->nfs_client->rpc_ops->version == 3) { + if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) + server->namelen = NFS3_MAXNAMLEN; + if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) + server->caps |= NFS_CAP_READDIRPLUS; + } else { + if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN) + server->namelen = NFS2_MAXNAMLEN; + } + if (!(fattr.valid & 
NFS_ATTR_FATTR)) { error = server->nfs_client->rpc_ops->getattr(server, mntfh, &fattr); if (error < 0) { @@ -984,6 +984,9 @@ struct nfs_server *nfs4_create_server(const struct nfs4_mount_data *data, if (error < 0) goto error; + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + BUG_ON(!server->nfs_client); BUG_ON(!server->nfs_client->rpc_ops); BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); @@ -1056,6 +1059,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + dprintk("Referral FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); @@ -1115,6 +1121,9 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, if (error < 0) goto out_free_server; + if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) + server->namelen = NFS4_MAXNAMLEN; + dprintk("Cloned FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ea97408e423e..e4a04d16b8b0 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1162,6 +1162,8 @@ static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) } if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) return NULL; + if (name.len > NFS_SERVER(dir)->namelen) + return NULL; /* Note: caller is already holding the dir->i_mutex! 
*/ dentry = d_alloc(parent, &name); if (dentry == NULL) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c87dc713b5d7..579cf8a7d4a7 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -316,7 +316,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset) if (offset != 0) return; /* Cancel any unstarted writes on this page */ - nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE); + nfs_wb_page_cancel(page->mapping->host, page); } static int nfs_release_page(struct page *page, gfp_t gfp) diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index d1cbf0a0fbb2..522e5ad4d8ad 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -175,6 +175,9 @@ next_component: path++; name.len = path - (const char *) name.name; + if (name.len > NFS4_MAXNAMLEN) + return -ENAMETOOLONG; + eat_dot_dir: while (*path == '/') path++; diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index aea76d0e5fbd..acfc56f9edc0 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -176,7 +176,7 @@ static void nfs_expire_automounts(struct work_struct *work) void nfs_release_automount_timer(void) { if (list_empty(&nfs_automount_list)) - cancel_delayed_work_sync(&nfs_automount_task); + cancel_delayed_work(&nfs_automount_task); } /* diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 62b3ae280310..4b90e17555a9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -646,7 +646,7 @@ static int _nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state rcu_read_lock(); delegation = rcu_dereference(NFS_I(state->inode)->delegation); if (delegation != NULL && (delegation->flags & NFS_DELEGATION_NEED_RECLAIM) != 0) - delegation_type = delegation->flags; + delegation_type = delegation->type; rcu_read_unlock(); opendata->o_arg.u.delegation_type = delegation_type; status = nfs4_open_recover(opendata, state); @@ -1434,7 +1434,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) } res = d_add_unique(dentry, igrab(state->inode)); if 
(res != NULL) - dentry = res; + path.dentry = res; nfs4_intent_set_file(nd, &path, state); return res; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b2a851c1b8cb..b878528b64c1 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -345,8 +345,8 @@ void __exit unregister_nfs_fs(void) unregister_shrinker(&acl_shrinker); #ifdef CONFIG_NFS_V4 unregister_filesystem(&nfs4_fs_type); - nfs_unregister_sysctl(); #endif + nfs_unregister_sysctl(); unregister_filesystem(&nfs_fs_type); } @@ -911,13 +911,13 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->flags &= ~NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_UDP; mnt->timeo = 7; mnt->retrans = 5; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->flags |= NFS_MOUNT_TCP; mnt->nfs_server.protocol = IPPROTO_TCP; mnt->timeo = 600; @@ -936,10 +936,10 @@ static int nfs_parse_mount_options(char *raw, kfree(string); switch (token) { - case Opt_udp: + case Opt_xprt_udp: mnt->mount_server.protocol = IPPROTO_UDP; break; - case Opt_tcp: + case Opt_xprt_tcp: mnt->mount_server.protocol = IPPROTO_TCP; break; default: @@ -1153,20 +1153,20 @@ static int nfs_validate_mount_data(struct nfs_mount_data **options, c = strchr(dev_name, ':'); if (c == NULL) return -EINVAL; - len = c - dev_name - 1; + len = c - dev_name; if (len > sizeof(data->hostname)) - return -EINVAL; + return -ENAMETOOLONG; strncpy(data->hostname, dev_name, len); args.nfs_server.hostname = data->hostname; c++; if (strlen(c) > NFS_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; args.nfs_server.export_path = c; status = nfs_try_mount(&args, mntfh); if (status) - return -EINVAL; + return status; /* * Translate to nfs_mount_data, which nfs_fill_super @@ -1303,34 +1303,6 @@ static void nfs_clone_super(struct super_block *sb, nfs_initialise_sb(sb); } -static int nfs_set_super(struct super_block *s, void *_server) -{ - struct nfs_server *server = _server; - int ret; - - s->s_fs_info = server; - ret = 
set_anon_super(s, server); - if (ret == 0) - server->s_dev = s->s_dev; - return ret; -} - -static int nfs_compare_super(struct super_block *sb, void *data) -{ - struct nfs_server *server = data, *old = NFS_SB(sb); - - if (memcmp(&old->nfs_client->cl_addr, - &server->nfs_client->cl_addr, - sizeof(old->nfs_client->cl_addr)) != 0) - return 0; - /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ - if (old->flags & NFS_MOUNT_UNSHARED) - return 0; - if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) - return 0; - return 1; -} - #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags) @@ -1359,9 +1331,46 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; - return 0; + return 1; Ebusy: - return -EBUSY; + return 0; +} + +struct nfs_sb_mountdata { + struct nfs_server *server; + int mntflags; +}; + +static int nfs_set_super(struct super_block *s, void *data) +{ + struct nfs_sb_mountdata *sb_mntdata = data; + struct nfs_server *server = sb_mntdata->server; + int ret; + + s->s_flags = sb_mntdata->mntflags; + s->s_fs_info = server; + ret = set_anon_super(s, server); + if (ret == 0) + server->s_dev = s->s_dev; + return ret; +} + +static int nfs_compare_super(struct super_block *sb, void *data) +{ + struct nfs_sb_mountdata *sb_mntdata = data; + struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb); + int mntflags = sb_mntdata->mntflags; + + if (memcmp(&old->nfs_client->cl_addr, + &server->nfs_client->cl_addr, + sizeof(old->nfs_client->cl_addr)) != 0) + return 0; + /* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */ + if (old->flags & NFS_MOUNT_UNSHARED) + return 0; + if (memcmp(&old->fsid, &server->fsid, sizeof(old->fsid)) != 0) + return 0; + return nfs_compare_mount_options(sb, server, mntflags); } static int 
nfs_get_sb(struct file_system_type *fs_type, @@ -1373,6 +1382,9 @@ static int nfs_get_sb(struct file_system_type *fs_type, struct nfs_mount_data *data = raw_data; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; int error; /* Validate the mount data */ @@ -1386,28 +1398,25 @@ static int nfs_get_sb(struct file_system_type *fs_type, error = PTR_ERR(server); goto out; } + sb_mntdata.server = server; if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { - error = nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; - if (error < 0) - goto error_splat_super; } if (!s->s_root) { /* initial superblock/root creation */ - s->s_flags = flags; nfs_fill_super(s, data); } @@ -1460,6 +1469,9 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; int error; dprintk("--> nfs_xdev_get_sb()\n"); @@ -1470,28 +1482,25 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, error = PTR_ERR(server); goto out_err_noserver; } + sb_mntdata.server = server; if (server->flags & NFS_MOUNT_UNSHARED) compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; } if (s->s_fs_info != server) { - error = 
nfs_compare_mount_options(s, server, flags); nfs_free_server(server); server = NULL; - if (error < 0) - goto error_splat_super; } if (!s->s_root) { /* initial superblock/root creation */ - s->s_flags = flags; nfs_clone_super(s, data->sb); } @@ -1668,7 +1677,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, /* while calculating len, pretend ':' is '\0' */ len = c - dev_name; if (len > NFS4_MAXNAMLEN) - return -EINVAL; + return -ENAMETOOLONG; *hostname = kzalloc(len, GFP_KERNEL); if (*hostname == NULL) return -ENOMEM; @@ -1677,7 +1686,7 @@ static int nfs4_validate_mount_data(struct nfs4_mount_data **options, c++; /* step over the ':' */ len = strlen(c); if (len > NFS4_MAXPATHLEN) - return -EINVAL; + return -ENAMETOOLONG; *mntpath = kzalloc(len + 1, GFP_KERNEL); if (*mntpath == NULL) return -ENOMEM; @@ -1729,6 +1738,9 @@ static int nfs4_get_sb(struct file_system_type *fs_type, struct dentry *mntroot; char *mntpath = NULL, *hostname = NULL, *ip_addr = NULL; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; int error; /* Validate the mount data */ @@ -1744,12 +1756,13 @@ static int nfs4_get_sb(struct file_system_type *fs_type, error = PTR_ERR(server); goto out; } + sb_mntdata.server = server; if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(fs_type, compare_super, nfs_set_super, server); + s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_free; @@ -1762,7 +1775,6 @@ static int nfs4_get_sb(struct file_system_type *fs_type, if (!s->s_root) { /* initial superblock/root creation */ - s->s_flags = flags; nfs4_fill_super(s); } @@ -1816,6 +1828,9 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, struct nfs_server *server; struct dentry *mntroot; int (*compare_super)(struct 
super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; int error; dprintk("--> nfs4_xdev_get_sb()\n"); @@ -1826,12 +1841,13 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, error = PTR_ERR(server); goto out_err_noserver; } + sb_mntdata.server = server; if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1844,7 +1860,6 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ - s->s_flags = flags; nfs4_clone_super(s, data->sb); } @@ -1887,6 +1902,9 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, struct dentry *mntroot; struct nfs_fh mntfh; int (*compare_super)(struct super_block *, void *) = nfs_compare_super; + struct nfs_sb_mountdata sb_mntdata = { + .mntflags = flags, + }; int error; dprintk("--> nfs4_referral_get_sb()\n"); @@ -1897,12 +1915,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, error = PTR_ERR(server); goto out_err_noserver; } + sb_mntdata.server = server; if (server->flags & NFS4_MOUNT_UNSHARED) compare_super = NULL; /* Get a superblock - note that we may end up sharing one that already exists */ - s = sget(&nfs_fs_type, compare_super, nfs_set_super, server); + s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); if (IS_ERR(s)) { error = PTR_ERR(s); goto out_err_nosb; @@ -1915,7 +1934,6 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, if (!s->s_root) { /* initial superblock/root creation */ - s->s_flags = flags; nfs4_fill_super(s); } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 
ef97e0c0f5b1..0d7a77cc394b 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1396,6 +1396,50 @@ out: return ret; } +int nfs_wb_page_cancel(struct inode *inode, struct page *page) +{ + struct nfs_page *req; + loff_t range_start = page_offset(page); + loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); + struct writeback_control wbc = { + .bdi = page->mapping->backing_dev_info, + .sync_mode = WB_SYNC_ALL, + .nr_to_write = LONG_MAX, + .range_start = range_start, + .range_end = range_end, + }; + int ret = 0; + + BUG_ON(!PageLocked(page)); + for (;;) { + req = nfs_page_find_request(page); + if (req == NULL) + goto out; + if (test_bit(PG_NEED_COMMIT, &req->wb_flags)) { + nfs_release_request(req); + break; + } + if (nfs_lock_request_dontget(req)) { + nfs_inode_remove_request(req); + /* + * In case nfs_inode_remove_request has marked the + * page as being dirty + */ + cancel_dirty_page(page, PAGE_CACHE_SIZE); + nfs_unlock_request(req); + break; + } + ret = nfs_wait_on_request(req); + if (ret < 0) + goto out; + } + if (!PagePrivate(page)) + return 0; + ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE); +out: + return ret; +} + int nfs_wb_page_priority(struct inode *inode, struct page *page, int how) { loff_t range_start = page_offset(page); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 0eb464a39aae..7011d62acfc8 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -566,13 +566,23 @@ enum fsid_source fsid_source(struct svc_fh *fhp) case FSID_DEV: case FSID_ENCODE_DEV: case FSID_MAJOR_MINOR: - return FSIDSOURCE_DEV; + if (fhp->fh_export->ex_dentry->d_inode->i_sb->s_type->fs_flags + & FS_REQUIRES_DEV) + return FSIDSOURCE_DEV; + break; case FSID_NUM: - return FSIDSOURCE_FSID; - default: if (fhp->fh_export->ex_flags & NFSEXP_FSID) return FSIDSOURCE_FSID; - else - return FSIDSOURCE_UUID; + break; + default: + break; } + /* either a UUID type filehandle, or the filehandle doesn't + * match the export. 
+ */ + if (fhp->fh_export->ex_flags & NFSEXP_FSID) + return FSIDSOURCE_FSID; + if (fhp->fh_export->ex_uuid) + return FSIDSOURCE_UUID; + return FSIDSOURCE_DEV; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index a0c2b253818b..7867151ebb83 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -115,7 +115,8 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, exp2 = rqst_exp_get_by_name(rqstp, mnt, mounts); if (IS_ERR(exp2)) { - err = PTR_ERR(exp2); + if (PTR_ERR(exp2) != -ENOENT) + err = PTR_ERR(exp2); dput(mounts); mntput(mnt); goto out; diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 4f517665c9a0..778a850b4634 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5602,6 +5602,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb, clusters_to_del; spin_unlock(&OCFS2_I(inode)->ip_lock); le32_add_cpu(&fe->i_clusters, -clusters_to_del); + inode->i_blocks = ocfs2_inode_sector_count(inode); status = ocfs2_trim_tree(inode, path, handle, tc, clusters_to_del, &delete_blk); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 460d440310f2..f37f25c931f5 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -855,6 +855,7 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, struct ocfs2_super *osb, loff_t pos, unsigned len, struct buffer_head *di_bh) { + u32 cend; struct ocfs2_write_ctxt *wc; wc = kzalloc(sizeof(struct ocfs2_write_ctxt), GFP_NOFS); @@ -862,7 +863,8 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp, return -ENOMEM; wc->w_cpos = pos >> osb->s_clustersize_bits; - wc->w_clen = ocfs2_clusters_for_bytes(osb->sb, len); + cend = (pos + len - 1) >> osb->s_clustersize_bits; + wc->w_clen = cend - wc->w_cpos + 1; get_bh(di_bh); wc->w_di_bh = di_bh; @@ -928,18 +930,11 @@ static void ocfs2_write_failure(struct inode *inode, loff_t user_pos, unsigned user_len) { int i; - unsigned from, to; + unsigned from = user_pos & (PAGE_CACHE_SIZE - 1), + to = user_pos + user_len; struct page *tmppage; - 
ocfs2_zero_new_buffers(wc->w_target_page, user_pos, user_len); - - if (wc->w_large_pages) { - from = wc->w_target_from; - to = wc->w_target_to; - } else { - from = 0; - to = PAGE_CACHE_SIZE; - } + ocfs2_zero_new_buffers(wc->w_target_page, from, to); for(i = 0; i < wc->w_num_pages; i++) { tmppage = wc->w_pages[i]; @@ -989,9 +984,6 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno, map_from = cluster_start; map_to = cluster_end; } - - wc->w_target_from = map_from; - wc->w_target_to = map_to; } else { /* * If we haven't allocated the new page yet, we @@ -1209,18 +1201,33 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping, loff_t pos, unsigned len) { int ret, i; + loff_t cluster_off; + unsigned int local_len = len; struct ocfs2_write_cluster_desc *desc; + struct ocfs2_super *osb = OCFS2_SB(mapping->host->i_sb); for (i = 0; i < wc->w_clen; i++) { desc = &wc->w_desc[i]; + /* + * We have to make sure that the total write passed in + * doesn't extend past a single cluster. 
+ */ + local_len = len; + cluster_off = pos & (osb->s_clustersize - 1); + if ((cluster_off + local_len) > osb->s_clustersize) + local_len = osb->s_clustersize - cluster_off; + ret = ocfs2_write_cluster(mapping, desc->c_phys, desc->c_unwritten, data_ac, meta_ac, - wc, desc->c_cpos, pos, len); + wc, desc->c_cpos, pos, local_len); if (ret) { mlog_errno(ret); goto out; } + + len -= local_len; + pos += local_len; } ret = 0; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 4ffa715be09c..f3bc3658e7a5 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -314,7 +314,6 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, } i_size_write(inode, new_i_size); - inode->i_blocks = ocfs2_align_bytes_to_sectors(new_i_size); inode->i_ctime = inode->i_mtime = CURRENT_TIME; di = (struct ocfs2_dinode *) fe_bh->b_data; @@ -492,8 +491,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb, goto leave; } - status = ocfs2_claim_clusters(osb, handle, data_ac, 1, - &bit_off, &num_bits); + status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, + clusters_to_add, &bit_off, &num_bits); if (status < 0) { if (status != -ENOSPC) mlog_errno(status); diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 545f7892cdf3..d272847d5a07 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -514,8 +514,10 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, ac->ac_bh = osb->local_alloc_bh; status = 0; bail: - if (status < 0 && local_alloc_inode) + if (status < 0 && local_alloc_inode) { + mutex_unlock(&local_alloc_inode->i_mutex); iput(local_alloc_inode); + } mlog_exit(status); return status; @@ -524,13 +526,12 @@ bail: int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac, - u32 min_bits, + u32 bits_wanted, u32 *bit_off, u32 *num_bits) { int status, start; struct inode *local_alloc_inode; - u32 bits_wanted; void *bitmap; struct ocfs2_dinode *alloc; struct ocfs2_local_alloc *la; @@ -538,7 +539,6 @@ 
int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, mlog_entry_void(); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL); - bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; local_alloc_inode = ac->ac_inode; alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; la = OCFS2_LOCAL_ALLOC(alloc); diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 385a10152f9c..3f76631e110c 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h @@ -48,7 +48,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, handle_t *handle, struct ocfs2_alloc_context *ac, - u32 min_bits, + u32 bits_wanted, u32 *bit_off, u32 *num_bits); diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d9c5c9fcb30f..8f09f5235e3a 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1486,21 +1486,21 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, * contig. allocation, set to '1' to indicate we can deal with extents * of any size. 
*/ -int ocfs2_claim_clusters(struct ocfs2_super *osb, - handle_t *handle, - struct ocfs2_alloc_context *ac, - u32 min_clusters, - u32 *cluster_start, - u32 *num_clusters) +int __ocfs2_claim_clusters(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 min_clusters, + u32 max_clusters, + u32 *cluster_start, + u32 *num_clusters) { int status; - unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; + unsigned int bits_wanted = max_clusters; u64 bg_blkno = 0; u16 bg_bit_off; mlog_entry_void(); - BUG_ON(!ac); BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted); BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL @@ -1557,6 +1557,19 @@ bail: return status; } +int ocfs2_claim_clusters(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 min_clusters, + u32 *cluster_start, + u32 *num_clusters) +{ + unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; + + return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, + bits_wanted, cluster_start, num_clusters); +} + static inline int ocfs2_block_group_clear_bits(handle_t *handle, struct inode *alloc_inode, struct ocfs2_group_desc *bg, diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index f212dc01a84b..cafe93703095 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h @@ -85,6 +85,17 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb, u32 min_clusters, u32 *cluster_start, u32 *num_clusters); +/* + * Use this variant of ocfs2_claim_clusters to specify a maxiumum + * number of clusters smaller than the allocation reserved. 
+ */ +int __ocfs2_claim_clusters(struct ocfs2_super *osb, + handle_t *handle, + struct ocfs2_alloc_context *ac, + u32 min_clusters, + u32 max_clusters, + u32 *cluster_start, + u32 *num_clusters); int ocfs2_free_suballoc_bits(handle_t *handle, struct inode *alloc_inode, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f2fc9a795deb..c034b5129c1e 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -81,8 +81,15 @@ static struct dentry *ocfs2_debugfs_root = NULL; MODULE_AUTHOR("Oracle"); MODULE_LICENSE("GPL"); +struct mount_options +{ + unsigned long mount_opt; + unsigned int atime_quantum; + signed short slot; +}; + static int ocfs2_parse_options(struct super_block *sb, char *options, - unsigned long *mount_opt, s16 *slot, + struct mount_options *mopt, int is_remount); static void ocfs2_put_super(struct super_block *sb); static int ocfs2_mount_volume(struct super_block *sb); @@ -367,24 +374,23 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) { int incompat_features; int ret = 0; - unsigned long parsed_options; - s16 slot; + struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); - if (!ocfs2_parse_options(sb, data, &parsed_options, &slot, 1)) { + if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) { ret = -EINVAL; goto out; } if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != - (parsed_options & OCFS2_MOUNT_HB_LOCAL)) { + (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; } if ((osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK) != - (parsed_options & OCFS2_MOUNT_DATA_WRITEBACK)) { + (parsed_options.mount_opt & OCFS2_MOUNT_DATA_WRITEBACK)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change data mode on remount\n"); goto out; @@ -435,7 +441,9 @@ unlock_osb: /* Only save off the new mount options in case of a successful * remount. 
*/ - osb->s_mount_opt = parsed_options; + osb->s_mount_opt = parsed_options.mount_opt; + osb->s_atime_quantum = parsed_options.atime_quantum; + osb->preferred_slot = parsed_options.slot; } out: return ret; @@ -547,8 +555,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) { struct dentry *root; int status, sector_size; - unsigned long parsed_opt; - s16 slot; + struct mount_options parsed_options; struct inode *inode = NULL; struct ocfs2_super *osb = NULL; struct buffer_head *bh = NULL; @@ -556,14 +563,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) mlog_entry("%p, %p, %i", sb, data, silent); - if (!ocfs2_parse_options(sb, data, &parsed_opt, &slot, 0)) { + if (!ocfs2_parse_options(sb, data, &parsed_options, 0)) { status = -EINVAL; goto read_super_error; } /* for now we only have one cluster/node, make sure we see it * in the heartbeat universe */ - if (parsed_opt & OCFS2_MOUNT_HB_LOCAL) { + if (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL) { if (!o2hb_check_local_node_heartbeating()) { status = -EINVAL; goto read_super_error; @@ -585,8 +592,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) } brelse(bh); bh = NULL; - osb->s_mount_opt = parsed_opt; - osb->preferred_slot = slot; + osb->s_mount_opt = parsed_options.mount_opt; + osb->s_atime_quantum = parsed_options.atime_quantum; + osb->preferred_slot = parsed_options.slot; sb->s_magic = OCFS2_SUPER_MAGIC; @@ -728,8 +736,7 @@ static struct file_system_type ocfs2_fs_type = { static int ocfs2_parse_options(struct super_block *sb, char *options, - unsigned long *mount_opt, - s16 *slot, + struct mount_options *mopt, int is_remount) { int status; @@ -738,8 +745,9 @@ static int ocfs2_parse_options(struct super_block *sb, mlog_entry("remount: %d, options: \"%s\"\n", is_remount, options ? 
options : "(none)"); - *mount_opt = 0; - *slot = OCFS2_INVALID_SLOT; + mopt->mount_opt = 0; + mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; + mopt->slot = OCFS2_INVALID_SLOT; if (!options) { status = 1; @@ -749,7 +757,6 @@ static int ocfs2_parse_options(struct super_block *sb, while ((p = strsep(&options, ",")) != NULL) { int token, option; substring_t args[MAX_OPT_ARGS]; - struct ocfs2_super * osb = OCFS2_SB(sb); if (!*p) continue; @@ -757,10 +764,10 @@ static int ocfs2_parse_options(struct super_block *sb, token = match_token(p, tokens, args); switch (token) { case Opt_hb_local: - *mount_opt |= OCFS2_MOUNT_HB_LOCAL; + mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; break; case Opt_hb_none: - *mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; + mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; break; case Opt_barrier: if (match_int(&args[0], &option)) { @@ -768,27 +775,27 @@ static int ocfs2_parse_options(struct super_block *sb, goto bail; } if (option) - *mount_opt |= OCFS2_MOUNT_BARRIER; + mopt->mount_opt |= OCFS2_MOUNT_BARRIER; else - *mount_opt &= ~OCFS2_MOUNT_BARRIER; + mopt->mount_opt &= ~OCFS2_MOUNT_BARRIER; break; case Opt_intr: - *mount_opt &= ~OCFS2_MOUNT_NOINTR; + mopt->mount_opt &= ~OCFS2_MOUNT_NOINTR; break; case Opt_nointr: - *mount_opt |= OCFS2_MOUNT_NOINTR; + mopt->mount_opt |= OCFS2_MOUNT_NOINTR; break; case Opt_err_panic: - *mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; + mopt->mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; break; case Opt_err_ro: - *mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; + mopt->mount_opt &= ~OCFS2_MOUNT_ERRORS_PANIC; break; case Opt_data_ordered: - *mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; + mopt->mount_opt &= ~OCFS2_MOUNT_DATA_WRITEBACK; break; case Opt_data_writeback: - *mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; + mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; break; case Opt_atime_quantum: if (match_int(&args[0], &option)) { @@ -796,9 +803,7 @@ static int ocfs2_parse_options(struct super_block *sb, goto bail; } if (option >= 0) - osb->s_atime_quantum = option; - 
else - osb->s_atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; + mopt->atime_quantum = option; break; case Opt_slot: option = 0; @@ -807,7 +812,7 @@ static int ocfs2_parse_options(struct super_block *sb, goto bail; } if (option) - *slot = (s16)option; + mopt->slot = (s16)option; break; default: mlog(ML_ERROR, diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 66a13ee63d4c..c05358538f2b 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c @@ -66,7 +66,7 @@ struct ocfs2_vote_msg { struct ocfs2_msg_hdr v_hdr; __be32 v_reserved1; -}; +} __attribute__ ((packed)); /* Responses are given these values to maintain backwards * compatibility with older ocfs2 versions */ @@ -78,7 +78,7 @@ struct ocfs2_response_msg { struct ocfs2_msg_hdr r_hdr; __be32 r_response; -}; +} __attribute__ ((packed)); struct ocfs2_vote_work { struct list_head w_list; diff --git a/fs/proc/array.c b/fs/proc/array.c index 965625a0977d..ee4814dd98f9 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -320,7 +320,21 @@ int proc_pid_status(struct task_struct *task, char *buffer) return buffer - orig; } -static clock_t task_utime(struct task_struct *p) +/* + * Use precise platform statistics if available: + */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING +static cputime_t task_utime(struct task_struct *p) +{ + return p->utime; +} + +static cputime_t task_stime(struct task_struct *p) +{ + return p->stime; +} +#else +static cputime_t task_utime(struct task_struct *p) { clock_t utime = cputime_to_clock_t(p->utime), total = utime + cputime_to_clock_t(p->stime); @@ -337,10 +351,10 @@ static clock_t task_utime(struct task_struct *p) } utime = (clock_t)temp; - return utime; + return clock_t_to_cputime(utime); } -static clock_t task_stime(struct task_struct *p) +static cputime_t task_stime(struct task_struct *p) { clock_t stime; @@ -349,10 +363,12 @@ static clock_t task_stime(struct task_struct *p) * the total, to make sure the total observed by userspace * grows monotonically - apps rely on that): */ - stime = 
nsec_to_clock_t(p->se.sum_exec_runtime) - task_utime(p); + stime = nsec_to_clock_t(p->se.sum_exec_runtime) - + cputime_to_clock_t(task_utime(p)); - return stime; + return clock_t_to_cputime(stime); } +#endif static int do_task_stat(struct task_struct *task, char *buffer, int whole) { @@ -368,8 +384,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime; - clock_t utime, stime; + cputime_t cutime, cstime, utime, stime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; @@ -387,8 +402,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) sigemptyset(&sigign); sigemptyset(&sigcatch); - cutime = cstime = cputime_zero; - utime = stime = 0; + cutime = cstime = utime = stime = cputime_zero; rcu_read_lock(); if (lock_task_sighand(task, &flags)) { @@ -414,15 +428,15 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) do { min_flt += t->min_flt; maj_flt += t->maj_flt; - utime += task_utime(t); - stime += task_stime(t); + utime = cputime_add(utime, task_utime(t)); + stime = cputime_add(stime, task_stime(t)); t = next_thread(t); } while (t != task); min_flt += sig->min_flt; maj_flt += sig->maj_flt; - utime += cputime_to_clock_t(sig->utime); - stime += cputime_to_clock_t(sig->stime); + utime = cputime_add(utime, sig->utime); + stime = cputime_add(stime, sig->stime); } sid = signal_session(sig); @@ -471,8 +485,8 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) cmin_flt, maj_flt, cmaj_flt, - utime, - stime, + cputime_to_clock_t(utime), + cputime_to_clock_t(stime), cputime_to_clock_t(cutime), cputime_to_clock_t(cstime), priority, diff --git a/fs/proc/inode.c b/fs/proc/inode.c index a5b0dfd89a17..0e4d37c93eea 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -11,6 +11,7 @@ #include <linux/string.h> #include 
<linux/stat.h> #include <linux/completion.h> +#include <linux/poll.h> #include <linux/file.h> #include <linux/limits.h> #include <linux/init.h> @@ -232,7 +233,7 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *pts) { struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); - unsigned int rv = 0; + unsigned int rv = DEFAULT_POLLMASK; unsigned int (*poll)(struct file *, struct poll_table_struct *); spin_lock(&pde->pde_unload_lock); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 5b68dd3f191a..a005451930b7 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1915,8 +1915,11 @@ static int reiserfs_release_dquot(struct dquot *dquot) ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - if (ret) + if (ret) { + /* Release dquot anyway to avoid endless cycle in dqput() */ + dquot_release(dquot); goto out; + } ret = dquot_release(dquot); err = journal_end(&th, dquot->dq_sb, @@ -2067,6 +2070,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, size_t towrite = len; struct buffer_head tmp_bh, *bh; + if (!current->journal_info) { + printk(KERN_WARNING "reiserfs: Quota write (off=%Lu, len=%Lu)" + " cancelled because transaction is not started.\n", + (unsigned long long)off, (unsigned long long)len); + return -EIO; + } mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? 
@@ -2098,7 +2107,7 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, data += tocopy; blk++; } - out: +out: if (len == towrite) return err; if (inode->i_size < off + len - towrite) diff --git a/fs/select.c b/fs/select.c index a974082b0824..46dca31c607a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -26,8 +26,6 @@ #include <asm/uaccess.h> -#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM) - struct poll_table_page { struct poll_table_page * next; struct poll_table_entry * entry; diff --git a/fs/signalfd.c b/fs/signalfd.c index a8e293d30034..aefb0be07942 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c @@ -11,8 +11,10 @@ * Now using anonymous inode source. * Thanks to Oleg Nesterov for useful code review and suggestions. * More comments and suggestions from Arnd Bergmann. - * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> + * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br> * Retrieve multiple signals with one read() call + * Sun Jul 15, 2007: Davide Libenzi <davidel@xmailserver.org> + * Attach to the sighand only during read() and poll(). */ #include <linux/file.h> @@ -27,102 +29,12 @@ #include <linux/signalfd.h> struct signalfd_ctx { - struct list_head lnk; - wait_queue_head_t wqh; sigset_t sigmask; - struct task_struct *tsk; }; -struct signalfd_lockctx { - struct task_struct *tsk; - unsigned long flags; -}; - -/* - * Tries to acquire the sighand lock. We do not increment the sighand - * use count, and we do not even pin the task struct, so we need to - * do it inside an RCU read lock, and we must be prepared for the - * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand - * being detached. We return 0 if the sighand has been detached, or - * 1 if we were able to pin the sighand lock. 
- */ -static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk) -{ - struct sighand_struct *sighand = NULL; - - rcu_read_lock(); - lk->tsk = rcu_dereference(ctx->tsk); - if (likely(lk->tsk != NULL)) - sighand = lock_task_sighand(lk->tsk, &lk->flags); - rcu_read_unlock(); - - if (!sighand) - return 0; - - if (!ctx->tsk) { - unlock_task_sighand(lk->tsk, &lk->flags); - return 0; - } - - if (lk->tsk->tgid == current->tgid) - lk->tsk = current; - - return 1; -} - -static void signalfd_unlock(struct signalfd_lockctx *lk) -{ - unlock_task_sighand(lk->tsk, &lk->flags); -} - -/* - * This must be called with the sighand lock held. - */ -void signalfd_deliver(struct task_struct *tsk, int sig) -{ - struct sighand_struct *sighand = tsk->sighand; - struct signalfd_ctx *ctx, *tmp; - - BUG_ON(!sig); - list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) { - /* - * We use a negative signal value as a way to broadcast that the - * sighand has been orphaned, so that we can notify all the - * listeners about this. Remember the ctx->sigmask is inverted, - * so if the user is interested in a signal, that corresponding - * bit will be zero. - */ - if (sig < 0) { - if (ctx->tsk == tsk) { - ctx->tsk = NULL; - list_del_init(&ctx->lnk); - wake_up(&ctx->wqh); - } - } else { - if (!sigismember(&ctx->sigmask, sig)) - wake_up(&ctx->wqh); - } - } -} - -static void signalfd_cleanup(struct signalfd_ctx *ctx) -{ - struct signalfd_lockctx lk; - - /* - * This is tricky. If the sighand is gone, we do not need to remove - * context from the list, the list itself won't be there anymore. 
- */ - if (signalfd_lock(ctx, &lk)) { - list_del(&ctx->lnk); - signalfd_unlock(&lk); - } - kfree(ctx); -} - static int signalfd_release(struct inode *inode, struct file *file) { - signalfd_cleanup(file->private_data); + kfree(file->private_data); return 0; } @@ -130,23 +42,15 @@ static unsigned int signalfd_poll(struct file *file, poll_table *wait) { struct signalfd_ctx *ctx = file->private_data; unsigned int events = 0; - struct signalfd_lockctx lk; - poll_wait(file, &ctx->wqh, wait); + poll_wait(file, &current->sighand->signalfd_wqh, wait); - /* - * Let the caller get a POLLIN in this case, ala socket recv() when - * the peer disconnects. - */ - if (signalfd_lock(ctx, &lk)) { - if ((lk.tsk == current && - next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) || - next_signal(&lk.tsk->signal->shared_pending, - &ctx->sigmask) > 0) - events |= POLLIN; - signalfd_unlock(&lk); - } else + spin_lock_irq(&current->sighand->siglock); + if (next_signal(&current->pending, &ctx->sigmask) || + next_signal(&current->signal->shared_pending, + &ctx->sigmask)) events |= POLLIN; + spin_unlock_irq(&current->sighand->siglock); return events; } @@ -219,59 +123,46 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info, int nonblock) { ssize_t ret; - struct signalfd_lockctx lk; DECLARE_WAITQUEUE(wait, current); - if (!signalfd_lock(ctx, &lk)) - return 0; - - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); + spin_lock_irq(&current->sighand->siglock); + ret = dequeue_signal(current, &ctx->sigmask, info); switch (ret) { case 0: if (!nonblock) break; ret = -EAGAIN; default: - signalfd_unlock(&lk); + spin_unlock_irq(&current->sighand->siglock); return ret; } - add_wait_queue(&ctx->wqh, &wait); + add_wait_queue(&current->sighand->signalfd_wqh, &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - ret = dequeue_signal(lk.tsk, &ctx->sigmask, info); - signalfd_unlock(&lk); + ret = dequeue_signal(current, &ctx->sigmask, info); if (ret != 0) break; if (signal_pending(current)) { ret = -ERESTARTSYS; break; } +
spin_unlock_irq(&current->sighand->siglock); schedule(); - ret = signalfd_lock(ctx, &lk); - if (unlikely(!ret)) { - /* - * Let the caller read zero byte, ala socket - * recv() when the peer disconnect. This test - * must be done before doing a dequeue_signal(), - * because if the sighand has been orphaned, - * the dequeue_signal() call is going to crash - * because ->sighand will be long gone. - */ - break; - } + spin_lock_irq(&current->sighand->siglock); } + spin_unlock_irq(&current->sighand->siglock); - remove_wait_queue(&ctx->wqh, &wait); + remove_wait_queue(&current->sighand->signalfd_wqh, &wait); __set_current_state(TASK_RUNNING); return ret; } /* - * Returns either the size of a "struct signalfd_siginfo", or zero if the - * sighand we are attached to, has been orphaned. The "count" parameter - * must be at least the size of a "struct signalfd_siginfo". + * Returns a multiple of the size of a "struct signalfd_siginfo", or a negative + * error code. The "count" parameter must be at least the size of a + * "struct signalfd_siginfo". */ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) @@ -287,7 +178,6 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, return -EINVAL; siginfo = (struct signalfd_siginfo __user *) buf; - do { ret = signalfd_dequeue(ctx, &info, nonblock); if (unlikely(ret <= 0)) @@ -300,7 +190,7 @@ static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count, nonblock = 1; } while (--count); - return total ? total : ret; + return total ? total: ret; } static const struct file_operations signalfd_fops = { @@ -309,20 +199,13 @@ static const struct file_operations signalfd_fops = { .read = signalfd_read, }; -/* - * Create a file descriptor that is associated with our signal - * state. We can pass it around to others if we want to, but - * it will always be _our_ signal state.
- */ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask) { int error; sigset_t sigmask; struct signalfd_ctx *ctx; - struct sighand_struct *sighand; struct file *file; struct inode *inode; - struct signalfd_lockctx lk; if (sizemask != sizeof(sigset_t) || copy_from_user(&sigmask, user_mask, sizeof(sigmask))) @@ -335,17 +218,7 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas if (!ctx) return -ENOMEM; - init_waitqueue_head(&ctx->wqh); ctx->sigmask = sigmask; - ctx->tsk = current->group_leader; - - sighand = current->sighand; - /* - * Add this fd to the list of signal listeners. - */ - spin_lock_irq(&sighand->siglock); - list_add_tail(&ctx->lnk, &sighand->signalfd_list); - spin_unlock_irq(&sighand->siglock); /* * When we call this, the initialization must be complete, since @@ -364,23 +237,18 @@ asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemas fput(file); return -EINVAL; } - /* - * We need to be prepared of the fact that the sighand this fd - * is attached to, has been detched. In that case signalfd_lock() - * will return 0, and we'll just skip setting the new mask. - */ - if (signalfd_lock(ctx, &lk)) { - ctx->sigmask = sigmask; - signalfd_unlock(&lk); - } - wake_up(&ctx->wqh); + spin_lock_irq(&current->sighand->siglock); + ctx->sigmask = sigmask; + spin_unlock_irq(&current->sighand->siglock); + + wake_up(&current->sighand->signalfd_wqh); fput(file); } return ufd; err_fdalloc: - signalfd_cleanup(ctx); + kfree(ctx); return error; } diff --git a/fs/splice.c b/fs/splice.c index c010a72ca2d2..e95a36228863 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1224,6 +1224,33 @@ static long do_splice(struct file *in, loff_t __user *off_in, } /* + * Do a copy-from-user while holding the mmap_semaphore for reading, in a + * manner safe from deadlocking with simultaneous mmap() (grabbing mmap_sem + * for writing) and page faulting on the user memory pointed to by src.
+ * This assumes that we will very rarely hit the partial != 0 path, or this + * will not be a win. + */ +static int copy_from_user_mmap_sem(void *dst, const void __user *src, size_t n) +{ + int partial; + + pagefault_disable(); + partial = __copy_from_user_inatomic(dst, src, n); + pagefault_enable(); + + /* + * Didn't copy everything, drop the mmap_sem and do a faulting copy + */ + if (unlikely(partial)) { + up_read(&current->mm->mmap_sem); + partial = copy_from_user(dst, src, n); + down_read(&current->mm->mmap_sem); + } + + return partial; +} + +/* * Map an iov into an array of pages and offset/length tupples. With the * partial_page structure, we can map several non-contiguous ranges into * our ones pages[] map instead of splitting that operation into pieces. @@ -1236,31 +1263,26 @@ static int get_iovec_page_array(const struct iovec __user *iov, { int buffers = 0, error = 0; - /* - * It's ok to take the mmap_sem for reading, even - * across a "get_user()". - */ down_read(&current->mm->mmap_sem); while (nr_vecs) { unsigned long off, npages; + struct iovec entry; void __user *base; size_t len; int i; - /* - * Get user address base and length for this iovec. - */ - error = get_user(base, &iov->iov_base); - if (unlikely(error)) - break; - error = get_user(len, &iov->iov_len); - if (unlikely(error)) + error = -EFAULT; + if (copy_from_user_mmap_sem(&entry, iov, sizeof(entry))) break; + base = entry.iov_base; + len = entry.iov_len; + /* * Sanity check this iovec. 0 read succeeds.
*/ + error = 0; if (unlikely(!len)) break; error = -EFAULT; diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 135353f8a296..5afe2a26f5d8 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c @@ -248,12 +248,7 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr) void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr) { - if (sysfs_hash_and_remove(kobj->sd, attr->attr.name) < 0) { - printk(KERN_ERR "%s: " - "bad dentry or inode or no such file: \"%s\"\n", - __FUNCTION__, attr->attr.name); - dump_stack(); - } + sysfs_hash_and_remove(kobj->sd, attr->attr.name); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 048e6054c2fd..83e76b3813c9 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -762,12 +762,15 @@ static int sysfs_count_nlink(struct sysfs_dirent *sd) static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { + struct dentry *ret = NULL; struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata; struct sysfs_dirent * sd; struct bin_attribute *bin_attr; struct inode *inode; int found = 0; + mutex_lock(&sysfs_mutex); + for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) { if (sysfs_type(sd) && !strcmp(sd->s_name, dentry->d_name.name)) { @@ -778,14 +781,14 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, /* no such entry */ if (!found) - return NULL; + goto out_unlock; /* attach dentry and inode */ inode = sysfs_get_inode(sd); - if (!inode) - return ERR_PTR(-ENOMEM); - - mutex_lock(&sysfs_mutex); + if (!inode) { + ret = ERR_PTR(-ENOMEM); + goto out_unlock; + } if (inode->i_state & I_NEW) { /* initialize inode according to type */ @@ -815,9 +818,9 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, sysfs_instantiate(dentry, inode); sysfs_attach_dentry(sd, dentry); + out_unlock: mutex_unlock(&sysfs_mutex); - - return NULL; + return ret; } const struct 
inode_operations sysfs_dir_inode_operations = { @@ -942,6 +945,8 @@ int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, if (error) goto out_drop; + mutex_lock(&sysfs_mutex); + dup_name = sd->s_name; sd->s_name = new_name; @@ -949,8 +954,6 @@ int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd, d_add(new_dentry, NULL); d_move(sd->s_dentry, new_dentry); - mutex_lock(&sysfs_mutex); - sysfs_unlink_sibling(sd); sysfs_get(new_parent_sd); sysfs_put(sd->s_parent); diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c index 276f7207a564..87e87dcd3f9c 100644 --- a/fs/udf/balloc.c +++ b/fs/udf/balloc.c @@ -540,26 +540,24 @@ static void udf_table_free_blocks(struct super_block *sb, if (epos.offset + adsize > sb->s_blocksize) { loffset = epos.offset; aed->lengthAllocDescs = cpu_to_le32(adsize); - sptr = UDF_I_DATA(inode) + epos.offset - - udf_file_entry_alloc_offset(inode) + - UDF_I_LENEATTR(inode) - adsize; + sptr = UDF_I_DATA(table) + epos.offset - adsize; dptr = epos.bh->b_data + sizeof(struct allocExtDesc); memcpy(dptr, sptr, adsize); epos.offset = sizeof(struct allocExtDesc) + adsize; } else { loffset = epos.offset + adsize; aed->lengthAllocDescs = cpu_to_le32(0); - sptr = oepos.bh->b_data + epos.offset; - epos.offset = sizeof(struct allocExtDesc); - if (oepos.bh) { + sptr = oepos.bh->b_data + epos.offset; aed = (struct allocExtDesc *)oepos.bh->b_data; aed->lengthAllocDescs = cpu_to_le32(le32_to_cpu(aed->lengthAllocDescs) + adsize); } else { + sptr = UDF_I_DATA(table) + epos.offset; UDF_I_LENALLOC(table) += adsize; mark_inode_dirty(table); } + epos.offset = sizeof(struct allocExtDesc); } if (UDF_SB_UDFREV(sb) >= 0x0200) udf_new_tag(epos.bh->b_data, TAG_IDENT_AED, 3, 1, diff --git a/fs/udf/super.c b/fs/udf/super.c index 382be7be5ae3..c68a6e730b97 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -89,7 +89,7 @@ static int udf_find_fileset(struct super_block *, kernel_lb_addr *, static void udf_load_pvoldesc(struct super_block 
*, struct buffer_head *); static void udf_load_fileset(struct super_block *, struct buffer_head *, kernel_lb_addr *); -static void udf_load_partdesc(struct super_block *, struct buffer_head *); +static int udf_load_partdesc(struct super_block *, struct buffer_head *); static void udf_open_lvid(struct super_block *); static void udf_close_lvid(struct super_block *); static unsigned int udf_count_free(struct super_block *); @@ -877,7 +877,7 @@ static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, root->logicalBlockNum, root->partitionReferenceNum); } -static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) +static int udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) { struct partitionDesc *p; int i; @@ -912,6 +912,11 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table = udf_iget(sb, loc); + if (!UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table) { + udf_debug("cannot load unallocSpaceTable (part %d)\n", + i); + return 1; + } UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", i, UDF_SB_PARTMAPS(sb)[i].s_uspace.s_table->i_ino); @@ -938,6 +943,11 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table = udf_iget(sb, loc); + if (!UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table) { + udf_debug("cannot load freedSpaceTable (part %d)\n", + i); + return 1; + } UDF_SB_PARTFLAGS(sb,i) |= UDF_PART_FLAG_FREED_TABLE; udf_debug("freedSpaceTable (part %d) @ %ld\n", i, UDF_SB_PARTMAPS(sb)[i].s_fspace.s_table->i_ino); @@ -966,6 +976,7 @@ static void udf_load_partdesc(struct super_block *sb, struct buffer_head *bh) le16_to_cpu(p->partitionNumber), i, UDF_SB_PARTTYPE(sb,i), UDF_SB_PARTROOT(sb,i), UDF_SB_PARTLEN(sb,i)); } + return 0; } static int udf_load_logicalvol(struct super_block *sb, struct buffer_head *bh, @@ -1177,12 +1188,19 @@ static int 
udf_process_sequence(struct super_block *sb, long block, long lastblo udf_load_logicalvol(sb, bh, fileset); } else if (i == VDS_POS_PARTITION_DESC) { struct buffer_head *bh2 = NULL; - udf_load_partdesc(sb, bh); + if (udf_load_partdesc(sb, bh)) { + brelse(bh); + return 1; + } for (j = vds[i].block + 1; j < vds[VDS_POS_TERMINATING_DESC].block; j++) { bh2 = udf_read_tagged(sb, j, j, &ident); gd = (struct generic_desc *)bh2->b_data; if (ident == TAG_IDENT_PD) - udf_load_partdesc(sb, bh2); + if (udf_load_partdesc(sb, bh2)) { + brelse(bh); + brelse(bh2); + return 1; + } brelse(bh2); } } diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 73402c5eeb8a..38eb0b7a1f3d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -894,7 +894,7 @@ magic_found: goto again; } - + sbi->s_flags = flags;/*after that line some functions use s_flags*/ ufs_print_super_stuff(sb, usb1, usb2, usb3); /* @@ -1025,8 +1025,6 @@ magic_found: UFS_MOUNT_UFSTYPE_44BSD) uspi->s_maxsymlinklen = fs32_to_cpu(sb, usb3->fs_un2.fs_44.fs_maxsymlinklen); - - sbi->s_flags = flags; inode = iget(sb, UFS_ROOTINO); if (!inode || is_bad_inode(inode)) diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h index b4acc7f3c374..e6ea293f303c 100644 --- a/fs/xfs/linux-2.6/kmem.h +++ b/fs/xfs/linux-2.6/kmem.h @@ -103,7 +103,7 @@ extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast); static inline int kmem_shake_allow(gfp_t gfp_mask) { - return (gfp_mask & __GFP_WAIT); + return (gfp_mask & __GFP_WAIT) != 0; } #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index fd4105d662e0..5f152f60d74d 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -181,6 +181,7 @@ xfs_setfilesize( ip->i_d.di_size = isize; ip->i_update_core = 1; ip->i_update_size = 1; + mark_inode_dirty_sync(vn_to_inode(ioend->io_vnode)); } xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -652,7 +653,7 @@ xfs_probe_cluster( for (i = 0; i < pagevec_count(&pvec); i++) { struct 
page *page = pvec.pages[i]; - size_t pg_offset, len = 0; + size_t pg_offset, pg_len = 0; if (tindex == tlast) { pg_offset = @@ -665,16 +666,16 @@ xfs_probe_cluster( pg_offset = PAGE_CACHE_SIZE; if (page->index == tindex && !TestSetPageLocked(page)) { - len = xfs_probe_page(page, pg_offset, mapped); + pg_len = xfs_probe_page(page, pg_offset, mapped); unlock_page(page); } - if (!len) { + if (!pg_len) { done = 1; break; } - total += len; + total += pg_len; tindex++; } diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index bb72c3d4141f..81565dea9af7 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c @@ -46,7 +46,7 @@ xfs_param_t xfs_params = { .inherit_nosym = { 0, 0, 1 }, .rotorstep = { 1, 1, 255 }, .inherit_nodfrg = { 0, 1, 1 }, - .fstrm_timer = { 1, 50, 3600*100}, + .fstrm_timer = { 1, 30*100, 3600*100}, }; /* diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 4528f9a3f304..491d1f4f202d 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -415,8 +415,10 @@ xfs_fs_write_inode( if (vp) { vn_trace_entry(vp, __FUNCTION__, (inst_t *)__return_address); - if (sync) + if (sync) { + filemap_fdatawait(inode->i_mapping); flags |= FLUSH_SYNC; + } error = bhv_vop_iflush(vp, flags); if (error == EAGAIN) error = sync? bhv_vop_iflush(vp, flags | FLUSH_LOG) : 0; diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 2d274b23ade5..6ff0f4de1630 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c @@ -120,7 +120,8 @@ xfs_Gqm_init(void) * Initialize the dquot hash tables. 
*/ udqhash = kmem_zalloc_greedy(&hsize, - XFS_QM_HASHSIZE_LOW, XFS_QM_HASHSIZE_HIGH, + XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t), + XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t), KM_SLEEP | KM_MAYFAIL | KM_LARGE); gdqhash = kmem_zalloc(hsize, KM_SLEEP | KM_LARGE); hsize /= sizeof(xfs_dqhash_t); diff --git a/fs/xfs/support/debug.h b/fs/xfs/support/debug.h index a27a7c8c0526..855da0408647 100644 --- a/fs/xfs/support/debug.h +++ b/fs/xfs/support/debug.h @@ -34,10 +34,10 @@ extern void cmn_err(int, char *, ...) extern void assfail(char *expr, char *f, int l); #define ASSERT_ALWAYS(expr) \ - (unlikely((expr) != 0) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifndef DEBUG -# define ASSERT(expr) ((void)0) +#define ASSERT(expr) ((void)0) #ifndef STATIC # define STATIC static noinline @@ -49,8 +49,10 @@ extern void assfail(char *expr, char *f, int l); #else /* DEBUG */ -# define ASSERT(expr) ASSERT_ALWAYS(expr) -# include <linux/random.h> +#include <linux/random.h> + +#define ASSERT(expr) \ + (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__)) #ifndef STATIC # define STATIC noinline diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index aea37df4aa62..26d09e2e1a7f 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1975,7 +1975,6 @@ xfs_da_do_buf( error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED); if (unlikely(error == EFSCORRUPTED)) { if (xfs_error_level >= XFS_ERRLEVEL_LOW) { - int i; cmn_err(CE_ALERT, "xfs_da_do_buf: bno %lld\n", (long long)bno); cmn_err(CE_ALERT, "dir: inode %lld\n", diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index ce2278611bb7..36d8f6aa11af 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -350,9 +350,10 @@ _xfs_filestream_update_ag( /* xfs_fstrm_free_func(): callback for freeing cached stream items. 
*/ void xfs_fstrm_free_func( - xfs_ino_t ino, - fstrm_item_t *item) + unsigned long ino, + void *data) { + fstrm_item_t *item = (fstrm_item_t *)data; xfs_inode_t *ip = item->ip; int ref; @@ -438,7 +439,7 @@ xfs_filestream_mount( grp_count = 10; err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, - (xfs_mru_cache_free_func_t)xfs_fstrm_free_func); + xfs_fstrm_free_func); return err; } @@ -467,8 +468,7 @@ void xfs_filestream_flush( xfs_mount_t *mp) { - /* point in time flush, so keep the reaper running */ - xfs_mru_cache_flush(mp->m_filestream, 1); + xfs_mru_cache_flush(mp->m_filestream); } /* diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 9d4c4fbeb3ee..9bfb69e1e885 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -2185,13 +2185,13 @@ xlog_state_do_callback( } cb = iclog->ic_callback; - while (cb != 0) { + while (cb) { iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_callback = NULL; LOG_UNLOCK(log, s); /* perform callbacks in the order given */ - for (; cb != 0; cb = cb_next) { + for (; cb; cb = cb_next) { cb_next = cb->cb_next; cb->cb_func(cb->cb_arg, aborted); } @@ -2202,7 +2202,7 @@ xlog_state_do_callback( loopdidcallbacks++; funcdidcallbacks++; - ASSERT(iclog->ic_callback == 0); + ASSERT(iclog->ic_callback == NULL); if (!(iclog->ic_state & XLOG_STATE_IOERROR)) iclog->ic_state = XLOG_STATE_DIRTY; @@ -3242,10 +3242,10 @@ xlog_ticket_put(xlog_t *log, #else /* When we debug, it is easier if tickets are cycled */ ticket->t_next = NULL; - if (log->l_tail != 0) { + if (log->l_tail) { log->l_tail->t_next = ticket; } else { - ASSERT(log->l_freelist == 0); + ASSERT(log->l_freelist == NULL); log->l_freelist = ticket; } log->l_tail = ticket; @@ -3463,7 +3463,7 @@ xlog_verify_iclog(xlog_t *log, s = LOG_LOCK(log); icptr = log->l_iclog; for (i=0; i < log->l_iclog_bufs; i++) { - if (icptr == 0) + if (icptr == NULL) xlog_panic("xlog_verify_iclog: invalid ptr"); icptr = icptr->ic_next; } diff --git a/fs/xfs/xfs_log_recover.c 
b/fs/xfs/xfs_log_recover.c index fddbb091a86f..8ae6e8e5f3db 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1366,7 +1366,7 @@ xlog_recover_add_to_cont_trans( int old_len; item = trans->r_itemq; - if (item == 0) { + if (item == NULL) { /* finish copying rest of trans header */ xlog_recover_add_item(&trans->r_itemq); ptr = (xfs_caddr_t) &trans->r_theader + @@ -1412,7 +1412,7 @@ xlog_recover_add_to_trans( if (!len) return 0; item = trans->r_itemq; - if (item == 0) { + if (item == NULL) { ASSERT(*(uint *)dp == XFS_TRANS_HEADER_MAGIC); if (len == sizeof(xfs_trans_header_t)) xlog_recover_add_item(&trans->r_itemq); @@ -1467,12 +1467,12 @@ xlog_recover_unlink_tid( xlog_recover_t *tp; int found = 0; - ASSERT(trans != 0); + ASSERT(trans != NULL); if (trans == *q) { *q = (*q)->r_next; } else { tp = *q; - while (tp != 0) { + while (tp) { if (tp->r_next == trans) { found = 1; break; @@ -1495,7 +1495,7 @@ xlog_recover_insert_item_backq( xlog_recover_item_t **q, xlog_recover_item_t *item) { - if (*q == 0) { + if (*q == NULL) { item->ri_prev = item->ri_next = item; *q = item; } else { @@ -1899,7 +1899,7 @@ xlog_recover_do_reg_buffer( break; nbits = xfs_contig_bits(data_map, map_size, bit); ASSERT(nbits > 0); - ASSERT(item->ri_buf[i].i_addr != 0); + ASSERT(item->ri_buf[i].i_addr != NULL); ASSERT(item->ri_buf[i].i_len % XFS_BLI_CHUNK == 0); ASSERT(XFS_BUF_COUNT(bp) >= ((uint)bit << XFS_BLI_SHIFT)+(nbits<<XFS_BLI_SHIFT)); diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 7deb9e3cbbd3..e0b358c1c533 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -206,8 +206,11 @@ _xfs_mru_cache_list_insert( */ if (!_xfs_mru_cache_migrate(mru, now)) { mru->time_zero = now; - if (!mru->next_reap) - mru->next_reap = mru->grp_count * mru->grp_time; + if (!mru->queued) { + mru->queued = 1; + queue_delayed_work(xfs_mru_reap_wq, &mru->work, + mru->grp_count * mru->grp_time); + } } else { grp = (now - mru->time_zero) / mru->grp_time; grp = 
(mru->lru_grp + grp) % mru->grp_count; @@ -271,29 +274,26 @@ _xfs_mru_cache_reap( struct work_struct *work) { xfs_mru_cache_t *mru = container_of(work, xfs_mru_cache_t, work.work); - unsigned long now; + unsigned long now, next; ASSERT(mru && mru->lists); if (!mru || !mru->lists) return; mutex_spinlock(&mru->lock); - now = jiffies; - if (mru->reap_all || - (mru->next_reap && time_after(now, mru->next_reap))) { - if (mru->reap_all) - now += mru->grp_count * mru->grp_time * 2; - mru->next_reap = _xfs_mru_cache_migrate(mru, now); - _xfs_mru_cache_clear_reap_list(mru); + next = _xfs_mru_cache_migrate(mru, jiffies); + _xfs_mru_cache_clear_reap_list(mru); + + mru->queued = next; + if ((mru->queued > 0)) { + now = jiffies; + if (next <= now) + next = 0; + else + next -= now; + queue_delayed_work(xfs_mru_reap_wq, &mru->work, next); } - /* - * the process that triggered the reap_all is responsible - * for restating the periodic reap if it is required. - */ - if (!mru->reap_all) - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); - mru->reap_all = 0; mutex_spinunlock(&mru->lock, 0); } @@ -352,7 +352,7 @@ xfs_mru_cache_create( /* An extra list is needed to avoid reaping up to a grp_time early. */ mru->grp_count = grp_count + 1; - mru->lists = kmem_alloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); + mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP); if (!mru->lists) { err = ENOMEM; @@ -374,11 +374,6 @@ xfs_mru_cache_create( mru->grp_time = grp_time; mru->free_func = free_func; - /* start up the reaper event */ - mru->next_reap = 0; - mru->reap_all = 0; - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); - *mrup = mru; exit: @@ -394,35 +389,25 @@ exit: * Call xfs_mru_cache_flush() to flush out all cached entries, calling their * free functions as they're deleted. When this function returns, the caller is * guaranteed that all the free functions for all the elements have finished - * executing. 
- * - * While we are flushing, we stop the periodic reaper event from triggering. - * Normally, we want to restart this periodic event, but if we are shutting - * down the cache we do not want it restarted. hence the restart parameter - * where 0 = do not restart reaper and 1 = restart reaper. + * executing and the reaper is not running. */ void xfs_mru_cache_flush( - xfs_mru_cache_t *mru, - int restart) + xfs_mru_cache_t *mru) { if (!mru || !mru->lists) return; - cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); - mutex_spinlock(&mru->lock); - mru->reap_all = 1; - mutex_spinunlock(&mru->lock, 0); + if (mru->queued) { + mutex_spinunlock(&mru->lock, 0); + cancel_rearming_delayed_workqueue(xfs_mru_reap_wq, &mru->work); + mutex_spinlock(&mru->lock); + } - queue_work(xfs_mru_reap_wq, &mru->work.work); - flush_workqueue(xfs_mru_reap_wq); + _xfs_mru_cache_migrate(mru, jiffies + mru->grp_count * mru->grp_time); + _xfs_mru_cache_clear_reap_list(mru); - mutex_spinlock(&mru->lock); - WARN_ON_ONCE(mru->reap_all != 0); - mru->reap_all = 0; - if (restart) - queue_delayed_work(xfs_mru_reap_wq, &mru->work, mru->grp_time); mutex_spinunlock(&mru->lock, 0); } @@ -433,8 +418,7 @@ xfs_mru_cache_destroy( if (!mru || !mru->lists) return; - /* we don't want the reaper to restart here */ - xfs_mru_cache_flush(mru, 0); + xfs_mru_cache_flush(mru); kmem_free(mru->lists, mru->grp_count * sizeof(*mru->lists)); kmem_free(mru, sizeof(*mru)); diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 624fd10ee8e5..dd58ea1bbebe 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -32,11 +32,9 @@ typedef struct xfs_mru_cache unsigned int grp_time; /* Time period spanned by grps. */ unsigned int lru_grp; /* Group containing time zero. */ unsigned long time_zero; /* Time first element was added. */ - unsigned long next_reap; /* Time that the reaper should - next do something. 
*/ - unsigned int reap_all; /* if set, reap all lists */ xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ struct delayed_work work; /* Workqueue data for reaping. */ + unsigned int queued; /* work has been queued */ } xfs_mru_cache_t; int xfs_mru_cache_init(void); @@ -44,7 +42,7 @@ void xfs_mru_cache_uninit(void); int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms, unsigned int grp_count, xfs_mru_cache_free_func_t free_func); -void xfs_mru_cache_flush(xfs_mru_cache_t *mru, int restart); +void xfs_mru_cache_flush(xfs_mru_cache_t *mru); void xfs_mru_cache_destroy(struct xfs_mru_cache *mru); int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, void *value); diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 1a5ad8cd97b0..603459229904 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1082,6 +1082,9 @@ xfs_fsync( if (XFS_FORCED_SHUTDOWN(ip->i_mount)) return XFS_ERROR(EIO); + if (flag & FSYNC_DATA) + filemap_fdatawait(vn_to_inode(XFS_ITOV(ip))->i_mapping); + /* * We always need to make sure that the required inode state * is safe on disk. The vnode might be clean but because @@ -3769,12 +3772,16 @@ xfs_inode_flush( sync_lsn = log->l_last_sync_lsn; GRANT_UNLOCK(log, s); - if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) <= 0)) - return 0; + if ((XFS_LSN_CMP(iip->ili_last_lsn, sync_lsn) > 0)) { + if (flags & FLUSH_SYNC) + log_flags |= XFS_LOG_SYNC; + error = xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (error) + return error; + } - if (flags & FLUSH_SYNC) - log_flags |= XFS_LOG_SYNC; - return xfs_log_force(mp, iip->ili_last_lsn, log_flags); + if (ip->i_update_core == 0) + return 0; } } @@ -3788,9 +3795,6 @@ xfs_inode_flush( if (flags & FLUSH_INODE) { int flush_flags; - if (xfs_ipincount(ip)) - return EAGAIN; - if (flags & FLUSH_SYNC) { xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_iflock(ip); |