author    | Ingo Molnar <mingo@elte.hu> | 2008-10-10 19:30:08 +0200
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-10 19:30:08 +0200
commit    | 3dd392a407d15250a501fa109cc1f93fee95ef85
tree      | c1faca3fa8bd0f7c8790b3e0887229b4a5a90e8b /fs
parent    | b27a43c1e90582facad44de67d02bc9e9f900289
parent    | d403a6484f0341bf0624d17ece46f24f741b6a92
Merge branch 'linus' into x86/pat2
Conflicts:
arch/x86/mm/init_64.c
Diffstat (limited to 'fs')
-rw-r--r-- | fs/9p/vfs_inode.c            |  3
-rw-r--r-- | fs/bfs/dir.c                 |  2
-rw-r--r-- | fs/dcache.c                  | 10
-rw-r--r-- | fs/exec.c                    |  2
-rw-r--r-- | fs/inotify_user.c            | 27
-rw-r--r-- | fs/ocfs2/aops.c              |  2
-rw-r--r-- | fs/partitions/check.c        |  4
-rw-r--r-- | fs/proc/generic.c            |  4
-rw-r--r-- | fs/proc/proc_misc.c          | 12
-rw-r--r-- | fs/ramfs/file-nommu.c        |  2
-rw-r--r-- | fs/splice.c                  |  3
-rw-r--r-- | fs/ubifs/debug.c             |  2
-rw-r--r-- | fs/ubifs/dir.c               |  2
-rw-r--r-- | fs/ubifs/find.c              |  1
-rw-r--r-- | fs/ubifs/gc.c                | 14
-rw-r--r-- | fs/ubifs/super.c             |  3
-rw-r--r-- | fs/ubifs/tnc.c               |  2
-rw-r--r-- | fs/udf/file.c                |  1
-rw-r--r-- | fs/udf/ialloc.c              | 44
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c  |  4
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 20
-rw-r--r-- | fs/xfs/xfs_buf_item.c        | 44
-rw-r--r-- | fs/xfs/xfs_dfrag.c           |  9
-rw-r--r-- | fs/xfs/xfs_inode.c           | 94
-rw-r--r-- | fs/xfs/xfs_log.c             | 62
-rw-r--r-- | fs/xfs/xfs_log_priv.h        |  1
-rw-r--r-- | fs/xfs/xfs_vnodeops.c        | 26
27 files changed, 203 insertions, 197 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, return NULL; error: - if (fid) - p9_client_clunk(fid); + p9_client_clunk(fid); return ERR_PTR(result); } diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, inode->i_ino); if (err) { inode_dec_link_count(inode); - iput(inode); mutex_unlock(&info->bfs_lock); + iput(inode); return err; } mutex_unlock(&info->bfs_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 80e93956aced..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) if (dentry->d_parent != parent) goto next; + /* non-existing due to RCU? */ + if (d_unhashed(dentry)) + goto next; + /* * It is safe to compare names since d_move() cannot * change the qstr (protected by d_lock). @@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) goto next; } - if (!d_unhashed(dentry)) { - atomic_inc(&dentry->d_count); - found = dentry; - } + atomic_inc(&dentry->d_count); + found = dentry; spin_unlock(&dentry->d_lock); break; next: diff --git a/fs/exec.c b/fs/exec.c index 32993beecbe9..cecee501ce78 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm) tsk->active_mm = mm; activate_mm(active_mm, mm); task_unlock(tsk); - mm_update_next_owner(old_mm); arch_pick_mmap_layout(mm); if (old_mm) { up_read(&old_mm->mmap_sem); BUG_ON(active_mm != old_mm); + mm_update_next_owner(old_mm); mmput(old_mm); return 0; } diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -323,7 +323,7 @@ out: } /* - * remove_kevent - cleans up and ultimately frees the given kevent + * remove_kevent - cleans up the given kevent * * Caller must hold dev->ev_mutex. */ @@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, dev->event_count--; dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; +} +/* + * free_kevent - frees the given kevent. 
+ */ +static void free_kevent(struct inotify_kernel_event *kevent) +{ kfree(kevent->name); kmem_cache_free(event_cachep, kevent); } @@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) struct inotify_kernel_event *kevent; kevent = inotify_dev_get_event(dev); remove_kevent(dev, kevent); + free_kevent(kevent); } } @@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, dev = file->private_data; while (1) { - int events; prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); mutex_lock(&dev->ev_mutex); - events = !list_empty(&dev->events); - mutex_unlock(&dev->ev_mutex); - if (events) { + if (!list_empty(&dev->events)) { ret = 0; break; } + mutex_unlock(&dev->ev_mutex); if (file->f_flags & O_NONBLOCK) { ret = -EAGAIN; @@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, if (ret) return ret; - mutex_lock(&dev->ev_mutex); while (1) { struct inotify_kernel_event *kevent; @@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, } break; } + remove_kevent(dev, kevent); + + /* + * Must perform the copy_to_user outside the mutex in order + * to avoid a lock order reversal with mmap_sem. + */ + mutex_unlock(&dev->ev_mutex); if (copy_to_user(buf, &kevent->event, event_size)) { ret = -EFAULT; @@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, count -= kevent->event.len; } - remove_kevent(dev, kevent); + free_kevent(kevent); + + mutex_lock(&dev->ev_mutex); } mutex_unlock(&dev->ev_mutex); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..a53da1466277 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, goto bail; } - if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { + if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { ocfs2_error(inode->i_sb, "Inode %llu has a hole at block %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..ecc3330972e5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -499,9 +499,9 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) if (!size) continue; if (from + size > get_capacity(disk)) { - printk(KERN_ERR " %s: p%d exceeds device capacity\n", + printk(KERN_WARNING + "%s: p%d exceeds device capacity\n", disk->disk_name, p); - continue; } res = add_partition(disk, p, from, size, state->parts[p].flags); if (res) { diff --git a/fs/proc/generic.c b/fs/proc/generic.c index bca0f81eb687..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -547,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp for (tmp = dir->subdir; tmp; tmp = tmp->next) if (strcmp(tmp->name, dp->name) == 0) { - printk(KERN_WARNING "proc_dir_entry '%s' already " - "registered\n", dp->name); + printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", + dir->name, dp->name); dump_stack(); break; } diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 00f10a2dcf12..29e20c6b1f7f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -183,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "SReclaimable: %8lu kB\n" "SUnreclaim: %8lu kB\n" "PageTables: %8lu kB\n" +#ifdef CONFIG_QUICKLIST + "Quicklists: %8lu kB\n" +#endif "NFS_Unstable: %8lu kB\n" "Bounce: %8lu kB\n" "WritebackTmp: %8lu kB\n" @@ -190,8 
+193,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, "Committed_AS: %8lu kB\n" "VmallocTotal: %8lu kB\n" "VmallocUsed: %8lu kB\n" - "VmallocChunk: %8lu kB\n" - "Quicklists: %8lu kB\n", + "VmallocChunk: %8lu kB\n", K(i.totalram), K(i.freeram), K(i.bufferram), @@ -216,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(global_page_state(NR_SLAB_RECLAIMABLE)), K(global_page_state(NR_SLAB_UNRECLAIMABLE)), K(global_page_state(NR_PAGETABLE)), +#ifdef CONFIG_QUICKLIST + K(quicklist_total_size()), +#endif K(global_page_state(NR_UNSTABLE_NFS)), K(global_page_state(NR_BOUNCE)), K(global_page_state(NR_WRITEBACK_TEMP)), @@ -223,8 +228,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off, K(committed), (unsigned long)VMALLOC_TOTAL >> 10, vmi.used >> 10, - vmi.largest_chunk >> 10, - K(quicklist_total_size()) + vmi.largest_chunk >> 10 ); len += hugetlb_report_meminfo(page + len); diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { * size 0 on the assumption that it's going to be used for an mmap of shared * memory */ -static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) +int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) { struct pagevec lru_pvec; unsigned long npages, xpages, loop, limit; diff --git a/fs/splice.c b/fs/splice.c index 1bbc6f4bb09c..a1e701c27156 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -898,6 +898,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, if (unlikely(!(out->f_mode & FMODE_WRITE))) return -EBADF; + if (unlikely(out->f_flags & O_APPEND)) + return -EINVAL; + ret = rw_verify_area(WRITE, out, ppos, len); if (unlikely(ret < 0)) return ret; diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b9cb77473758..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) printk(KERN_DEBUG "\t ino %llu\n", - le64_to_cpu(orph->inos[i])); + (unsigned long long)le64_to_cpu(orph->inos[i])); break; } default: diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 2b267c9a1806..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) while (1) { dbg_gen("feed '%s', ino %llu, new f_pos %#x", - dent->name, le64_to_cpu(dent->inum), + dent->name, (unsigned long long)le64_to_cpu(dent->inum), key_hash_flash(c, &dent->key)); ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index e045c8b55423..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -507,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, rsvd_idx_lebs = 0; lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - c->lst.taken_empty_lebs; - ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); if (rsvd_idx_lebs < lebs) /* * OK to allocate an empty LEB, but we still don't want to go diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 13f1019c859f..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -334,15 +334,15 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) err = move_nodes(c, sleb); if (err) - goto 
out; + goto out_inc_seq; err = gc_sync_wbufs(c); if (err) - goto out; + goto out_inc_seq; err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); if (err) - goto out; + goto out_inc_seq; /* Allow for races with TNC */ c->gced_lnum = lnum; @@ -369,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) out: ubifs_scan_destroy(sleb); return err; + +out_inc_seq: + /* We may have moved at least some nodes so allow for races with TNC */ + c->gced_lnum = lnum; + smp_wmb(); + c->gc_seq += 1; + smp_wmb(); + goto out; } /** diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 7562464ac83f..3f4902060c7a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1024,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) goto out_dereg; } + sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); if (!mounted_read_only) { err = alloc_wbufs(c); if (err) goto out_cbuf; /* Create background thread */ - sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, - c->vi.vol_id); c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); if (!c->bgt) c->bgt = ERR_PTR(-EINVAL); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 7da209ab9378..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -1476,7 +1476,7 @@ again: } err = fallible_read_node(c, key, &zbr, node); - if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { + if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { /* * The node may have been GC'ed out from under us so try again * while keeping the TNC mutex locked. diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { .release = udf_release_file, .fsync = udf_fsync_file, .splice_read = generic_file_splice_read, + .llseek = generic_file_llseek, }; const struct inode_operations udf_file_inode_operations = { diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c @@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) *err = -ENOSPC; iinfo = UDF_I(inode); - iinfo->i_unique = 0; - iinfo->i_lenExtents = 0; - iinfo->i_next_alloc_block = 0; - iinfo->i_next_alloc_goal = 0; - iinfo->i_strat4096 = 0; + if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { + iinfo->i_efe = 1; + if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) + sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; + iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct extendedFileEntry), + GFP_KERNEL); + } else { + iinfo->i_efe = 0; + iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - + sizeof(struct fileEntry), + GFP_KERNEL); + } + if (!iinfo->i_ext.i_data) { + iput(inode); + *err = -ENOMEM; + return NULL; + } block = udf_new_block(dir->i_sb, NULL, dinfo->i_location.partitionReferenceNum, @@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) lvhd->uniqueID = cpu_to_le64(uniqueID); mark_buffer_dirty(sbi->s_lvid_bh); } + mutex_unlock(&sbi->s_alloc_mutex); inode->i_mode = mode; inode->i_uid = current->fsuid; if (dir->i_mode & S_ISGID) { @@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) iinfo->i_lenEAttr = 0; iinfo->i_lenAlloc = 0; iinfo->i_use = 0; - if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { - iinfo->i_efe = 1; - if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) - sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; - iinfo->i_ext.i_data = 
kzalloc(inode->i_sb->s_blocksize - - sizeof(struct extendedFileEntry), - GFP_KERNEL); - } else { - iinfo->i_efe = 0; - iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - - sizeof(struct fileEntry), - GFP_KERNEL); - } - if (!iinfo->i_ext.i_data) { - iput(inode); - *err = -ENOMEM; - mutex_unlock(&sbi->s_alloc_mutex); - return NULL; - } if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) @@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) iinfo->i_crtime = current_fs_time(inode->i_sb); insert_inode_hash(inode); mark_inode_dirty(inode); - mutex_unlock(&sbi->s_alloc_mutex); if (DQUOT_ALLOC_INODE(inode)) { DQUOT_DROP(inode); diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f42f80a3b1fa..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1338,6 +1338,10 @@ __xfs_get_blocks( offset = (xfs_off_t)iblock << inode->i_blkbits; ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); size = bh_result->b_size; + + if (!create && direct && offset >= i_size_read(inode)) + return 0; + error = xfs_iomap(XFS_I(inode), offset, size, create ? flags : BMAPI_READ, &iomap, &niomap); if (error) diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 73c65f19e549..18d3c8487835 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -1302,9 +1302,29 @@ xfs_fs_remount( mp->m_flags &= ~XFS_MOUNT_BARRIER; break; default: + /* + * Logically we would return an error here to prevent + * users from believing they might have changed + * mount options using remount which can't be changed. + * + * But unfortunately mount(8) adds all options from + * mtab and fstab to the mount arguments in some cases + * so we can't blindly reject options, but have to + * check for each specified option if it actually + * differs from the currently set option and only + * reject it if that's the case. + * + * Until that is implemented we return success for + * every remount request, and silently ignore all + * options that we can't actually change. + */ +#if 0 printk(KERN_INFO "XFS: mount option \"%s\" not supported for remount\n", p); return -EINVAL; +#else + return 0; +#endif } } diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 608c30c3f76b..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -732,6 +732,7 @@ xfs_buf_item_init( bip->bli_item.li_ops = &xfs_buf_item_ops; bip->bli_item.li_mountp = mp; bip->bli_buf = bp; + xfs_buf_hold(bp); bip->bli_format.blf_type = XFS_LI_BUF; bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); @@ -867,6 +868,21 @@ xfs_buf_item_dirty( return (bip->bli_flags & XFS_BLI_DIRTY); } +STATIC void +xfs_buf_item_free( + xfs_buf_log_item_t *bip) +{ +#ifdef XFS_TRANS_DEBUG + kmem_free(bip->bli_orig); + kmem_free(bip->bli_logged); +#endif /* XFS_TRANS_DEBUG */ + +#ifdef XFS_BLI_TRACE + ktrace_free(bip->bli_trace); +#endif + kmem_zone_free(xfs_buf_item_zone, bip); +} + /* * This is called when the buf log item is no longer needed. 
It should * free the buf log item associated with the given buffer and clear @@ -887,18 +903,8 @@ xfs_buf_item_relse( (XFS_BUF_IODONE_FUNC(bp) != NULL)) { XFS_BUF_CLR_IODONE_FUNC(bp); } - -#ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig); - bip->bli_orig = NULL; - kmem_free(bip->bli_logged); - bip->bli_logged = NULL; -#endif /* XFS_TRANS_DEBUG */ - -#ifdef XFS_BLI_TRACE - ktrace_free(bip->bli_trace); -#endif - kmem_zone_free(xfs_buf_item_zone, bip); + xfs_buf_rele(bp); + xfs_buf_item_free(bip); } @@ -1120,6 +1126,7 @@ xfs_buf_iodone( ASSERT(bip->bli_buf == bp); + xfs_buf_rele(bp); mp = bip->bli_item.li_mountp; /* @@ -1136,18 +1143,7 @@ xfs_buf_iodone( * xfs_trans_delete_ail() drops the AIL lock. */ xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); - -#ifdef XFS_TRANS_DEBUG - kmem_free(bip->bli_orig); - bip->bli_orig = NULL; - kmem_free(bip->bli_logged); - bip->bli_logged = NULL; -#endif /* XFS_TRANS_DEBUG */ - -#ifdef XFS_BLI_TRACE - ktrace_free(bip->bli_trace); -#endif - kmem_zone_free(xfs_buf_item_zone, bip); + xfs_buf_item_free(bip); } #if defined(XFS_BLI_TRACE) diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 760f4c5b5160..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c @@ -149,7 +149,14 @@ xfs_swap_extents( sbp = &sxp->sx_stat; - xfs_lock_two_inodes(ip, tip, lock_flags); + /* + * we have to do two separate lock calls here to keep lockdep + * happy. If we try to get all the locks in one call, lock will + * report false positives when we drop the ILOCK and regain them + * below. + */ + xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); + xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); locked = 1; /* Verify that both files have the same format */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00e80df9dd9d..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct( ASSERT(nextents <= XFS_LINEAR_EXTS); size = nextents * sizeof(xfs_bmbt_rec_t); - xfs_iext_irec_compact_full(ifp); + xfs_iext_irec_compact_pages(ifp); ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); ep = ifp->if_u1.if_ext_irec->er_extbuf; @@ -4449,8 +4449,7 @@ xfs_iext_irec_remove( * compaction policy is as follows: * * Full Compaction: Extents fit into a single page (or inline buffer) - * Full Compaction: Extents occupy less than 10% of allocated space - * Partial Compaction: Extents occupy > 10% and < 50% of allocated space + * Partial Compaction: Extents occupy less than 50% of allocated space * No Compaction: Extents occupy at least 50% of allocated space */ void @@ -4471,8 +4470,6 @@ xfs_iext_irec_compact( xfs_iext_direct_to_inline(ifp, nextents); } else if (nextents <= XFS_LINEAR_EXTS) { xfs_iext_indirect_to_direct(ifp); - } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { - xfs_iext_irec_compact_full(ifp); } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { xfs_iext_irec_compact_pages(ifp); } @@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages( erp_next = erp + 1; if (erp_next->er_extcount <= (XFS_LINEAR_EXTS - erp->er_extcount)) { - memmove(&erp->er_extbuf[erp->er_extcount], + memcpy(&erp->er_extbuf[erp->er_extcount], erp_next->er_extbuf, erp_next->er_extcount * sizeof(xfs_bmbt_rec_t)); erp->er_extcount += erp_next->er_extcount; @@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages( } /* - * Fully compact the extent records managed by the indirection array. 
- */ -void -xfs_iext_irec_compact_full( - xfs_ifork_t *ifp) /* inode fork pointer */ -{ - xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ - xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ - int erp_idx = 0; /* extent irec index */ - int ext_avail; /* empty entries in ex list */ - int ext_diff; /* number of exts to add */ - int nlists; /* number of irec's (ex lists) */ - - ASSERT(ifp->if_flags & XFS_IFEXTIREC); - - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - erp = ifp->if_u1.if_ext_irec; - ep = &erp->er_extbuf[erp->er_extcount]; - erp_next = erp + 1; - ep_next = erp_next->er_extbuf; - - while (erp_idx < nlists - 1) { - /* - * Check how many extent records are available in this irec. - * If there is none skip the whole exercise. - */ - ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; - if (ext_avail) { - - /* - * Copy over as many as possible extent records into - * the previous page. - */ - ext_diff = MIN(ext_avail, erp_next->er_extcount); - memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); - erp->er_extcount += ext_diff; - erp_next->er_extcount -= ext_diff; - - /* - * If the next irec is empty now we can simply - * remove it. - */ - if (erp_next->er_extcount == 0) { - /* - * Free page before removing extent record - * so er_extoffs don't get modified in - * xfs_iext_irec_remove. - */ - kmem_free(erp_next->er_extbuf); - erp_next->er_extbuf = NULL; - xfs_iext_irec_remove(ifp, erp_idx + 1); - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; - - /* - * If the next irec is not empty move up the content - * that has not been copied to the previous page to - * the beggining of this one. - */ - } else { - memmove(erp_next->er_extbuf, &ep_next[ext_diff], - erp_next->er_extcount * - sizeof(xfs_bmbt_rec_t)); - ep_next = erp_next->er_extbuf; - memset(&ep_next[erp_next->er_extcount], 0, - (XFS_LINEAR_EXTS - - erp_next->er_extcount) * - sizeof(xfs_bmbt_rec_t)); - } - } - - if (erp->er_extcount == XFS_LINEAR_EXTS) { - erp_idx++; - if (erp_idx < nlists) - erp = &ifp->if_u1.if_ext_irec[erp_idx]; - else - break; - } - ep = &erp->er_extbuf[erp->er_extcount]; - erp_next = erp + 1; - ep_next = erp_next->er_extbuf; - } -} - -/* * This is called to update the er_extoff field in the indirection * array when extents have been added or removed from one of the * extent lists. 
erp_idx contains the irec index to begin updating diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccba14eb9dbe..503ea89e8b9a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, STATIC int xlog_iclogs_empty(xlog_t *log); #if defined(XFS_LOG_TRACE) + +#define XLOG_TRACE_LOGGRANT_SIZE 2048 +#define XLOG_TRACE_ICLOG_SIZE 256 + +void +xlog_trace_loggrant_alloc(xlog_t *log) +{ + log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); +} + +void +xlog_trace_loggrant_dealloc(xlog_t *log) +{ + ktrace_free(log->l_grant_trace); +} + void xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) { unsigned long cnts; - if (!log->l_grant_trace) { - log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); - if (!log->l_grant_trace) - return; - } /* ticket counts are 1 byte each */ cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; @@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) } void +xlog_trace_iclog_alloc(xlog_in_core_t *iclog) +{ + iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); +} + +void +xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) +{ + ktrace_free(iclog->ic_trace); +} + +void xlog_trace_iclog(xlog_in_core_t *iclog, uint state) { - if (!iclog->ic_trace) - iclog->ic_trace = ktrace_alloc(256, KM_NOFS); ktrace_enter(iclog->ic_trace, (void *)((unsigned long)state), (void *)((unsigned long)current_pid()), @@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) (void *)NULL, (void *)NULL); } #else + +#define xlog_trace_loggrant_alloc(log) +#define xlog_trace_loggrant_dealloc(log) #define xlog_trace_loggrant(log,tic,string) + +#define xlog_trace_iclog_alloc(iclog) +#define xlog_trace_iclog_dealloc(iclog) #define xlog_trace_iclog(iclog,state) + #endif /* XFS_LOG_TRACE */ @@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp) * layer, it means the underlyin device no longer supports * barrier I/O. Warn loudly and turn off barriers. 
*/ - if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { + if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) { l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; xfs_fs_cmn_err(CE_WARN, l->l_mp, "xlog_iodone: Barriers are no longer supported" @@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp, spin_lock_init(&log->l_grant_lock); sv_init(&log->l_flush_wait, 0, "flush_wait"); + xlog_trace_loggrant_alloc(log); /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); @@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp, sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); + xlog_trace_iclog_alloc(iclog); + iclogp = &iclog->ic_next; } *iclogp = log->l_iclog; /* complete ring */ @@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log) sv_destroy(&iclog->ic_force_wait); sv_destroy(&iclog->ic_write_wait); xfs_buf_free(iclog->ic_bp); -#ifdef XFS_LOG_TRACE - if (iclog->ic_trace != NULL) { - ktrace_free(iclog->ic_trace); - } -#endif + xlog_trace_iclog_dealloc(iclog); next_iclog = iclog->ic_next; kmem_free(iclog); iclog = next_iclog; @@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log) spinlock_destroy(&log->l_grant_lock); xfs_buf_free(log->l_xbuf); -#ifdef XFS_LOG_TRACE - if (log->l_trace != NULL) { - ktrace_free(log->l_trace); - } - if (log->l_grant_trace != NULL) { - ktrace_free(log->l_grant_trace); - } -#endif + xlog_trace_loggrant_dealloc(log); log->l_mp->m_log = NULL; kmem_free(log); } /* xlog_dealloc_log */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c8a5b22ee3e3..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -448,7 +448,6 @@ typedef struct log { int l_grant_write_bytes; #ifdef XFS_LOG_TRACE - struct ktrace *l_trace; struct ktrace *l_grant_trace; #endif diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index aa238c8fbd7a..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c @@ -1838,6 +1838,12 @@ again: #endif } +/* + * xfs_lock_two_inodes() can only be used to lock one type of lock + * at a time - the iolock or the ilock, but not both at once. If + * we lock both at once, lockdep will report false positives saying + * we have violated locking orders. + */ void xfs_lock_two_inodes( xfs_inode_t *ip0, @@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( int attempts = 0; xfs_log_item_t *lp; + if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) + ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); ASSERT(ip0->i_ino != ip1->i_ino); if (ip0->i_ino > ip1->i_ino) { @@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ /* * Zero file bytes between startoff and endoff inclusive. * The iolock is held exclusive and no blocks are buffered. + * + * This function is used by xfs_free_file_space() to zero + * partial blocks when the range to free is not block aligned. + * When unreserving space with boundaries that are not block + * aligned we round up the start and round down the end + * boundaries and then use this function to zero the parts of + * the blocks that got dropped during the rounding. */ STATIC int xfs_zero_remaining_bytes( @@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( int nimap; int error = 0; + /* + * Avoid doing I/O beyond eof - it's not necessary + * since nothing can read beyond eof. The space will + * be zeroed when the file is extended anyway. 
+ */ + if (startoff >= ip->i_size) + return 0; + + if (endoff > ip->i_size) + endoff = ip->i_size; + bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp); |
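
Aside on the inotify_read() hunk above: the change dequeues the event under dev->ev_mutex, drops the mutex before copy_to_user() to avoid a lock-order reversal with mmap_sem, and only then frees the event. A minimal userspace analogue of that pattern (hypothetical names, pthreads standing in for the kernel mutex, write() standing in for copy_to_user()) might look like:

	#include <pthread.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <string.h>
	#include <unistd.h>

	struct event {
		struct event *next;
		char payload[64];
	};

	static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
	static struct event *queue_head;

	/*
	 * Dequeue under the lock, but do the slow (possibly faulting) work only
	 * after the lock is dropped, mirroring how the patched inotify_read()
	 * releases ev_mutex before copy_to_user().
	 */
	static int drain_one(int fd)
	{
		struct event *ev;

		pthread_mutex_lock(&queue_lock);
		ev = queue_head;
		if (!ev) {
			pthread_mutex_unlock(&queue_lock);
			return 0;
		}
		queue_head = ev->next;		/* analogue of remove_kevent() */
		pthread_mutex_unlock(&queue_lock);

		/* May block or fault; must not run with queue_lock held. */
		if (write(fd, ev->payload, strlen(ev->payload)) < 0)
			perror("write");

		free(ev);			/* analogue of free_kevent() */
		return 1;
	}

	int main(void)
	{
		struct event *ev = calloc(1, sizeof(*ev));

		if (!ev)
			return 1;
		snprintf(ev->payload, sizeof(ev->payload), "hello\n");
		queue_head = ev;
		while (drain_one(STDOUT_FILENO))
			;
		return 0;
	}

This is an illustrative sketch only, not part of the commit; the actual kernel change keeps the same split (remove under the lock, copy and free outside it) inside fs/inotify_user.c.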