diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-09 13:29:42 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-10-09 13:29:42 -0700 |
commit | a372bf8b6a12f23f68e716113ccaea4bf646dd0f (patch) | |
tree | 3852e11c78e5dea779f49e9a94288c8ac827e821 /fs/xfs/linux-2.6 | |
parent | 0d5d1aadc8e299874a6a014d65b6bb903b12424d (diff) | |
parent | e09d39968bd8befa087f10f970fa236e8694b643 (diff) |
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs:
xfs: stop calling filemap_fdatawait inside ->fsync
fix readahead calculations in xfs_dir2_leaf_getdents()
xfs: make sure xfs_sync_fsdata covers the log
xfs: mark inodes dirty before issuing I/O
xfs: cleanup ->sync_fs
xfs: fix xfs_quiesce_data
xfs: implement ->dirty_inode to fix timestamp handling
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 38 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_file.c | 9 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_iops.c | 41 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_lrw.c | 2 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_super.c | 59 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_sync.c | 36 |
6 files changed, 112 insertions, 73 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 381854461b28..c2e30eea74dc 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -186,19 +186,37 @@ xfs_destroy_ioend( } /* + * If the end of the current ioend is beyond the current EOF, + * return the new EOF value, otherwise zero. + */ +STATIC xfs_fsize_t +xfs_ioend_new_eof( + xfs_ioend_t *ioend) +{ + xfs_inode_t *ip = XFS_I(ioend->io_inode); + xfs_fsize_t isize; + xfs_fsize_t bsize; + + bsize = ioend->io_offset + ioend->io_size; + isize = MAX(ip->i_size, ip->i_new_size); + isize = MIN(isize, bsize); + return isize > ip->i_d.di_size ? isize : 0; +} + +/* * Update on-disk file size now that data has been written to disk. * The current in-memory file size is i_size. If a write is beyond * eof i_new_size will be the intended file size until i_size is * updated. If this write does not extend all the way to the valid * file size then restrict this update to the end of the write. */ + STATIC void xfs_setfilesize( xfs_ioend_t *ioend) { xfs_inode_t *ip = XFS_I(ioend->io_inode); xfs_fsize_t isize; - xfs_fsize_t bsize; ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG); ASSERT(ioend->io_type != IOMAP_READ); @@ -206,16 +224,10 @@ xfs_setfilesize( if (unlikely(ioend->io_error)) return; - bsize = ioend->io_offset + ioend->io_size; - xfs_ilock(ip, XFS_ILOCK_EXCL); - - isize = MAX(ip->i_size, ip->i_new_size); - isize = MIN(isize, bsize); - - if (ip->i_d.di_size < isize) { + isize = xfs_ioend_new_eof(ioend); + if (isize) { ip->i_d.di_size = isize; - ip->i_update_core = 1; xfs_mark_inode_dirty_sync(ip); } @@ -404,10 +416,16 @@ xfs_submit_ioend_bio( struct bio *bio) { atomic_inc(&ioend->io_remaining); - bio->bi_private = ioend; bio->bi_end_io = xfs_end_bio; + /* + * If the I/O is beyond EOF we mark the inode dirty immediately + * but don't update the inode size until I/O completion. + */ + if (xfs_ioend_new_eof(ioend)) + xfs_mark_inode_dirty_sync(XFS_I(ioend->io_inode)); + submit_bio(WRITE, bio); ASSERT(!bio_flagged(bio, BIO_EOPNOTSUPP)); bio_put(bio); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 629370974e57..eff61e2732af 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -176,14 +176,7 @@ xfs_file_fsync( struct dentry *dentry, int datasync) { - struct inode *inode = dentry->d_inode; - struct xfs_inode *ip = XFS_I(inode); - int error; - - /* capture size updates in I/O completion before writing the inode. */ - error = filemap_fdatawait(inode->i_mapping); - if (error) - return error; + struct xfs_inode *ip = XFS_I(dentry->d_inode); xfs_iflags_clear(ip, XFS_ITRUNCATED); return -xfs_fsync(ip); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index da0159d99f82..cd42ef78f6b5 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -57,19 +57,22 @@ #include <linux/fiemap.h> /* - * Bring the atime in the XFS inode uptodate. - * Used before logging the inode to disk or when the Linux inode goes away. + * Bring the timestamps in the XFS inode uptodate. + * + * Used before writing the inode to disk. */ void -xfs_synchronize_atime( +xfs_synchronize_times( xfs_inode_t *ip) { struct inode *inode = VFS_I(ip); - if (!(inode->i_state & I_CLEAR)) { - ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; - ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; - } + ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec; + ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec; + ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec; + ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec; + ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec; + ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec; } /* @@ -106,32 +109,20 @@ xfs_ichgtime( if ((flags & XFS_ICHGTIME_MOD) && !timespec_equal(&inode->i_mtime, &tv)) { inode->i_mtime = tv; - ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec; sync_it = 1; } if ((flags & XFS_ICHGTIME_CHG) && !timespec_equal(&inode->i_ctime, &tv)) { inode->i_ctime = tv; - ip->i_d.di_ctime.t_sec = (__int32_t)tv.tv_sec; - ip->i_d.di_ctime.t_nsec = (__int32_t)tv.tv_nsec; sync_it = 1; } /* - * We update the i_update_core field _after_ changing - * the timestamps in order to coordinate properly with - * xfs_iflush() so that we don't lose timestamp updates. - * This keeps us from having to hold the inode lock - * while doing this. We use the SYNCHRONIZE macro to - * ensure that the compiler does not reorder the update - * of i_update_core above the timestamp updates above. + * Update complete - now make sure everyone knows that the inode + * is dirty. */ - if (sync_it) { - SYNCHRONIZE(); - ip->i_update_core = 1; + if (sync_it) xfs_mark_inode_dirty_sync(ip); - } } /* @@ -506,10 +497,8 @@ xfs_vn_getattr( stat->gid = ip->i_d.di_gid; stat->ino = ip->i_ino; stat->atime = inode->i_atime; - stat->mtime.tv_sec = ip->i_d.di_mtime.t_sec; - stat->mtime.tv_nsec = ip->i_d.di_mtime.t_nsec; - stat->ctime.tv_sec = ip->i_d.di_ctime.t_sec; - stat->ctime.tv_nsec = ip->i_d.di_ctime.t_nsec; + stat->mtime = inode->i_mtime; + stat->ctime = inode->i_ctime; stat->blocks = XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 49e4a6aea73c..072050f8d346 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -667,7 +667,7 @@ start: xip->i_new_size = new_size; if (likely(!(ioflags & IO_INVIS))) - xfs_ichgtime(xip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + file_update_time(file); /* * If the offset is beyond the size of the file, we have a couple diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index bdd41c8c342f..18a4b8e11df2 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -977,6 +977,28 @@ xfs_fs_inode_init_once( } /* + * Dirty the XFS inode when mark_inode_dirty_sync() is called so that + * we catch unlogged VFS level updates to the inode. Care must be taken + * here - the transaction code calls mark_inode_dirty_sync() to mark the + * VFS inode dirty in a transaction and clears the i_update_core field; + * it must clear the field after calling mark_inode_dirty_sync() to + * correctly indicate that the dirty state has been propagated into the + * inode log item. + * + * We need the barrier() to maintain correct ordering between unlogged + * updates and the transaction commit code that clears the i_update_core + * field. This requires all updates to be completed before marking the + * inode dirty. + */ +STATIC void +xfs_fs_dirty_inode( + struct inode *inode) +{ + barrier(); + XFS_I(inode)->i_update_core = 1; +} + +/* * Attempt to flush the inode, this will actually fail * if the inode is pinned, but we dirty the inode again * at the point when it is unpinned after a log write, @@ -1126,7 +1148,7 @@ xfs_fs_put_super( } STATIC int -xfs_fs_sync_super( +xfs_fs_sync_fs( struct super_block *sb, int wait) { @@ -1134,23 +1156,23 @@ xfs_fs_sync_super( int error; /* - * Treat a sync operation like a freeze. This is to work - * around a race in sync_inodes() which works in two phases - * - an asynchronous flush, which can write out an inode - * without waiting for file size updates to complete, and a - * synchronous flush, which wont do anything because the - * async flush removed the inode's dirty flag. Also - * sync_inodes() will not see any files that just have - * outstanding transactions to be flushed because we don't - * dirty the Linux inode until after the transaction I/O - * completes. + * Not much we can do for the first async pass. Writing out the + * superblock would be counter-productive as we are going to redirty + * when writing out other data and metadata (and writing out a single + * block is quite fast anyway). + * + * Try to asynchronously kick off quota syncing at least. */ - if (wait || unlikely(sb->s_frozen == SB_FREEZE_WRITE)) - error = xfs_quiesce_data(mp); - else - error = xfs_sync_fsdata(mp, 0); + if (!wait) { + xfs_qm_sync(mp, SYNC_TRYLOCK); + return 0; + } + + error = xfs_quiesce_data(mp); + if (error) + return -error; - if (unlikely(laptop_mode)) { + if (laptop_mode) { int prev_sync_seq = mp->m_sync_seq; /* @@ -1169,7 +1191,7 @@ xfs_fs_sync_super( mp->m_sync_seq != prev_sync_seq); } - return -error; + return 0; } STATIC int @@ -1539,10 +1561,11 @@ xfs_fs_get_sb( static const struct super_operations xfs_super_operations = { .alloc_inode = xfs_fs_alloc_inode, .destroy_inode = xfs_fs_destroy_inode, + .dirty_inode = xfs_fs_dirty_inode, .write_inode = xfs_fs_write_inode, .clear_inode = xfs_fs_clear_inode, .put_super = xfs_fs_put_super, - .sync_fs = xfs_fs_sync_super, + .sync_fs = xfs_fs_sync_fs, .freeze_fs = xfs_fs_freeze, .statfs = xfs_fs_statfs, .remount_fs = xfs_fs_remount, diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 320be6aea492..961df0a22c78 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c @@ -309,11 +309,15 @@ xfs_sync_attr( STATIC int xfs_commit_dummy_trans( struct xfs_mount *mp, - uint log_flags) + uint flags) { struct xfs_inode *ip = mp->m_rootip; struct xfs_trans *tp; int error; + int log_flags = XFS_LOG_FORCE; + + if (flags & SYNC_WAIT) + log_flags |= XFS_LOG_SYNC; /* * Put a dummy transaction in the log to tell recovery @@ -331,13 +335,12 @@ xfs_commit_dummy_trans( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_ihold(tp, ip); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - /* XXX(hch): ignoring the error here.. */ error = xfs_trans_commit(tp, 0); - xfs_iunlock(ip, XFS_ILOCK_EXCL); + /* the log force ensures this transaction is pushed to disk */ xfs_log_force(mp, 0, log_flags); - return 0; + return error; } int @@ -385,7 +388,20 @@ xfs_sync_fsdata( else XFS_BUF_ASYNC(bp); - return xfs_bwrite(mp, bp); + error = xfs_bwrite(mp, bp); + if (error) + return error; + + /* + * If this is a data integrity sync make sure all pending buffers + * are flushed out for the log coverage check below. + */ + if (flags & SYNC_WAIT) + xfs_flush_buftarg(mp->m_ddev_targp, 1); + + if (xfs_log_need_covered(mp)) + error = xfs_commit_dummy_trans(mp, flags); + return error; out_brelse: xfs_buf_relse(bp); @@ -419,14 +435,16 @@ xfs_quiesce_data( /* push non-blocking */ xfs_sync_data(mp, 0); xfs_qm_sync(mp, SYNC_TRYLOCK); - xfs_filestream_flush(mp); - /* push and block */ + /* push and block till complete */ xfs_sync_data(mp, SYNC_WAIT); xfs_qm_sync(mp, SYNC_WAIT); + /* drop inode references pinned by filestreams */ + xfs_filestream_flush(mp); + /* write superblock and hoover up shutdown errors */ - error = xfs_sync_fsdata(mp, 0); + error = xfs_sync_fsdata(mp, SYNC_WAIT); /* flush data-only devices */ if (mp->m_rtdev_targp) @@ -570,8 +588,6 @@ xfs_sync_worker( /* dgc: errors ignored here */ error = xfs_qm_sync(mp, SYNC_TRYLOCK); error = xfs_sync_fsdata(mp, SYNC_TRYLOCK); - if (xfs_log_need_covered(mp)) - error = xfs_commit_dummy_trans(mp, XFS_LOG_FORCE); } mp->m_sync_seq++; wake_up(&mp->m_wait_single_sync_task); |