summaryrefslogtreecommitdiff
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 07:22:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2010-05-21 07:22:11 -0700
commit277a4ed1df42134f66503db837e40f0b583ec63d (patch)
tree9f3107c51fa9d45ff35f8bc5f68306440b0b2948 /fs/xfs/linux-2.6
parent03e62303cf56e87337115f14842321043df2b4bb (diff)
parente9cee8e6549b669080e9a7d02b8387086a5c911d (diff)
Merge branch 'for-linus' of git://oss.sgi.com/xfs/xfs
* 'for-linus' of git://oss.sgi.com/xfs/xfs: (54 commits) xfs: mark xfs_iomap_write_ helpers static xfs: clean up end index calculation in xfs_page_state_convert xfs: clean up mapping size calculation in __xfs_get_blocks xfs: clean up xfs_iomap_valid xfs: move I/O type flags into xfs_aops.c xfs: kill struct xfs_iomap xfs: report iomap_bn in block base xfs: report iomap_offset and iomap_bsize in block base xfs: remove iomap_delta xfs: remove iomap_target xfs: limit xfs_imap_to_bmap to a single mapping xfs: simplify buffer to transaction matching xfs: Make fiemap work in query mode. xfs: kill off l_sectbb_mask xfs: record log sector size rather than log2(that) xfs: remove dead XFS_LOUD_RECOVERY code xfs: removed unused XFS_QMOPT_ flags xfs: remove a few macro indirections in the quota code xfs: access quotainfo structure directly xfs: wait for direct I/O to complete in fsync and write_inode ...
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c231
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c27
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c91
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h150
11 files changed, 345 insertions, 185 deletions
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 0f8b9968a803..089eaca860b4 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -45,6 +45,15 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
+/*
+ * Types of I/O for bmap clustering and I/O completion tracking.
+ */
+enum {
+ IO_READ, /* mapping for a read */
+ IO_DELAY, /* mapping covers delalloc region */
+ IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
+ IO_NEW /* just allocated */
+};
/*
* Prime number of hash buckets since address is used as the key.
@@ -103,8 +112,9 @@ xfs_count_page_state(
STATIC struct block_device *
xfs_find_bdev_for_inode(
- struct xfs_inode *ip)
+ struct inode *inode)
{
+ struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
if (XFS_IS_REALTIME_INODE(ip))
@@ -183,7 +193,7 @@ xfs_setfilesize(
xfs_fsize_t isize;
ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
- ASSERT(ioend->io_type != IOMAP_READ);
+ ASSERT(ioend->io_type != IO_READ);
if (unlikely(ioend->io_error))
return 0;
@@ -214,7 +224,7 @@ xfs_finish_ioend(
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct workqueue_struct *wq;
- wq = (ioend->io_type == IOMAP_UNWRITTEN) ?
+ wq = (ioend->io_type == IO_UNWRITTEN) ?
xfsconvertd_workqueue : xfsdatad_workqueue;
queue_work(wq, &ioend->io_work);
if (wait)
@@ -237,7 +247,7 @@ xfs_end_io(
* For unwritten extents we need to issue transactions to convert a
* range to normal written extens after the data I/O has finished.
*/
- if (ioend->io_type == IOMAP_UNWRITTEN &&
+ if (ioend->io_type == IO_UNWRITTEN &&
likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
@@ -250,7 +260,7 @@ xfs_end_io(
* We might have to update the on-disk file size after extending
* writes.
*/
- if (ioend->io_type != IOMAP_READ) {
+ if (ioend->io_type != IO_READ) {
error = xfs_setfilesize(ioend);
ASSERT(!error || error == EAGAIN);
}
@@ -309,21 +319,25 @@ xfs_map_blocks(
struct inode *inode,
loff_t offset,
ssize_t count,
- xfs_iomap_t *mapp,
+ struct xfs_bmbt_irec *imap,
int flags)
{
int nmaps = 1;
+ int new = 0;
- return -xfs_iomap(XFS_I(inode), offset, count, flags, mapp, &nmaps);
+ return -xfs_iomap(XFS_I(inode), offset, count, flags, imap, &nmaps, &new);
}
STATIC int
-xfs_iomap_valid(
- xfs_iomap_t *iomapp,
- loff_t offset)
+xfs_imap_valid(
+ struct inode *inode,
+ struct xfs_bmbt_irec *imap,
+ xfs_off_t offset)
{
- return offset >= iomapp->iomap_offset &&
- offset < iomapp->iomap_offset + iomapp->iomap_bsize;
+ offset >>= inode->i_blkbits;
+
+ return offset >= imap->br_startoff &&
+ offset < imap->br_startoff + imap->br_blockcount;
}
/*
@@ -554,19 +568,23 @@ xfs_add_to_ioend(
STATIC void
xfs_map_buffer(
+ struct inode *inode,
struct buffer_head *bh,
- xfs_iomap_t *mp,
- xfs_off_t offset,
- uint block_bits)
+ struct xfs_bmbt_irec *imap,
+ xfs_off_t offset)
{
sector_t bn;
+ struct xfs_mount *m = XFS_I(inode)->i_mount;
+ xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
+ xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
- ASSERT(mp->iomap_bn != IOMAP_DADDR_NULL);
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
- bn = (mp->iomap_bn >> (block_bits - BBSHIFT)) +
- ((offset - mp->iomap_offset) >> block_bits);
+ bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
+ ((offset - iomap_offset) >> inode->i_blkbits);
- ASSERT(bn || (mp->iomap_flags & IOMAP_REALTIME));
+ ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
bh->b_blocknr = bn;
set_buffer_mapped(bh);
@@ -574,17 +592,17 @@ xfs_map_buffer(
STATIC void
xfs_map_at_offset(
+ struct inode *inode,
struct buffer_head *bh,
- loff_t offset,
- int block_bits,
- xfs_iomap_t *iomapp)
+ struct xfs_bmbt_irec *imap,
+ xfs_off_t offset)
{
- ASSERT(!(iomapp->iomap_flags & IOMAP_HOLE));
- ASSERT(!(iomapp->iomap_flags & IOMAP_DELAY));
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
lock_buffer(bh);
- xfs_map_buffer(bh, iomapp, offset, block_bits);
- bh->b_bdev = iomapp->iomap_target->bt_bdev;
+ xfs_map_buffer(inode, bh, imap, offset);
+ bh->b_bdev = xfs_find_bdev_for_inode(inode);
set_buffer_mapped(bh);
clear_buffer_delay(bh);
clear_buffer_unwritten(bh);
@@ -713,11 +731,11 @@ xfs_is_delayed_page(
bh = head = page_buffers(page);
do {
if (buffer_unwritten(bh))
- acceptable = (type == IOMAP_UNWRITTEN);
+ acceptable = (type == IO_UNWRITTEN);
else if (buffer_delay(bh))
- acceptable = (type == IOMAP_DELAY);
+ acceptable = (type == IO_DELAY);
else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable = (type == IOMAP_NEW);
+ acceptable = (type == IO_NEW);
else
break;
} while ((bh = bh->b_this_page) != head);
@@ -740,7 +758,7 @@ xfs_convert_page(
struct inode *inode,
struct page *page,
loff_t tindex,
- xfs_iomap_t *mp,
+ struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
int startio,
@@ -750,7 +768,6 @@ xfs_convert_page(
xfs_off_t end_offset;
unsigned long p_offset;
unsigned int type;
- int bbits = inode->i_blkbits;
int len, page_dirty;
int count = 0, done = 0, uptodate = 1;
xfs_off_t offset = page_offset(page);
@@ -802,19 +819,19 @@ xfs_convert_page(
if (buffer_unwritten(bh) || buffer_delay(bh)) {
if (buffer_unwritten(bh))
- type = IOMAP_UNWRITTEN;
+ type = IO_UNWRITTEN;
else
- type = IOMAP_DELAY;
+ type = IO_DELAY;
- if (!xfs_iomap_valid(mp, offset)) {
+ if (!xfs_imap_valid(inode, imap, offset)) {
done = 1;
continue;
}
- ASSERT(!(mp->iomap_flags & IOMAP_HOLE));
- ASSERT(!(mp->iomap_flags & IOMAP_DELAY));
+ ASSERT(imap->br_startblock != HOLESTARTBLOCK);
+ ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
- xfs_map_at_offset(bh, offset, bbits, mp);
+ xfs_map_at_offset(inode, bh, imap, offset);
if (startio) {
xfs_add_to_ioend(inode, bh, offset,
type, ioendp, done);
@@ -826,7 +843,7 @@ xfs_convert_page(
page_dirty--;
count++;
} else {
- type = IOMAP_NEW;
+ type = IO_NEW;
if (buffer_mapped(bh) && all_bh && startio) {
lock_buffer(bh);
xfs_add_to_ioend(inode, bh, offset,
@@ -866,7 +883,7 @@ STATIC void
xfs_cluster_write(
struct inode *inode,
pgoff_t tindex,
- xfs_iomap_t *iomapp,
+ struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
int startio,
@@ -885,7 +902,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- iomapp, ioendp, wbc, startio, all_bh);
+ imap, ioendp, wbc, startio, all_bh);
if (done)
break;
}
@@ -930,7 +947,7 @@ xfs_aops_discard_page(
loff_t offset = page_offset(page);
ssize_t len = 1 << inode->i_blkbits;
- if (!xfs_is_delayed_page(page, IOMAP_DELAY))
+ if (!xfs_is_delayed_page(page, IO_DELAY))
goto out_invalidate;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -1042,15 +1059,15 @@ xfs_page_state_convert(
int unmapped) /* also implies page uptodate */
{
struct buffer_head *bh, *head;
- xfs_iomap_t iomap;
+ struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
unsigned long p_offset = 0;
unsigned int type;
__uint64_t end_offset;
- pgoff_t end_index, last_index, tlast;
+ pgoff_t end_index, last_index;
ssize_t size, len;
- int flags, err, iomap_valid = 0, uptodate = 1;
+ int flags, err, imap_valid = 0, uptodate = 1;
int page_dirty, count = 0;
int trylock = 0;
int all_bh = unmapped;
@@ -1097,7 +1114,7 @@ xfs_page_state_convert(
bh = head = page_buffers(page);
offset = page_offset(page);
flags = BMAPI_READ;
- type = IOMAP_NEW;
+ type = IO_NEW;
/* TODO: cleanup count and page_dirty */
@@ -1111,12 +1128,12 @@ xfs_page_state_convert(
* the iomap is actually still valid, but the ioend
* isn't. shouldn't happen too often.
*/
- iomap_valid = 0;
+ imap_valid = 0;
continue;
}
- if (iomap_valid)
- iomap_valid = xfs_iomap_valid(&iomap, offset);
+ if (imap_valid)
+ imap_valid = xfs_imap_valid(inode, &imap, offset);
/*
* First case, map an unwritten extent and prepare for
@@ -1137,20 +1154,20 @@ xfs_page_state_convert(
* Make sure we don't use a read-only iomap
*/
if (flags == BMAPI_READ)
- iomap_valid = 0;
+ imap_valid = 0;
if (buffer_unwritten(bh)) {
- type = IOMAP_UNWRITTEN;
+ type = IO_UNWRITTEN;
flags = BMAPI_WRITE | BMAPI_IGNSTATE;
} else if (buffer_delay(bh)) {
- type = IOMAP_DELAY;
+ type = IO_DELAY;
flags = BMAPI_ALLOCATE | trylock;
} else {
- type = IOMAP_NEW;
+ type = IO_NEW;
flags = BMAPI_WRITE | BMAPI_MMAP;
}
- if (!iomap_valid) {
+ if (!imap_valid) {
/*
* if we didn't have a valid mapping then we
* need to ensure that we put the new mapping
@@ -1160,7 +1177,7 @@ xfs_page_state_convert(
* for unwritten extent conversion.
*/
new_ioend = 1;
- if (type == IOMAP_NEW) {
+ if (type == IO_NEW) {
size = xfs_probe_cluster(inode,
page, bh, head, 0);
} else {
@@ -1168,14 +1185,14 @@ xfs_page_state_convert(
}
err = xfs_map_blocks(inode, offset, size,
- &iomap, flags);
+ &imap, flags);
if (err)
goto error;
- iomap_valid = xfs_iomap_valid(&iomap, offset);
+ imap_valid = xfs_imap_valid(inode, &imap,
+ offset);
}
- if (iomap_valid) {
- xfs_map_at_offset(bh, offset,
- inode->i_blkbits, &iomap);
+ if (imap_valid) {
+ xfs_map_at_offset(inode, bh, &imap, offset);
if (startio) {
xfs_add_to_ioend(inode, bh, offset,
type, &ioend,
@@ -1194,40 +1211,41 @@ xfs_page_state_convert(
* That means it must already have extents allocated
* underneath it. Map the extent by reading it.
*/
- if (!iomap_valid || flags != BMAPI_READ) {
+ if (!imap_valid || flags != BMAPI_READ) {
flags = BMAPI_READ;
size = xfs_probe_cluster(inode, page, bh,
head, 1);
err = xfs_map_blocks(inode, offset, size,
- &iomap, flags);
+ &imap, flags);
if (err)
goto error;
- iomap_valid = xfs_iomap_valid(&iomap, offset);
+ imap_valid = xfs_imap_valid(inode, &imap,
+ offset);
}
/*
- * We set the type to IOMAP_NEW in case we are doing a
+ * We set the type to IO_NEW in case we are doing a
* small write at EOF that is extending the file but
* without needing an allocation. We need to update the
* file size on I/O completion in this case so it is
* the same case as having just allocated a new extent
* that we are writing into for the first time.
*/
- type = IOMAP_NEW;
+ type = IO_NEW;
if (trylock_buffer(bh)) {
ASSERT(buffer_mapped(bh));
- if (iomap_valid)
+ if (imap_valid)
all_bh = 1;
xfs_add_to_ioend(inode, bh, offset, type,
- &ioend, !iomap_valid);
+ &ioend, !imap_valid);
page_dirty--;
count++;
} else {
- iomap_valid = 0;
+ imap_valid = 0;
}
} else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
(unmapped || startio)) {
- iomap_valid = 0;
+ imap_valid = 0;
}
if (!iohead)
@@ -1241,12 +1259,23 @@ xfs_page_state_convert(
if (startio)
xfs_start_page_writeback(page, 1, count);
- if (ioend && iomap_valid) {
- offset = (iomap.iomap_offset + iomap.iomap_bsize - 1) >>
- PAGE_CACHE_SHIFT;
- tlast = min_t(pgoff_t, offset, last_index);
- xfs_cluster_write(inode, page->index + 1, &iomap, &ioend,
- wbc, startio, all_bh, tlast);
+ if (ioend && imap_valid) {
+ xfs_off_t end_index;
+
+ end_index = imap.br_startoff + imap.br_blockcount;
+
+ /* to bytes */
+ end_index <<= inode->i_blkbits;
+
+ /* to pages */
+ end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
+
+ /* check against file size */
+ if (end_index > last_index)
+ end_index = last_index;
+
+ xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
+ wbc, startio, all_bh, end_index);
}
if (iohead)
@@ -1448,10 +1477,11 @@ __xfs_get_blocks(
int direct,
bmapi_flags_t flags)
{
- xfs_iomap_t iomap;
+ struct xfs_bmbt_irec imap;
xfs_off_t offset;
ssize_t size;
- int niomap = 1;
+ int nimap = 1;
+ int new = 0;
int error;
offset = (xfs_off_t)iblock << inode->i_blkbits;
@@ -1462,22 +1492,21 @@ __xfs_get_blocks(
return 0;
error = xfs_iomap(XFS_I(inode), offset, size,
- create ? flags : BMAPI_READ, &iomap, &niomap);
+ create ? flags : BMAPI_READ, &imap, &nimap, &new);
if (error)
return -error;
- if (niomap == 0)
+ if (nimap == 0)
return 0;
- if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
+ if (imap.br_startblock != HOLESTARTBLOCK &&
+ imap.br_startblock != DELAYSTARTBLOCK) {
/*
* For unwritten extents do not report a disk address on
* the read case (treat as if we're reading into a hole).
*/
- if (create || !(iomap.iomap_flags & IOMAP_UNWRITTEN)) {
- xfs_map_buffer(bh_result, &iomap, offset,
- inode->i_blkbits);
- }
- if (create && (iomap.iomap_flags & IOMAP_UNWRITTEN)) {
+ if (create || !ISUNWRITTEN(&imap))
+ xfs_map_buffer(inode, bh_result, &imap, offset);
+ if (create && ISUNWRITTEN(&imap)) {
if (direct)
bh_result->b_private = inode;
set_buffer_unwritten(bh_result);
@@ -1488,7 +1517,7 @@ __xfs_get_blocks(
* If this is a realtime file, data may be on a different device.
* to that pointed to from the buffer_head b_bdev currently.
*/
- bh_result->b_bdev = iomap.iomap_target->bt_bdev;
+ bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
/*
* If we previously allocated a block out beyond eof and we are now
@@ -1502,10 +1531,10 @@ __xfs_get_blocks(
if (create &&
((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
(offset >= i_size_read(inode)) ||
- (iomap.iomap_flags & (IOMAP_NEW|IOMAP_UNWRITTEN))))
+ (new || ISUNWRITTEN(&imap))))
set_buffer_new(bh_result);
- if (iomap.iomap_flags & IOMAP_DELAY) {
+ if (imap.br_startblock == DELAYSTARTBLOCK) {
BUG_ON(direct);
if (create) {
set_buffer_uptodate(bh_result);
@@ -1514,11 +1543,23 @@ __xfs_get_blocks(
}
}
+ /*
+ * If this is O_DIRECT or the mpage code calling tell them how large
+ * the mapping is, so that we can avoid repeated get_blocks calls.
+ */
if (direct || size > (1 << inode->i_blkbits)) {
- ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
- offset = min_t(xfs_off_t,
- iomap.iomap_bsize - iomap.iomap_delta, size);
- bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
+ xfs_off_t mapping_size;
+
+ mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
+ mapping_size <<= inode->i_blkbits;
+
+ ASSERT(mapping_size > 0);
+ if (mapping_size > size)
+ mapping_size = size;
+ if (mapping_size > LONG_MAX)
+ mapping_size = LONG_MAX;
+
+ bh_result->b_size = mapping_size;
}
return 0;
@@ -1576,7 +1617,7 @@ xfs_end_io_direct(
*/
ioend->io_offset = offset;
ioend->io_size = size;
- if (ioend->io_type == IOMAP_READ) {
+ if (ioend->io_type == IO_READ) {
xfs_finish_ioend(ioend, 0);
} else if (private && size > 0) {
xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
@@ -1587,7 +1628,7 @@ xfs_end_io_direct(
* didn't map an unwritten extent so switch it's completion
* handler.
*/
- ioend->io_type = IOMAP_NEW;
+ ioend->io_type = IO_NEW;
xfs_finish_ioend(ioend, 0);
}
@@ -1612,10 +1653,10 @@ xfs_vm_direct_IO(
struct block_device *bdev;
ssize_t ret;
- bdev = xfs_find_bdev_for_inode(XFS_I(inode));
+ bdev = xfs_find_bdev_for_inode(inode);
iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
- IOMAP_UNWRITTEN : IOMAP_READ);
+ IO_UNWRITTEN : IO_READ);
ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
offset, nr_segs,
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 44c2b0ef9a41..f01de3c55c43 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1007,25 +1007,20 @@ xfs_bwrite(
struct xfs_mount *mp,
struct xfs_buf *bp)
{
- int iowait = (bp->b_flags & XBF_ASYNC) == 0;
- int error = 0;
+ int error;
bp->b_strat = xfs_bdstrat_cb;
bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
- if (!iowait)
- bp->b_flags |= _XBF_RUN_QUEUES;
+ bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
xfs_buf_delwri_dequeue(bp);
xfs_buf_iostrategy(bp);
- if (iowait) {
- error = xfs_buf_iowait(bp);
- if (error)
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- xfs_buf_relse(bp);
- }
-
+ error = xfs_buf_iowait(bp);
+ if (error)
+ xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+ xfs_buf_relse(bp);
return error;
}
@@ -1614,7 +1609,8 @@ xfs_mapping_buftarg(
STATIC int
xfs_alloc_delwrite_queue(
- xfs_buftarg_t *btp)
+ xfs_buftarg_t *btp,
+ const char *fsname)
{
int error = 0;
@@ -1622,7 +1618,7 @@ xfs_alloc_delwrite_queue(
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
btp->bt_flags = 0;
- btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd");
+ btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
if (IS_ERR(btp->bt_task)) {
error = PTR_ERR(btp->bt_task);
goto out_error;
@@ -1635,7 +1631,8 @@ out_error:
xfs_buftarg_t *
xfs_alloc_buftarg(
struct block_device *bdev,
- int external)
+ int external,
+ const char *fsname)
{
xfs_buftarg_t *btp;
@@ -1647,7 +1644,7 @@ xfs_alloc_buftarg(
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
- if (xfs_alloc_delwrite_queue(btp))
+ if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
xfs_alloc_bufhash(btp, external);
return btp;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 386e7361e50e..5fbecefa5dfd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -390,7 +390,7 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
/*
* Handling of buftargs.
*/
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 42dd3bcfba6b..d8fb1b5d6cb5 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -115,6 +115,8 @@ xfs_file_fsync(
xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ xfs_ioend_wait(ip);
+
/*
* We always need to make sure that the required inode state is safe on
* disk. The inode might be clean but we still might need to force the
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 7b26cc2fd284..699b60cbab9c 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -527,6 +527,10 @@ xfs_attrmulti_by_handle(
if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
return -XFS_ERROR(EFAULT);
+ /* overflow check */
+ if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
+ return -E2BIG;
+
dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 593c05b4df8d..9287135e9bfc 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -420,6 +420,10 @@ xfs_compat_attrmulti_by_handle(
sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
return -XFS_ERROR(EFAULT);
+ /* overflow check */
+ if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
+ return -E2BIG;
+
dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index e65a7937f3a4..9c8019c78c92 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -673,7 +673,10 @@ xfs_vn_fiemap(
bm.bmv_length = BTOBB(length);
/* We add one because in getbmap world count includes the header */
- bm.bmv_count = fieinfo->fi_extents_max + 1;
+ bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
+ fieinfo->fi_extents_max + 1;
+ bm.bmv_count = min_t(__s32, bm.bmv_count,
+ (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
bm.bmv_iflags = BMV_IF_PREALLOC;
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
bm.bmv_iflags |= BMV_IF_ATTRFORK;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 29f1edca76de..e9002513e08f 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -789,18 +789,18 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0);
+ mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1);
+ mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -902,7 +902,8 @@ xfsaild_start(
struct xfs_ail *ailp)
{
ailp->xa_target = 0;
- ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild");
+ ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
+ ailp->xa_mount->m_fsname);
if (IS_ERR(ailp->xa_task))
return -PTR_ERR(ailp->xa_task);
return 0;
@@ -1092,6 +1093,7 @@ xfs_fs_write_inode(
* the code will only flush the inode if it isn't already
* being flushed.
*/
+ xfs_ioend_wait(ip);
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (ip->i_update_core) {
error = xfs_log_inode(ip);
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a427c638d909..3884e20bc14e 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -356,68 +356,23 @@ xfs_commit_dummy_trans(
STATIC int
xfs_sync_fsdata(
- struct xfs_mount *mp,
- int flags)
+ struct xfs_mount *mp)
{
struct xfs_buf *bp;
- struct xfs_buf_log_item *bip;
- int error = 0;
/*
- * If this is xfssyncd() then only sync the superblock if we can
- * lock it without sleeping and it is not pinned.
+ * If the buffer is pinned then push on the log so we won't get stuck
+ * waiting in the write for someone, maybe ourselves, to flush the log.
+ *
+ * Even though we just pushed the log above, we did not have the
+ * superblock buffer locked at that point so it can become pinned in
+ * between there and here.
*/
- if (flags & SYNC_TRYLOCK) {
- ASSERT(!(flags & SYNC_WAIT));
-
- bp = xfs_getsb(mp, XBF_TRYLOCK);
- if (!bp)
- goto out;
-
- bip = XFS_BUF_FSPRIVATE(bp, struct xfs_buf_log_item *);
- if (!bip || !xfs_buf_item_dirty(bip) || XFS_BUF_ISPINNED(bp))
- goto out_brelse;
- } else {
- bp = xfs_getsb(mp, 0);
-
- /*
- * If the buffer is pinned then push on the log so we won't
- * get stuck waiting in the write for someone, maybe
- * ourselves, to flush the log.
- *
- * Even though we just pushed the log above, we did not have
- * the superblock buffer locked at that point so it can
- * become pinned in between there and here.
- */
- if (XFS_BUF_ISPINNED(bp))
- xfs_log_force(mp, 0);
- }
-
-
- if (flags & SYNC_WAIT)
- XFS_BUF_UNASYNC(bp);
- else
- XFS_BUF_ASYNC(bp);
-
- error = xfs_bwrite(mp, bp);
- if (error)
- return error;
-
- /*
- * If this is a data integrity sync make sure all pending buffers
- * are flushed out for the log coverage check below.
- */
- if (flags & SYNC_WAIT)
- xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
- if (xfs_log_need_covered(mp))
- error = xfs_commit_dummy_trans(mp, flags);
- return error;
+ bp = xfs_getsb(mp, 0);
+ if (XFS_BUF_ISPINNED(bp))
+ xfs_log_force(mp, 0);
- out_brelse:
- xfs_buf_relse(bp);
- out:
- return error;
+ return xfs_bwrite(mp, bp);
}
/*
@@ -441,7 +396,7 @@ int
xfs_quiesce_data(
struct xfs_mount *mp)
{
- int error;
+ int error, error2 = 0;
/* push non-blocking */
xfs_sync_data(mp, 0);
@@ -452,13 +407,20 @@ xfs_quiesce_data(
xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
- error = xfs_sync_fsdata(mp, SYNC_WAIT);
+ error = xfs_sync_fsdata(mp);
+
+ /* make sure all delwri buffers are written out */
+ xfs_flush_buftarg(mp->m_ddev_targp, 1);
+
+ /* mark the log as covered if needed */
+ if (xfs_log_need_covered(mp))
+ error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
/* flush data-only devices */
if (mp->m_rtdev_targp)
XFS_bflush(mp->m_rtdev_targp);
- return error;
+ return error ? error : error2;
}
STATIC void
@@ -581,9 +543,9 @@ xfs_flush_inodes(
}
/*
- * Every sync period we need to unpin all items, reclaim inodes, sync
- * quota and write out the superblock. We might need to cover the log
- * to indicate it is idle.
+ * Every sync period we need to unpin all items, reclaim inodes and sync
+ * disk quotas. We might need to cover the log to indicate that the
+ * filesystem is idle.
*/
STATIC void
xfs_sync_worker(
@@ -597,7 +559,8 @@ xfs_sync_worker(
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
- error = xfs_sync_fsdata(mp, SYNC_TRYLOCK);
+ if (xfs_log_need_covered(mp))
+ error = xfs_commit_dummy_trans(mp, 0);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);
@@ -660,7 +623,7 @@ xfs_syncd_init(
mp->m_sync_work.w_syncer = xfs_sync_worker;
mp->m_sync_work.w_mount = mp;
mp->m_sync_work.w_completion = NULL;
- mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd");
+ mp->m_sync_task = kthread_run(xfssyncd, mp, "xfssyncd/%s", mp->m_fsname);
if (IS_ERR(mp->m_sync_task))
return -PTR_ERR(mp->m_sync_task);
return 0;
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index 5a107601e969..207fa77f63ae 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -41,7 +41,6 @@
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
-#include "xfs_attr_sf.h"
#include "xfs_attr_leaf.h"
#include "xfs_log_priv.h"
#include "xfs_buf_item.h"
@@ -50,6 +49,9 @@
#include "xfs_aops.h"
#include "quota/xfs_dquot_item.h"
#include "quota/xfs_dquot.h"
+#include "xfs_log_recover.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode_item.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index fcaa62f0799e..8a319cfd2901 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -32,6 +32,10 @@ struct xfs_da_node_entry;
struct xfs_dquot;
struct xlog_ticket;
struct log;
+struct xlog_recover;
+struct xlog_recover_item;
+struct xfs_buf_log_format;
+struct xfs_inode_log_format;
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -562,18 +566,21 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(int, count)
+ __field(int, pincount)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->pincount = atomic_read(&ip->i_pincount);
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
+ TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->count,
+ __entry->pincount,
(char *)__entry->caller_ip)
)
@@ -583,6 +590,10 @@ DEFINE_EVENT(xfs_inode_class, name, \
TP_ARGS(ip, caller_ip))
DEFINE_INODE_EVENT(xfs_ihold);
DEFINE_INODE_EVENT(xfs_irele);
+DEFINE_INODE_EVENT(xfs_inode_pin);
+DEFINE_INODE_EVENT(xfs_inode_unpin);
+DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+
/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
DEFINE_INODE_EVENT(xfs_inode);
#define xfs_itrace_entry(ip) \
@@ -642,8 +653,6 @@ DEFINE_EVENT(xfs_dquot_class, name, \
TP_PROTO(struct xfs_dquot *dqp), \
TP_ARGS(dqp))
DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
@@ -658,7 +667,6 @@ DEFINE_DQUOT_EVENT(xfs_dqread_fail);
DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_move);
DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
DEFINE_DQUOT_EVENT(xfs_dqget_hit);
DEFINE_DQUOT_EVENT(xfs_dqget_miss);
@@ -1495,6 +1503,140 @@ DEFINE_EVENT(xfs_swap_extent_class, name, \
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
+DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
+ TP_PROTO(struct log *log, struct xlog_recover *trans,
+ struct xlog_recover_item *item, int pass),
+ TP_ARGS(log, trans, item, pass),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned long, item)
+ __field(xlog_tid_t, tid)
+ __field(int, type)
+ __field(int, pass)
+ __field(int, count)
+ __field(int, total)
+ ),
+ TP_fast_assign(
+ __entry->dev = log->l_mp->m_super->s_dev;
+ __entry->item = (unsigned long)item;
+ __entry->tid = trans->r_log_tid;
+ __entry->type = ITEM_TYPE(item);
+ __entry->pass = pass;
+ __entry->count = item->ri_cnt;
+ __entry->total = item->ri_total;
+ ),
+ TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
+ "item region count/total %d/%d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->tid,
+ __entry->pass,
+ (void *)__entry->item,
+ __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
+ __entry->count,
+ __entry->total)
+)
+
+#define DEFINE_LOG_RECOVER_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_item_class, name, \
+ TP_PROTO(struct log *log, struct xlog_recover *trans, \
+ struct xlog_recover_item *item, int pass), \
+ TP_ARGS(log, trans, item, pass))
+
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
+DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
+ TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
+ TP_ARGS(log, buf_f),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(__int64_t, blkno)
+ __field(unsigned short, len)
+ __field(unsigned short, flags)
+ __field(unsigned short, size)
+ __field(unsigned int, map_size)
+ ),
+ TP_fast_assign(
+ __entry->dev = log->l_mp->m_super->s_dev;
+ __entry->blkno = buf_f->blf_blkno;
+ __entry->len = buf_f->blf_len;
+ __entry->flags = buf_f->blf_flags;
+ __entry->size = buf_f->blf_size;
+ __entry->map_size = buf_f->blf_map_size;
+ ),
+ TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
+ "map_size %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->blkno,
+ __entry->len,
+ __entry->flags,
+ __entry->size,
+ __entry->map_size)
+)
+
+#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
+ TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
+ TP_ARGS(log, buf_f))
+
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
+DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
+
+DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
+ TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
+ TP_ARGS(log, in_f),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned short, size)
+ __field(int, fields)
+ __field(unsigned short, asize)
+ __field(unsigned short, dsize)
+ __field(__int64_t, blkno)
+ __field(int, len)
+ __field(int, boffset)
+ ),
+ TP_fast_assign(
+ __entry->dev = log->l_mp->m_super->s_dev;
+ __entry->ino = in_f->ilf_ino;
+ __entry->size = in_f->ilf_size;
+ __entry->fields = in_f->ilf_fields;
+ __entry->asize = in_f->ilf_asize;
+ __entry->dsize = in_f->ilf_dsize;
+ __entry->blkno = in_f->ilf_blkno;
+ __entry->len = in_f->ilf_len;
+ __entry->boffset = in_f->ilf_boffset;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
+ "dsize %d, blkno 0x%llx, len %d, boffset %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->fields,
+ __entry->asize,
+ __entry->dsize,
+ __entry->blkno,
+ __entry->len,
+ __entry->boffset)
+)
+#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
+DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
+ TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
+ TP_ARGS(log, in_f))
+
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
+DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH