Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r-- | fs/xfs/xfs_buf_item.c | 422
1 file changed, 212 insertions, 210 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 9e75e8d6042e..5bb6f22cc11a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -12,8 +12,13 @@
 #include "xfs_bit.h"
 #include "xfs_mount.h"
 #include "xfs_trans.h"
-#include "xfs_buf_item.h"
 #include "xfs_trans_priv.h"
+#include "xfs_buf_item.h"
+#include "xfs_inode.h"
+#include "xfs_inode_item.h"
+#include "xfs_quota.h"
+#include "xfs_dquot_item.h"
+#include "xfs_dquot.h"
 #include "xfs_trace.h"
 #include "xfs_log.h"
 
@@ -25,7 +30,7 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
 	return container_of(lip, struct xfs_buf_log_item, bli_item);
 }
 
-STATIC void	xfs_buf_do_callbacks(struct xfs_buf *bp);
+static void xfs_buf_item_done(struct xfs_buf *bp);
 
 /* Is this log iovec plausibly large enough to contain the buffer log format? */
 bool
@@ -457,10 +462,9 @@ xfs_buf_item_unpin(
 		 * the AIL lock.
 		 */
 		if (bip->bli_flags & XFS_BLI_STALE_INODE) {
-			xfs_buf_do_callbacks(bp);
-			bp->b_log_item = NULL;
-			list_del_init(&bp->b_li_list);
-			bp->b_iodone = NULL;
+			xfs_buf_item_done(bp);
+			xfs_iflush_done(bp);
+			ASSERT(list_empty(&bp->b_li_list));
 		} else {
 			xfs_trans_ail_delete(lip, SHUTDOWN_LOG_IO_ERROR);
 			xfs_buf_item_relse(bp);
@@ -734,7 +738,7 @@ xfs_buf_item_init(
 		return 0;
 	}
 
-	bip = kmem_zone_zalloc(xfs_buf_item_zone, 0);
+	bip = kmem_cache_zalloc(xfs_buf_item_zone, GFP_KERNEL | __GFP_NOFAIL);
 	xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF,
 			  &xfs_buf_item_ops);
 	bip->bli_buf = bp;
@@ -936,11 +940,7 @@ xfs_buf_item_free(
 }
 
 /*
- * This is called when the buf log item is no longer needed. It should
- * free the buf log item associated with the given buffer and clear
- * the buffer's pointer to the buf log item. If there are no more
- * items in the list, clear the b_iodone field of the buffer (see
- * xfs_buf_attach_iodone() below).
+ * xfs_buf_item_relse() is called when the buf log item is no longer needed.
  */
 void
 xfs_buf_item_relse(
@@ -952,137 +952,28 @@ xfs_buf_item_relse(
 	ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
 
 	bp->b_log_item = NULL;
-	if (list_empty(&bp->b_li_list))
-		bp->b_iodone = NULL;
-
 	xfs_buf_rele(bp);
 	xfs_buf_item_free(bip);
 }
 
-
 /*
- * Add the given log item with its callback to the list of callbacks
- * to be called when the buffer's I/O completes. If it is not set
- * already, set the buffer's b_iodone() routine to be
- * xfs_buf_iodone_callbacks() and link the log item into the list of
- * items rooted at b_li_list.
+ * Decide if we're going to retry the write after a failure, and prepare
+ * the buffer for retrying the write.
  */
-void
-xfs_buf_attach_iodone(
-	struct xfs_buf		*bp,
-	void			(*cb)(struct xfs_buf *, struct xfs_log_item *),
-	struct xfs_log_item	*lip)
-{
-	ASSERT(xfs_buf_islocked(bp));
-
-	lip->li_cb = cb;
-	list_add_tail(&lip->li_bio_list, &bp->b_li_list);
-
-	ASSERT(bp->b_iodone == NULL ||
-	       bp->b_iodone == xfs_buf_iodone_callbacks);
-	bp->b_iodone = xfs_buf_iodone_callbacks;
-}
-
-/*
- * We can have many callbacks on a buffer. Running the callbacks individually
- * can cause a lot of contention on the AIL lock, so we allow for a single
- * callback to be able to scan the remaining items in bp->b_li_list for other
- * items of the same type and callback to be processed in the first call.
- *
- * As a result, the loop walking the callback list below will also modify the
- * list. it removes the first item from the list and then runs the callback.
- * The loop then restarts from the new first item in the list. This allows the
- * callback to scan and modify the list attached to the buffer and we don't
- * have to care about maintaining a next item pointer.
- */
-STATIC void
-xfs_buf_do_callbacks(
-	struct xfs_buf		*bp)
-{
-	struct xfs_buf_log_item *blip = bp->b_log_item;
-	struct xfs_log_item	*lip;
-
-	/* If there is a buf_log_item attached, run its callback */
-	if (blip) {
-		lip = &blip->bli_item;
-		lip->li_cb(bp, lip);
-	}
-
-	while (!list_empty(&bp->b_li_list)) {
-		lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
-				       li_bio_list);
-
-		/*
-		 * Remove the item from the list, so we don't have any
-		 * confusion if the item is added to another buf.
-		 * Don't touch the log item after calling its
-		 * callback, because it could have freed itself.
-		 */
-		list_del_init(&lip->li_bio_list);
-		lip->li_cb(bp, lip);
-	}
-}
-
-/*
- * Invoke the error state callback for each log item affected by the failed I/O.
- *
- * If a metadata buffer write fails with a non-permanent error, the buffer is
- * eventually resubmitted and so the completion callbacks are not run. The error
- * state may need to be propagated to the log items attached to the buffer,
- * however, so the next AIL push of the item knows hot to handle it correctly.
- */
-STATIC void
-xfs_buf_do_callbacks_fail(
	struct xfs_buf		*bp)
-{
-	struct xfs_log_item	*lip;
-	struct xfs_ail		*ailp;
-
-	/*
-	 * Buffer log item errors are handled directly by xfs_buf_item_push()
-	 * and xfs_buf_iodone_callback_error, and they have no IO error
-	 * callbacks. Check only for items in b_li_list.
-	 */
-	if (list_empty(&bp->b_li_list))
-		return;
-
-	lip = list_first_entry(&bp->b_li_list, struct xfs_log_item,
-			li_bio_list);
-	ailp = lip->li_ailp;
-	spin_lock(&ailp->ail_lock);
-	list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
-		if (lip->li_ops->iop_error)
-			lip->li_ops->iop_error(lip, bp);
-	}
-	spin_unlock(&ailp->ail_lock);
-}
-
 static bool
-xfs_buf_iodone_callback_error(
+xfs_buf_ioerror_fail_without_retry(
 	struct xfs_buf		*bp)
 {
-	struct xfs_buf_log_item	*bip = bp->b_log_item;
-	struct xfs_log_item	*lip;
-	struct xfs_mount	*mp;
+	struct xfs_mount	*mp = bp->b_mount;
 	static ulong		lasttime;
 	static xfs_buftarg_t	*lasttarg;
-	struct xfs_error_cfg	*cfg;
-
-	/*
-	 * The failed buffer might not have a buf_log_item attached or the
-	 * log_item list might be empty. Get the mp from the available
-	 * xfs_log_item
-	 */
-	lip = list_first_entry_or_null(&bp->b_li_list, struct xfs_log_item,
-				       li_bio_list);
-	mp = lip ? lip->li_mountp : bip->bli_item.li_mountp;
 
 	/*
 	 * If we've already decided to shutdown the filesystem because of
 	 * I/O errors, there's no point in giving this a retry.
 	 */
 	if (XFS_FORCED_SHUTDOWN(mp))
-		goto out_stale;
+		return true;
 
 	if (bp->b_target != lasttarg ||
 	    time_after(jiffies, (lasttime + 5*HZ))) {
@@ -1093,129 +984,240 @@ xfs_buf_iodone_callback_error(
 
 	/* synchronous writes will have callers process the error */
 	if (!(bp->b_flags & XBF_ASYNC))
-		goto out_stale;
-
-	trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
-	ASSERT(bp->b_iodone != NULL);
-
-	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+		return true;
+	return false;
+}
 
-	/*
-	 * If the write was asynchronous then no one will be looking for the
-	 * error. If this is the first failure of this type, clear the error
-	 * state and write the buffer out again. This means we always retry an
-	 * async write failure at least once, but we also need to set the buffer
-	 * up to behave correctly now for repeated failures.
-	 */
-	if (!(bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) ||
-	    bp->b_last_error != bp->b_error) {
-		bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
-		bp->b_last_error = bp->b_error;
-		if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
-		    !bp->b_first_retry_time)
-			bp->b_first_retry_time = jiffies;
+static bool
+xfs_buf_ioerror_retry(
+	struct xfs_buf		*bp,
+	struct xfs_error_cfg	*cfg)
+{
+	if ((bp->b_flags & (XBF_STALE | XBF_WRITE_FAIL)) &&
+	    bp->b_last_error == bp->b_error)
+		return false;
 
-		xfs_buf_ioerror(bp, 0);
-		xfs_buf_submit(bp);
-		return true;
-	}
+	bp->b_flags |= (XBF_WRITE | XBF_DONE | XBF_WRITE_FAIL);
+	bp->b_last_error = bp->b_error;
+	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
+	    !bp->b_first_retry_time)
+		bp->b_first_retry_time = jiffies;
+	return true;
+}
 
-	/*
-	 * Repeated failure on an async write. Take action according to the
-	 * error configuration we have been set up to use.
-	 */
+/*
+ * Account for this latest trip around the retry handler, and decide if
+ * we've failed enough times to constitute a permanent failure.
+ */
+static bool
+xfs_buf_ioerror_permanent(
+	struct xfs_buf		*bp,
+	struct xfs_error_cfg	*cfg)
+{
+	struct xfs_mount	*mp = bp->b_mount;
 
 	if (cfg->max_retries != XFS_ERR_RETRY_FOREVER &&
 	    ++bp->b_retries > cfg->max_retries)
-		goto permanent_error;
+		return true;
 	if (cfg->retry_timeout != XFS_ERR_RETRY_FOREVER &&
 	    time_after(jiffies, cfg->retry_timeout + bp->b_first_retry_time))
-		goto permanent_error;
+		return true;
 
 	/* At unmount we may treat errors differently */
 	if ((mp->m_flags & XFS_MOUNT_UNMOUNTING) && mp->m_fail_unmount)
-		goto permanent_error;
+		return true;
 
-	/*
-	 * Still a transient error, run IO completion failure callbacks and let
-	 * the higher layers retry the buffer.
-	 */
-	xfs_buf_do_callbacks_fail(bp);
-	xfs_buf_ioerror(bp, 0);
-	xfs_buf_relse(bp);
-	return true;
+	return false;
+}
+
+/*
+ * On a sync write or shutdown we just want to stale the buffer and let the
+ * caller handle the error in bp->b_error appropriately.
+ *
+ * If the write was asynchronous then no one will be looking for the error. If
+ * this is the first failure of this type, clear the error state and write the
+ * buffer out again. This means we always retry an async write failure at least
+ * once, but we also need to set the buffer up to behave correctly now for
+ * repeated failures.
+ *
+ * If we get repeated async write failures, then we take action according to the
+ * error configuration we have been set up to use.
+ *
+ * Multi-state return value:
+ *
+ * XBF_IOERROR_FINISH: clear IO error retry state and run callback completions
+ * XBF_IOERROR_DONE: resubmitted immediately, do not run any completions
+ * XBF_IOERROR_FAIL: transient error, run failure callback completions and then
+ *    release the buffer
+ */
+enum {
+	XBF_IOERROR_FINISH,
+	XBF_IOERROR_DONE,
+	XBF_IOERROR_FAIL,
+};
+
+static int
+xfs_buf_iodone_error(
+	struct xfs_buf		*bp)
+{
+	struct xfs_mount	*mp = bp->b_mount;
+	struct xfs_error_cfg	*cfg;
+
+	if (xfs_buf_ioerror_fail_without_retry(bp))
+		goto out_stale;
+
+	trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+
+	cfg = xfs_error_get_cfg(mp, XFS_ERR_METADATA, bp->b_error);
+	if (xfs_buf_ioerror_retry(bp, cfg)) {
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_submit(bp);
+		return XBF_IOERROR_DONE;
+	}
 
 	/*
 	 * Permanent error - we need to trigger a shutdown if we haven't already
 	 * to indicate that inconsistency will result from this action.
 	 */
-permanent_error:
-	xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+	if (xfs_buf_ioerror_permanent(bp, cfg)) {
+		xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
+		goto out_stale;
+	}
+
+	/* Still considered a transient error. Caller will schedule retries. */
+	return XBF_IOERROR_FAIL;
+
 out_stale:
 	xfs_buf_stale(bp);
 	bp->b_flags |= XBF_DONE;
 	trace_xfs_buf_error_relse(bp, _RET_IP_);
-	return false;
+	return XBF_IOERROR_FINISH;
 }
 
-/*
- * This is the iodone() function for buffers which have had callbacks attached
- * to them by xfs_buf_attach_iodone(). We need to iterate the items on the
- * callback list, mark the buffer as having no more callbacks and then push the
- * buffer through IO completion processing.
- */
-void
-xfs_buf_iodone_callbacks(
+static void
+xfs_buf_item_done(
 	struct xfs_buf		*bp)
 {
-	/*
-	 * If there is an error, process it. Some errors require us
-	 * to run callbacks after failure processing is done so we
-	 * detect that and take appropriate action.
-	 */
-	if (bp->b_error && xfs_buf_iodone_callback_error(bp))
+	struct xfs_buf_log_item	*bip = bp->b_log_item;
+
+	if (!bip)
 		return;
 
 	/*
-	 * Successful IO or permanent error. Either way, we can clear the
-	 * retry state here in preparation for the next error that may occur.
+	 * If we are forcibly shutting down, this may well be off the AIL
+	 * already. That's because we simulate the log-committed callbacks to
+	 * unpin these buffers. Or we may never have put this item on AIL
+	 * because the transaction was aborted forcibly.
+	 * xfs_trans_ail_delete() takes care of these.
+	 *
+	 * Either way, AIL is useless if we're forcing a shutdown.
 	 */
+	xfs_trans_ail_delete(&bip->bli_item, SHUTDOWN_CORRUPT_INCORE);
+	bp->b_log_item = NULL;
+	xfs_buf_item_free(bip);
+	xfs_buf_rele(bp);
+}
+
+static inline void
+xfs_buf_clear_ioerror_retry_state(
+	struct xfs_buf		*bp)
+{
 	bp->b_last_error = 0;
 	bp->b_retries = 0;
 	bp->b_first_retry_time = 0;
+}
 
-	xfs_buf_do_callbacks(bp);
-	bp->b_log_item = NULL;
-	list_del_init(&bp->b_li_list);
-	bp->b_iodone = NULL;
-	xfs_buf_ioend(bp);
+/*
+ * Inode buffer iodone callback function.
+ */
+void
+xfs_buf_inode_iodone(
+	struct xfs_buf		*bp)
+{
+	if (bp->b_error) {
+		struct xfs_log_item *lip;
+		int ret = xfs_buf_iodone_error(bp);
+
+		if (ret == XBF_IOERROR_FINISH)
+			goto finish_iodone;
+		if (ret == XBF_IOERROR_DONE)
+			return;
+		ASSERT(ret == XBF_IOERROR_FAIL);
+		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+			set_bit(XFS_LI_FAILED, &lip->li_flags);
+		}
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_relse(bp);
+		return;
+	}
+
+finish_iodone:
+	xfs_buf_clear_ioerror_retry_state(bp);
+	xfs_buf_item_done(bp);
+	xfs_iflush_done(bp);
+	xfs_buf_ioend_finish(bp);
 }
 
 /*
- * This is the iodone() function for buffers which have been
- * logged. It is called when they are eventually flushed out.
- * It should remove the buf item from the AIL, and free the buf item.
- * It is called by xfs_buf_iodone_callbacks() above which will take
- * care of cleaning up the buffer itself.
+ * Dquot buffer iodone callback function.
  */
 void
-xfs_buf_iodone(
-	struct xfs_buf		*bp,
-	struct xfs_log_item	*lip)
+xfs_buf_dquot_iodone(
+	struct xfs_buf		*bp)
 {
-	ASSERT(BUF_ITEM(lip)->bli_buf == bp);
+	if (bp->b_error) {
+		struct xfs_log_item *lip;
+		int ret = xfs_buf_iodone_error(bp);
+
+		if (ret == XBF_IOERROR_FINISH)
+			goto finish_iodone;
+		if (ret == XBF_IOERROR_DONE)
+			return;
+		ASSERT(ret == XBF_IOERROR_FAIL);
+		spin_lock(&bp->b_mount->m_ail->ail_lock);
+		list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+			xfs_set_li_failed(lip, bp);
+		}
+		spin_unlock(&bp->b_mount->m_ail->ail_lock);
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_relse(bp);
+		return;
+	}
 
-	xfs_buf_rele(bp);
+finish_iodone:
+	xfs_buf_clear_ioerror_retry_state(bp);
+	/* a newly allocated dquot buffer might have a log item attached */
+	xfs_buf_item_done(bp);
+	xfs_dquot_done(bp);
+	xfs_buf_ioend_finish(bp);
+}
 
-	/*
-	 * If we are forcibly shutting down, this may well be off the AIL
-	 * already. That's because we simulate the log-committed callbacks to
-	 * unpin these buffers. Or we may never have put this item on AIL
-	 * because of the transaction was aborted forcibly.
-	 * xfs_trans_ail_delete() takes care of these.
-	 *
-	 * Either way, AIL is useless if we're forcing a shutdown.
-	 */
-	xfs_trans_ail_delete(lip, SHUTDOWN_CORRUPT_INCORE);
-	xfs_buf_item_free(BUF_ITEM(lip));
+/*
+ * Dirty buffer iodone callback function.
+ *
+ * Note that for things like remote attribute buffers, there may not be a buffer
+ * log item here, so processing the buffer log item must remain optional.
+ */
+void
+xfs_buf_iodone(
+	struct xfs_buf		*bp)
+{
+	if (bp->b_error) {
+		int ret = xfs_buf_iodone_error(bp);
+
+		if (ret == XBF_IOERROR_FINISH)
+			goto finish_iodone;
+		if (ret == XBF_IOERROR_DONE)
+			return;
+		ASSERT(ret == XBF_IOERROR_FAIL);
+		ASSERT(list_empty(&bp->b_li_list));
+		xfs_buf_ioerror(bp, 0);
+		xfs_buf_relse(bp);
+		return;
+	}
+
+finish_iodone:
+	xfs_buf_clear_ioerror_retry_state(bp);
+	xfs_buf_item_done(bp);
+	xfs_buf_ioend_finish(bp);
 }
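
Editor's note: for readers following the retry logic split out above into xfs_buf_ioerror_retry() and xfs_buf_ioerror_permanent(), here is a small, self-contained userspace sketch that models the same decision structure: an async write failure is always retried at least once, and only repeated failures are weighed against a retry budget. All names in this sketch (buf_model, error_cfg_model, RETRY_FOREVER, the "tick" clock) are invented for illustration and are not the kernel API; only the shape of the decision mirrors the diff.

/* Standalone model of the retry/permanent-failure decision; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define RETRY_FOREVER (-1)          /* stands in for XFS_ERR_RETRY_FOREVER */

struct error_cfg_model {            /* stands in for struct xfs_error_cfg */
	int  max_retries;
	long retry_timeout;         /* in "ticks"; RETRY_FOREVER disables it */
};

struct buf_model {                  /* retry state a buffer would carry */
	int  last_error;
	int  error;
	int  retries;
	long first_retry_time;
	bool write_fail;            /* stands in for XBF_WRITE_FAIL */
};

/* First failure of a new error type: always worth one immediate resubmit. */
static bool model_ioerror_retry(struct buf_model *bp, long now)
{
	if (bp->write_fail && bp->last_error == bp->error)
		return false;       /* repeated failure, fall through */

	bp->write_fail = true;
	bp->last_error = bp->error;
	if (!bp->first_retry_time)
		bp->first_retry_time = now;
	return true;
}

/* Repeated failure: has the configured retry budget been exhausted? */
static bool model_ioerror_permanent(struct buf_model *bp,
				    const struct error_cfg_model *cfg, long now)
{
	if (cfg->max_retries != RETRY_FOREVER && ++bp->retries > cfg->max_retries)
		return true;
	if (cfg->retry_timeout != RETRY_FOREVER &&
	    now > cfg->retry_timeout + bp->first_retry_time)
		return true;
	return false;
}

int main(void)
{
	struct error_cfg_model cfg = { .max_retries = 2, .retry_timeout = RETRY_FOREVER };
	struct buf_model bp = { .error = 5 /* pretend EIO */ };

	for (long now = 1; now <= 5; now++) {
		if (model_ioerror_retry(&bp, now))
			printf("tick %ld: resubmit immediately\n", now);
		else if (model_ioerror_permanent(&bp, &cfg, now))
			printf("tick %ld: permanent failure, shut down\n", now);
		else
			printf("tick %ld: transient, leave for a later retry\n", now);
	}
	return 0;
}

Running the model prints one immediate resubmit, two transient verdicts, and then permanent failures once max_retries is exceeded, which is the ordering the refactored kernel code enforces.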
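The other structural change in this diff is that xfs_buf_iodone_error() hands back a tri-state verdict that each typed completion handler (inode, dquot, plain dirty buffer) interprets itself. The toy program below is again a model with invented names (io_verdict, classify_error, buffer_complete), not the kernel interface; it only shows the shape of that contract: FINISH falls through to normal completion, DONE means the buffer was already resubmitted, and FAIL leaves the attached items flagged for a later retry.

/* Toy model of a tri-state I/O-error verdict consumed by a completion handler. */
#include <stdio.h>

enum io_verdict {                   /* mirrors XBF_IOERROR_{FINISH,DONE,FAIL} in spirit */
	IO_FINISH,                  /* sync error or permanent failure: run normal completion */
	IO_DONE,                    /* buffer resubmitted: nothing more to do here */
	IO_FAIL,                    /* transient: mark items failed, retry later */
};

/* Stand-in for xfs_buf_iodone_error(); maps an error and retry budget to a verdict. */
static enum io_verdict classify_error(int error, int retries_left)
{
	if (!error)
		return IO_FINISH;
	return retries_left > 0 ? IO_DONE : IO_FAIL;
}

/* Shape of a typed completion handler built on top of the verdict. */
static void buffer_complete(const char *type, int error, int retries_left)
{
	switch (classify_error(error, retries_left)) {
	case IO_DONE:
		printf("%s: resubmitted, completion deferred\n", type);
		return;
	case IO_FAIL:
		printf("%s: transient failure, items flagged for retry\n", type);
		return;
	case IO_FINISH:
		printf("%s: run log item completion and finish the buffer\n", type);
		return;
	}
}

int main(void)
{
	buffer_complete("inode buffer", 0, 0);   /* clean I/O */
	buffer_complete("dquot buffer", 5, 1);   /* first failure: resubmit */
	buffer_complete("plain buffer", 5, 0);   /* retries exhausted */
	return 0;
}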