diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-20 09:50:11 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-10-20 09:50:11 -0700 |
commit | c2661b806092d8ea2dccb7b02b65776555e0ee47 (patch) | |
tree | ebc73fa58706e5a0a79235953c532ac6c6e2b539 /fs/jbd2 | |
parent | f114040e3ea6e07372334ade75d1ee0775c355e1 (diff) | |
parent | 813d32f91333e4c33d5a19b67167c4bae42dae75 (diff) |
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
"A large number of cleanups and bug fixes, with some (minor) journal
optimizations"
[ This got sent to me before -rc1, but was stuck in my spam folder. - Linus ]
* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (67 commits)
ext4: check s_chksum_driver when looking for bg csum presence
ext4: move error report out of atomic context in ext4_init_block_bitmap()
ext4: Replace open coded mdata csum feature to helper function
ext4: delete useless comments about ext4_move_extents
ext4: fix reservation overflow in ext4_da_write_begin
ext4: add ext4_iget_normal() which is to be used for dir tree lookups
ext4: don't orphan or truncate the boot loader inode
ext4: grab missed write_count for EXT4_IOC_SWAP_BOOT
ext4: optimize block allocation on grow indepth
ext4: get rid of code duplication
ext4: fix over-defensive complaint after journal abort
ext4: fix return value of ext4_do_update_inode
ext4: fix mmap data corruption when blocksize < pagesize
vfs: fix data corruption when blocksize < pagesize for mmaped data
ext4: fold ext4_nojournal_sops into ext4_sops
ext4: support freezing ext2 (nojournal) file systems
ext4: fold ext4_sync_fs_nojournal() into ext4_sync_fs()
ext4: don't check quota format when there are no quota files
jbd2: simplify calling convention around __jbd2_journal_clean_checkpoint_list
jbd2: avoid pointless scanning of checkpoint lists
...
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/checkpoint.c | 334 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 18 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 1 |
3 files changed, 146 insertions, 207 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 7f34f4716165..988b32ed4c87 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -96,15 +96,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_transaction == NULL && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { - /* - * Get our reference so that bh cannot be freed before - * we unlock it - */ - get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; - BUFFER_TRACE(bh, "release"); - __brelse(bh); } return ret; } @@ -122,8 +115,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) nblocks = jbd2_space_needed(journal); while (jbd2_log_space_left(journal) < nblocks) { - if (journal->j_flags & JBD2_ABORT) - return; write_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); @@ -139,6 +130,10 @@ void __jbd2_log_wait_for_space(journal_t *journal) * trace for forensic evidence. */ write_lock(&journal->j_state_lock); + if (journal->j_flags & JBD2_ABORT) { + mutex_unlock(&journal->j_checkpoint_mutex); + return; + } spin_lock(&journal->j_list_lock); nblocks = jbd2_space_needed(journal); space_left = jbd2_log_space_left(journal); @@ -183,58 +178,6 @@ void __jbd2_log_wait_for_space(journal_t *journal) } } -/* - * Clean up transaction's list of buffers submitted for io. - * We wait for any pending IO to complete and remove any clean - * buffers. Note that we take the buffers in the opposite ordering - * from the one in which they were submitted for IO. - * - * Return 0 on success, and return <0 if some buffers have failed - * to be written out. - * - * Called with j_list_lock held. - */ -static int __wait_cp_io(journal_t *journal, transaction_t *transaction) -{ - struct journal_head *jh; - struct buffer_head *bh; - tid_t this_tid; - int released = 0; - int ret = 0; - - this_tid = transaction->t_tid; -restart: - /* Did somebody clean up the transaction in the meanwhile? */ - if (journal->j_checkpoint_transactions != transaction || - transaction->t_tid != this_tid) - return ret; - while (!released && transaction->t_checkpoint_io_list) { - jh = transaction->t_checkpoint_io_list; - bh = jh2bh(jh); - get_bh(bh); - if (buffer_locked(bh)) { - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - spin_lock(&journal->j_list_lock); - goto restart; - } - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - - /* - * Now in whatever state the buffer currently is, we know that - * it has been written out and so we can drop it from the list - */ - released = __jbd2_journal_remove_checkpoint(jh); - __brelse(bh); - } - - return ret; -} - static void __flush_batch(journal_t *journal, int *batch_count) { @@ -255,81 +198,6 @@ __flush_batch(journal_t *journal, int *batch_count) } /* - * Try to flush one buffer from the checkpoint list to disk. - * - * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. Return <0 if the buffer has failed to - * be written out. - * - * Called with j_list_lock held and drops it if 1 is returned - */ -static int __process_buffer(journal_t *journal, struct journal_head *jh, - int *batch_count, transaction_t *transaction) -{ - struct buffer_head *bh = jh2bh(jh); - int ret = 0; - - if (buffer_locked(bh)) { - get_bh(bh); - spin_unlock(&journal->j_list_lock); - wait_on_buffer(bh); - /* the journal_head may have gone by now */ - BUFFER_TRACE(bh, "brelse"); - __brelse(bh); - ret = 1; - } else if (jh->b_transaction != NULL) { - transaction_t *t = jh->b_transaction; - tid_t tid = t->t_tid; - - transaction->t_chp_stats.cs_forced_to_close++; - spin_unlock(&journal->j_list_lock); - if (unlikely(journal->j_flags & JBD2_UNMOUNT)) - /* - * The journal thread is dead; so starting and - * waiting for a commit to finish will cause - * us to wait for a _very_ long time. - */ - printk(KERN_ERR "JBD2: %s: " - "Waiting for Godot: block %llu\n", - journal->j_devname, - (unsigned long long) bh->b_blocknr); - jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - ret = 1; - } else if (!buffer_dirty(bh)) { - ret = 1; - if (unlikely(buffer_write_io_error(bh))) - ret = -EIO; - get_bh(bh); - BUFFER_TRACE(bh, "remove from checkpoint"); - __jbd2_journal_remove_checkpoint(jh); - spin_unlock(&journal->j_list_lock); - __brelse(bh); - } else { - /* - * Important: we are about to write the buffer, and - * possibly block, while still holding the journal lock. - * We cannot afford to let the transaction logic start - * messing around with this buffer before we write it to - * disk, as that would break recoverability. - */ - BUFFER_TRACE(bh, "queue"); - get_bh(bh); - J_ASSERT_BH(bh, !buffer_jwrite(bh)); - journal->j_chkpt_bhs[*batch_count] = bh; - __buffer_relink_io(jh); - transaction->t_chp_stats.cs_written++; - (*batch_count)++; - if (*batch_count == JBD2_NR_BATCH) { - spin_unlock(&journal->j_list_lock); - __flush_batch(journal, batch_count); - ret = 1; - } - } - return ret; -} - -/* * Perform an actual checkpoint. We take the first transaction on the * list of transactions to be checkpointed and send all its buffers * to disk. We submit larger chunks of data at once. @@ -339,9 +207,11 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, */ int jbd2_log_do_checkpoint(journal_t *journal) { - transaction_t *transaction; - tid_t this_tid; - int result; + struct journal_head *jh; + struct buffer_head *bh; + transaction_t *transaction; + tid_t this_tid; + int result, batch_count = 0; jbd_debug(1, "Start checkpoint\n"); @@ -374,45 +244,117 @@ restart: * done (maybe it's a new transaction, but it fell at the same * address). */ - if (journal->j_checkpoint_transactions == transaction && - transaction->t_tid == this_tid) { - int batch_count = 0; - struct journal_head *jh; - int retry = 0, err; - - while (!retry && transaction->t_checkpoint_list) { - jh = transaction->t_checkpoint_list; - retry = __process_buffer(journal, jh, &batch_count, - transaction); - if (retry < 0 && !result) - result = retry; - if (!retry && (need_resched() || - spin_needbreak(&journal->j_list_lock))) { - spin_unlock(&journal->j_list_lock); - retry = 1; - break; - } + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + goto out; + + /* checkpoint all of the transaction's buffers */ + while (transaction->t_checkpoint_list) { + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + get_bh(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + goto retry; } + if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; - if (batch_count) { - if (!retry) { - spin_unlock(&journal->j_list_lock); - retry = 1; - } - __flush_batch(journal, &batch_count); + transaction->t_chp_stats.cs_forced_to_close++; + spin_unlock(&journal->j_list_lock); + if (unlikely(journal->j_flags & JBD2_UNMOUNT)) + /* + * The journal thread is dead; so + * starting and waiting for a commit + * to finish will cause us to wait for + * a _very_ long time. + */ + printk(KERN_ERR + "JBD2: %s: Waiting for Godot: block %llu\n", + journal->j_devname, (unsigned long long) bh->b_blocknr); + + jbd2_log_start_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); + goto retry; + } + if (!buffer_dirty(bh)) { + if (unlikely(buffer_write_io_error(bh)) && !result) + result = -EIO; + BUFFER_TRACE(bh, "remove from checkpoint"); + if (__jbd2_journal_remove_checkpoint(jh)) + /* The transaction was released; we're done */ + goto out; + continue; } + /* + * Important: we are about to write the buffer, and + * possibly block, while still holding the journal + * lock. We cannot afford to let the transaction + * logic start messing around with this buffer before + * we write it to disk, as that would break + * recoverability. + */ + BUFFER_TRACE(bh, "queue"); + get_bh(bh); + J_ASSERT_BH(bh, !buffer_jwrite(bh)); + journal->j_chkpt_bhs[batch_count++] = bh; + __buffer_relink_io(jh); + transaction->t_chp_stats.cs_written++; + if ((batch_count == JBD2_NR_BATCH) || + need_resched() || + spin_needbreak(&journal->j_list_lock)) + goto unlock_and_flush; + } - if (retry) { + if (batch_count) { + unlock_and_flush: + spin_unlock(&journal->j_list_lock); + retry: + if (batch_count) + __flush_batch(journal, &batch_count); spin_lock(&journal->j_list_lock); goto restart; + } + + /* + * Now we issued all of the transaction's buffers, let's deal + * with the buffers that are out for I/O. + */ +restart2: + /* Did somebody clean up the transaction in the meanwhile? */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + goto out; + + while (transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + spin_unlock(&journal->j_list_lock); + get_bh(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + spin_lock(&journal->j_list_lock); + goto restart2; } + if (unlikely(buffer_write_io_error(bh)) && !result) + result = -EIO; + /* - * Now we have cleaned up the first transaction's checkpoint - * list. Let's clean up the second one + * Now in whatever state the buffer currently is, we + * know that it has been written out and so we can + * drop it from the list */ - err = __wait_cp_io(journal, transaction); - if (!result) - result = err; + if (__jbd2_journal_remove_checkpoint(jh)) + break; } out: spin_unlock(&journal->j_list_lock); @@ -478,18 +420,16 @@ int jbd2_cleanup_journal_tail(journal_t *journal) * Find all the written-back checkpoint buffers in the given list and * release them. * - * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) + * Returns 1 if we freed the transaction, 0 otherwise. */ - -static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +static int journal_clean_one_cp_list(struct journal_head *jh) { struct journal_head *last_jh; struct journal_head *next_jh = jh; - int ret, freed = 0; + int ret; + int freed = 0; - *released = 0; if (!jh) return 0; @@ -498,13 +438,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) jh = next_jh; next_jh = jh->b_cpnext; ret = __try_to_free_cp_buf(jh); - if (ret) { - freed++; - if (ret == 2) { - *released = 1; - return freed; - } - } + if (!ret) + return freed; + if (ret == 2) + return 1; + freed = 1; /* * This function only frees up some memory * if possible so we dont have an obligation @@ -523,49 +461,49 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) * * Find all the written-back checkpoint buffers in the journal and release them. * - * Called with the journal locked. * Called with j_list_lock held. - * Returns number of buffers reaped (for debug) */ - -int __jbd2_journal_clean_checkpoint_list(journal_t *journal) +void __jbd2_journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; - int ret = 0; - int released; + int ret; transaction = journal->j_checkpoint_transactions; if (!transaction) - goto out; + return; last_transaction = transaction->t_cpprev; next_transaction = transaction; do { transaction = next_transaction; next_transaction = transaction->t_cpnext; - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_list, &released); + ret = journal_clean_one_cp_list(transaction->t_checkpoint_list); /* * This function only frees up some memory if possible so we * dont have an obligation to finish processing. Bail out if * preemption requested: */ if (need_resched()) - goto out; - if (released) + return; + if (ret) continue; /* * It is essential that we are as careful as in the case of * t_checkpoint_list with removing the buffer from the list as * we can possibly see not yet submitted buffers on io_list */ - ret += journal_clean_one_cp_list(transaction-> - t_checkpoint_io_list, &released); + ret = journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list); if (need_resched()) - goto out; + return; + /* + * Stop scanning if we couldn't free the transaction. This + * avoids pointless scanning of transactions which still + * weren't checkpointed. + */ + if (!ret) + return; } while (transaction != last_transaction); -out: - return ret; } /* diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 19d74d86d99c..e4dc74713a43 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1237,7 +1237,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) goto out_err; } - bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + bh = getblk_unmovable(journal->j_dev, blocknr, journal->j_blocksize); if (!bh) { printk(KERN_ERR "%s: Cannot get buffer for journal superblock\n", @@ -1522,14 +1522,6 @@ static int journal_get_superblock(journal_t *journal) goto out; } - if (jbd2_journal_has_csum_v2or3(journal) && - JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { - /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " - "at the same time!\n"); - goto out; - } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) && JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) { /* Can't have checksum v2 and v3 at the same time! */ @@ -1538,6 +1530,14 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (jbd2_journal_has_csum_v2or3(journal) && + JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) { + /* Can't have checksum v1 and v2 on at the same time! */ + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " + "at the same time!\n"); + goto out; + } + if (!jbd2_verify_csum_type(journal, sb)) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 9b329b55ffe3..bcbef08a4d8f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -525,6 +525,7 @@ static int do_one_pass(journal_t *journal, !jbd2_descr_block_csum_verify(journal, bh->b_data)) { err = -EIO; + brelse(bh); goto failed; } |