diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/ext4.h | 13 | ||||
-rw-r--r-- | fs/ext4/extents.c | 49 | ||||
-rw-r--r-- | fs/ext4/inode.c | 136 | ||||
-rw-r--r-- | fs/ext4/page-io.c | 110 | ||||
-rw-r--r-- | fs/ext4/super.c | 10 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 13 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 30 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 144 | ||||
-rw-r--r-- | fs/ocfs2/suballoc.c | 19 |
9 files changed, 315 insertions, 209 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 03db3e71676c..3616f1b0c987 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -198,6 +198,12 @@ struct ext4_system_blocks { */ #define EXT4_IO_END_UNWRITTEN 0x0001 +struct ext4_io_end_vec { + struct list_head list; /* list of io_end_vec */ + loff_t offset; /* offset in the file */ + ssize_t size; /* size of the extent */ +}; + /* * For converting unwritten extents on a work queue. 'handle' is used for * buffered writeback. @@ -211,8 +217,7 @@ typedef struct ext4_io_end { * bios covering the extent */ unsigned int flag; /* unwritten or not */ atomic_t count; /* reference counter */ - loff_t offset; /* offset in the file */ - ssize_t size; /* size of the extent */ + struct list_head list_vec; /* list of ext4_io_end_vec */ } ext4_io_end_t; struct ext4_io_submit { @@ -3264,6 +3269,8 @@ extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, loff_t offset, ssize_t len); +extern int ext4_convert_unwritten_io_end_vec(handle_t *handle, + ext4_io_end_t *io_end); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, @@ -3322,6 +3329,8 @@ extern int ext4_bio_write_page(struct ext4_io_submit *io, int len, struct writeback_control *wbc, bool keep_towrite); +extern struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end); +extern struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end); /* mmp.c */ extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index fb0f99dc8c22..cf6c5f64cb58 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4962,23 +4962,13 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, int ret = 0; int ret2 = 0; struct ext4_map_blocks map; - unsigned int credits, blkbits = inode->i_blkbits; + unsigned int blkbits = inode->i_blkbits; + unsigned int credits = 0; map.m_lblk = offset >> blkbits; max_blocks = EXT4_MAX_BLOCKS(len, offset, blkbits); - /* - * This is somewhat ugly but the idea is clear: When transaction is - * reserved, everything goes into it. Otherwise we rather start several - * smaller transactions for conversion of each extent separately. - */ - if (handle) { - handle = ext4_journal_start_reserved(handle, - EXT4_HT_EXT_CONVERT); - if (IS_ERR(handle)) - return PTR_ERR(handle); - credits = 0; - } else { + if (!handle) { /* * credits to insert 1 extent into extent tree */ @@ -5009,11 +4999,40 @@ int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, if (ret <= 0 || ret2) break; } - if (!credits) - ret2 = ext4_journal_stop(handle); return ret > 0 ? ret2 : ret; } +int ext4_convert_unwritten_io_end_vec(handle_t *handle, ext4_io_end_t *io_end) +{ + int ret, err = 0; + struct ext4_io_end_vec *io_end_vec; + + /* + * This is somewhat ugly but the idea is clear: When transaction is + * reserved, everything goes into it. Otherwise we rather start several + * smaller transactions for conversion of each extent separately. + */ + if (handle) { + handle = ext4_journal_start_reserved(handle, + EXT4_HT_EXT_CONVERT); + if (IS_ERR(handle)) + return PTR_ERR(handle); + } + + list_for_each_entry(io_end_vec, &io_end->list_vec, list) { + ret = ext4_convert_unwritten_extents(handle, io_end->inode, + io_end_vec->offset, + io_end_vec->size); + if (ret) + break; + } + + if (handle) + err = ext4_journal_stop(handle); + + return ret < 0 ? ret : err; +} + /* * If newes is not existing extent (newes->ec_pblk equals zero) find * delayed extent at start of newes and update newes accordingly and diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index abaaf7d96ca4..0d8971b819e9 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2341,6 +2341,75 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd, } /* + * mpage_process_page - update page buffers corresponding to changed extent and + * may submit fully mapped page for IO + * + * @mpd - description of extent to map, on return next extent to map + * @m_lblk - logical block mapping. + * @m_pblk - corresponding physical mapping. + * @map_bh - determines on return whether this page requires any further + * mapping or not. + * Scan given page buffers corresponding to changed extent and update buffer + * state according to new extent state. + * We map delalloc buffers to their physical location, clear unwritten bits. + * If the given page is not fully mapped, we update @map to the next extent in + * the given page that needs mapping & return @map_bh as true. + */ +static int mpage_process_page(struct mpage_da_data *mpd, struct page *page, + ext4_lblk_t *m_lblk, ext4_fsblk_t *m_pblk, + bool *map_bh) +{ + struct buffer_head *head, *bh; + ext4_io_end_t *io_end = mpd->io_submit.io_end; + ext4_lblk_t lblk = *m_lblk; + ext4_fsblk_t pblock = *m_pblk; + int err = 0; + int blkbits = mpd->inode->i_blkbits; + ssize_t io_end_size = 0; + struct ext4_io_end_vec *io_end_vec = ext4_last_io_end_vec(io_end); + + bh = head = page_buffers(page); + do { + if (lblk < mpd->map.m_lblk) + continue; + if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { + /* + * Buffer after end of mapped extent. + * Find next buffer in the page to map. + */ + mpd->map.m_len = 0; + mpd->map.m_flags = 0; + io_end_vec->size += io_end_size; + io_end_size = 0; + + err = mpage_process_page_bufs(mpd, head, bh, lblk); + if (err > 0) + err = 0; + if (!err && mpd->map.m_len && mpd->map.m_lblk > lblk) { + io_end_vec = ext4_alloc_io_end_vec(io_end); + io_end_vec->offset = mpd->map.m_lblk << blkbits; + } + *map_bh = true; + goto out; + } + if (buffer_delay(bh)) { + clear_buffer_delay(bh); + bh->b_blocknr = pblock++; + } + clear_buffer_unwritten(bh); + io_end_size += (1 << blkbits); + } while (lblk++, (bh = bh->b_this_page) != head); + + io_end_vec->size += io_end_size; + io_end_size = 0; + *map_bh = false; +out: + *m_lblk = lblk; + *m_pblk = pblock; + return err; +} + +/* * mpage_map_buffers - update buffers corresponding to changed extent and * submit fully mapped pages for IO * @@ -2359,12 +2428,12 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) struct pagevec pvec; int nr_pages, i; struct inode *inode = mpd->inode; - struct buffer_head *head, *bh; int bpp_bits = PAGE_SHIFT - inode->i_blkbits; pgoff_t start, end; ext4_lblk_t lblk; - sector_t pblock; + ext4_fsblk_t pblock; int err; + bool map_bh = false; start = mpd->map.m_lblk >> bpp_bits; end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; @@ -2380,50 +2449,19 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) for (i = 0; i < nr_pages; i++) { struct page *page = pvec.pages[i]; - bh = head = page_buffers(page); - do { - if (lblk < mpd->map.m_lblk) - continue; - if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { - /* - * Buffer after end of mapped extent. - * Find next buffer in the page to map. - */ - mpd->map.m_len = 0; - mpd->map.m_flags = 0; - /* - * FIXME: If dioread_nolock supports - * blocksize < pagesize, we need to make - * sure we add size mapped so far to - * io_end->size as the following call - * can submit the page for IO. - */ - err = mpage_process_page_bufs(mpd, head, - bh, lblk); - pagevec_release(&pvec); - if (err > 0) - err = 0; - return err; - } - if (buffer_delay(bh)) { - clear_buffer_delay(bh); - bh->b_blocknr = pblock++; - } - clear_buffer_unwritten(bh); - } while (lblk++, (bh = bh->b_this_page) != head); - + err = mpage_process_page(mpd, page, &lblk, &pblock, + &map_bh); /* - * FIXME: This is going to break if dioread_nolock - * supports blocksize < pagesize as we will try to - * convert potentially unmapped parts of inode. + * If map_bh is true, means page may require further bh + * mapping, or maybe the page was submitted for IO. + * So we return to call further extent mapping. */ - mpd->io_submit.io_end->size += PAGE_SIZE; + if (err < 0 || map_bh == true) + goto out; /* Page fully mapped - let IO run! */ err = mpage_submit_page(mpd, page); - if (err < 0) { - pagevec_release(&pvec); - return err; - } + if (err < 0) + goto out; } pagevec_release(&pvec); } @@ -2431,6 +2469,9 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) mpd->map.m_len = 0; mpd->map.m_flags = 0; return 0; +out: + pagevec_release(&pvec); + return err; } static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) @@ -2510,9 +2551,10 @@ static int mpage_map_and_submit_extent(handle_t *handle, int err; loff_t disksize; int progress = 0; + ext4_io_end_t *io_end = mpd->io_submit.io_end; + struct ext4_io_end_vec *io_end_vec = ext4_alloc_io_end_vec(io_end); - mpd->io_submit.io_end->offset = - ((loff_t)map->m_lblk) << inode->i_blkbits; + io_end_vec->offset = ((loff_t)map->m_lblk) << inode->i_blkbits; do { err = mpage_map_one_extent(handle, mpd); if (err < 0) { @@ -3613,6 +3655,7 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ssize_t size, void *private) { ext4_io_end_t *io_end = private; + struct ext4_io_end_vec *io_end_vec; /* if not async direct IO just return */ if (!io_end) @@ -3630,8 +3673,9 @@ static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, ext4_clear_io_unwritten_flag(io_end); size = 0; } - io_end->offset = offset; - io_end->size = size; + io_end_vec = ext4_alloc_io_end_vec(io_end); + io_end_vec->offset = offset; + io_end_vec->size = size; ext4_put_io_end(io_end); return 0; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 12ceadef32c5..893913d1c2fe 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -31,18 +31,56 @@ #include "acl.h" static struct kmem_cache *io_end_cachep; +static struct kmem_cache *io_end_vec_cachep; int __init ext4_init_pageio(void) { io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT); if (io_end_cachep == NULL) return -ENOMEM; + + io_end_vec_cachep = KMEM_CACHE(ext4_io_end_vec, 0); + if (io_end_vec_cachep == NULL) { + kmem_cache_destroy(io_end_cachep); + return -ENOMEM; + } return 0; } void ext4_exit_pageio(void) { kmem_cache_destroy(io_end_cachep); + kmem_cache_destroy(io_end_vec_cachep); +} + +struct ext4_io_end_vec *ext4_alloc_io_end_vec(ext4_io_end_t *io_end) +{ + struct ext4_io_end_vec *io_end_vec; + + io_end_vec = kmem_cache_zalloc(io_end_vec_cachep, GFP_NOFS); + if (!io_end_vec) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&io_end_vec->list); + list_add_tail(&io_end_vec->list, &io_end->list_vec); + return io_end_vec; +} + +static void ext4_free_io_end_vec(ext4_io_end_t *io_end) +{ + struct ext4_io_end_vec *io_end_vec, *tmp; + + if (list_empty(&io_end->list_vec)) + return; + list_for_each_entry_safe(io_end_vec, tmp, &io_end->list_vec, list) { + list_del(&io_end_vec->list); + kmem_cache_free(io_end_vec_cachep, io_end_vec); + } +} + +struct ext4_io_end_vec *ext4_last_io_end_vec(ext4_io_end_t *io_end) +{ + BUG_ON(list_empty(&io_end->list_vec)); + return list_last_entry(&io_end->list_vec, struct ext4_io_end_vec, list); } /* @@ -125,6 +163,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) ext4_finish_bio(bio); bio_put(bio); } + ext4_free_io_end_vec(io_end); kmem_cache_free(io_end_cachep, io_end); } @@ -136,29 +175,26 @@ static void ext4_release_io_end(ext4_io_end_t *io_end) * cannot get to ext4_ext_truncate() before all IOs overlapping that range are * completed (happens from ext4_free_ioend()). */ -static int ext4_end_io(ext4_io_end_t *io) +static int ext4_end_io_end(ext4_io_end_t *io_end) { - struct inode *inode = io->inode; - loff_t offset = io->offset; - ssize_t size = io->size; - handle_t *handle = io->handle; + struct inode *inode = io_end->inode; + handle_t *handle = io_end->handle; int ret = 0; - ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," + ext4_debug("ext4_end_io_nolock: io_end 0x%p from inode %lu,list->next 0x%p," "list->prev 0x%p\n", - io, inode->i_ino, io->list.next, io->list.prev); + io_end, inode->i_ino, io_end->list.next, io_end->list.prev); - io->handle = NULL; /* Following call will use up the handle */ - ret = ext4_convert_unwritten_extents(handle, inode, offset, size); + io_end->handle = NULL; /* Following call will use up the handle */ + ret = ext4_convert_unwritten_io_end_vec(handle, io_end); if (ret < 0 && !ext4_forced_shutdown(EXT4_SB(inode->i_sb))) { ext4_msg(inode->i_sb, KERN_EMERG, "failed to convert unwritten extents to written " "extents -- potential data loss! " - "(inode %lu, offset %llu, size %zd, error %d)", - inode->i_ino, offset, size, ret); + "(inode %lu, error %d)", inode->i_ino, ret); } - ext4_clear_io_unwritten_flag(io); - ext4_release_io_end(io); + ext4_clear_io_unwritten_flag(io_end); + ext4_release_io_end(io_end); return ret; } @@ -166,21 +202,21 @@ static void dump_completed_IO(struct inode *inode, struct list_head *head) { #ifdef EXT4FS_DEBUG struct list_head *cur, *before, *after; - ext4_io_end_t *io, *io0, *io1; + ext4_io_end_t *io_end, *io_end0, *io_end1; if (list_empty(head)) return; ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); - list_for_each_entry(io, head, list) { - cur = &io->list; + list_for_each_entry(io_end, head, list) { + cur = &io_end->list; before = cur->prev; - io0 = container_of(before, ext4_io_end_t, list); + io_end0 = container_of(before, ext4_io_end_t, list); after = cur->next; - io1 = container_of(after, ext4_io_end_t, list); + io_end1 = container_of(after, ext4_io_end_t, list); ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", - io, inode->i_ino, io0, io1); + io_end, inode->i_ino, io_end0, io_end1); } #endif } @@ -207,7 +243,7 @@ static void ext4_add_complete_io(ext4_io_end_t *io_end) static int ext4_do_flush_completed_IO(struct inode *inode, struct list_head *head) { - ext4_io_end_t *io; + ext4_io_end_t *io_end; struct list_head unwritten; unsigned long flags; struct ext4_inode_info *ei = EXT4_I(inode); @@ -219,11 +255,11 @@ static int ext4_do_flush_completed_IO(struct inode *inode, spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); while (!list_empty(&unwritten)) { - io = list_entry(unwritten.next, ext4_io_end_t, list); - BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN)); - list_del_init(&io->list); + io_end = list_entry(unwritten.next, ext4_io_end_t, list); + BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); + list_del_init(&io_end->list); - err = ext4_end_io(io); + err = ext4_end_io_end(io_end); if (unlikely(!ret && err)) ret = err; } @@ -242,19 +278,22 @@ void ext4_end_io_rsv_work(struct work_struct *work) ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) { - ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags); - if (io) { - io->inode = inode; - INIT_LIST_HEAD(&io->list); - atomic_set(&io->count, 1); + ext4_io_end_t *io_end = kmem_cache_zalloc(io_end_cachep, flags); + + if (io_end) { + io_end->inode = inode; + INIT_LIST_HEAD(&io_end->list); + INIT_LIST_HEAD(&io_end->list_vec); + atomic_set(&io_end->count, 1); } - return io; + return io_end; } void ext4_put_io_end_defer(ext4_io_end_t *io_end) { if (atomic_dec_and_test(&io_end->count)) { - if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { + if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || + list_empty(&io_end->list_vec)) { ext4_release_io_end(io_end); return; } @@ -268,9 +307,8 @@ int ext4_put_io_end(ext4_io_end_t *io_end) if (atomic_dec_and_test(&io_end->count)) { if (io_end->flag & EXT4_IO_END_UNWRITTEN) { - err = ext4_convert_unwritten_extents(io_end->handle, - io_end->inode, io_end->offset, - io_end->size); + err = ext4_convert_unwritten_io_end_vec(io_end->handle, + io_end); io_end->handle = NULL; ext4_clear_io_unwritten_flag(io_end); } @@ -307,10 +345,8 @@ static void ext4_end_bio(struct bio *bio) struct inode *inode = io_end->inode; ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu " - "(offset %llu size %ld starting block %llu)", + "starting block %llu)", bio->bi_status, inode->i_ino, - (unsigned long long) io_end->offset, - (long) io_end->size, (unsigned long long) bi_sector >> (inode->i_blkbits - 9)); mapping_set_error(inode->i_mapping, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dd654e53ba3d..7796e2ffc294 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2105,16 +2105,6 @@ static int parse_options(char *options, struct super_block *sb, } } #endif - if (test_opt(sb, DIOREAD_NOLOCK)) { - int blocksize = - BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size); - - if (blocksize < PAGE_SIZE) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "dioread_nolock if block size != PAGE_SIZE"); - return 0; - } - } return 1; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 132fb92098c7..8e1ff875de43 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -482,10 +482,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (jh->b_committed_data) { struct buffer_head *bh = jh2bh(jh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); jbd2_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); } jbd2_journal_refile_buffer(journal, jh); } @@ -918,6 +918,7 @@ restart_loop: transaction_t *cp_transaction; struct buffer_head *bh; int try_to_free = 0; + bool drop_ref; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); @@ -927,7 +928,7 @@ restart_loop: * done with it. */ get_bh(bh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction); /* @@ -1022,8 +1023,10 @@ restart_loop: try_to_free = 1; } JBUFFER_TRACE(jh, "refile or unfile buffer"); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); + drop_ref = __jbd2_journal_refile_buffer(jh); + spin_unlock(&jh->b_state_lock); + if (drop_ref) + jbd2_journal_put_journal_head(jh); if (try_to_free) release_buffer_page(bh); /* Drops bh reference */ else diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 1c58859aa592..f6034ce4a107 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -363,7 +363,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction, /* keep subsequent assertions sane */ atomic_set(&new_bh->b_count, 1); - jbd_lock_bh_state(bh_in); + spin_lock(&jh_in->b_state_lock); repeat: /* * If a new transaction has already done a buffer copy-out, then @@ -405,13 +405,13 @@ repeat: if (need_copy_out && !done_copy_out) { char *tmp; - jbd_unlock_bh_state(bh_in); + spin_unlock(&jh_in->b_state_lock); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { brelse(new_bh); return -ENOMEM; } - jbd_lock_bh_state(bh_in); + spin_lock(&jh_in->b_state_lock); if (jh_in->b_frozen_data) { jbd2_free(tmp, bh_in->b_size); goto repeat; @@ -464,7 +464,7 @@ repeat: __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); spin_unlock(&journal->j_list_lock); set_buffer_shadow(bh_in); - jbd_unlock_bh_state(bh_in); + spin_unlock(&jh_in->b_state_lock); return do_escape | (done_copy_out << 1); } @@ -2410,6 +2410,8 @@ static struct journal_head *journal_alloc_journal_head(void) ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS | __GFP_NOFAIL); } + if (ret) + spin_lock_init(&ret->b_state_lock); return ret; } @@ -2529,17 +2531,23 @@ static void __journal_remove_journal_head(struct buffer_head *bh) J_ASSERT_BH(bh, buffer_jbd(bh)); J_ASSERT_BH(bh, jh2bh(jh) == bh); BUFFER_TRACE(bh, "remove journal_head"); + + /* Unlink before dropping the lock */ + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); +} + +static void journal_release_journal_head(struct journal_head *jh, size_t b_size) +{ if (jh->b_frozen_data) { printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); - jbd2_free(jh->b_frozen_data, bh->b_size); + jbd2_free(jh->b_frozen_data, b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); - jbd2_free(jh->b_committed_data, bh->b_size); + jbd2_free(jh->b_committed_data, b_size); } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); journal_free_journal_head(jh); } @@ -2557,9 +2565,11 @@ void jbd2_journal_put_journal_head(struct journal_head *jh) if (!jh->b_jcount) { __journal_remove_journal_head(bh); jbd_unlock_bh_journal_head(bh); + journal_release_journal_head(jh, bh->b_size); __brelse(bh); - } else + } else { jbd_unlock_bh_journal_head(bh); + } } /* diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index bee8498d7792..7c11afe60532 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -879,7 +879,7 @@ repeat: start_lock = jiffies; lock_buffer(bh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); /* If it takes too long to lock the buffer, trace it */ time_lock = jbd2_time_diff(start_lock, jiffies); @@ -929,7 +929,7 @@ repeat: error = -EROFS; if (is_handle_aborted(handle)) { - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); goto out; } error = 0; @@ -993,7 +993,7 @@ repeat: */ if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); wait_on_bit_io(&bh->b_state, BH_Shadow, TASK_UNINTERRUPTIBLE); goto repeat; } @@ -1014,7 +1014,7 @@ repeat: JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS | __GFP_NOFAIL); goto repeat; @@ -1033,7 +1033,7 @@ attach_next: jh->b_next_transaction = transaction; done: - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); /* * If we are about to journal a buffer, then any revoke pending on it is @@ -1172,7 +1172,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) * that case: the transaction must have deleted the buffer for it to be * reused here. */ - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); J_ASSERT_JH(jh, (jh->b_transaction == transaction || jh->b_transaction == NULL || (jh->b_transaction == journal->j_committing_transaction && @@ -1207,7 +1207,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) jh->b_next_transaction = transaction; spin_unlock(&journal->j_list_lock); } - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); /* * akpm: I added this. ext3_alloc_branch can pick up new indirect @@ -1275,13 +1275,13 @@ repeat: committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS|__GFP_NOFAIL); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); if (!jh->b_committed_data) { /* Copy out the current buffer contents into the * preserved, committed copy. */ JBUFFER_TRACE(jh, "generate b_committed data"); if (!committed_data) { - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); goto repeat; } @@ -1289,7 +1289,7 @@ repeat: committed_data = NULL; memcpy(jh->b_committed_data, bh->b_data, bh->b_size); } - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); out: jbd2_journal_put_journal_head(jh); if (unlikely(committed_data)) @@ -1390,16 +1390,16 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) */ if (jh->b_transaction != transaction && jh->b_next_transaction != transaction) { - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); J_ASSERT_JH(jh, jh->b_transaction == transaction || jh->b_next_transaction == transaction); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); } if (jh->b_modified == 1) { /* If it's in our transaction it must be in BJ_Metadata list. */ if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) { - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); if (jh->b_transaction == transaction && jh->b_jlist != BJ_Metadata) pr_err("JBD2: assertion failure: h_type=%u " @@ -1409,13 +1409,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) jh->b_jlist); J_ASSERT_JH(jh, jh->b_transaction != transaction || jh->b_jlist == BJ_Metadata); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); } goto out; } journal = transaction->t_journal; - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); if (jh->b_modified == 0) { /* @@ -1501,7 +1501,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); out: JBUFFER_TRACE(jh, "exit"); return ret; @@ -1539,18 +1539,20 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) BUFFER_TRACE(bh, "entry"); - jbd_lock_bh_state(bh); + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) { + __bforget(bh); + return 0; + } - if (!buffer_jbd(bh)) - goto not_jbd; - jh = bh2jh(bh); + spin_lock(&jh->b_state_lock); /* Critical error: attempting to delete a bitmap buffer, maybe? * Don't do any jbd operations, and return an error. */ if (!J_EXPECT_JH(jh, !jh->b_committed_data, "inconsistent data on disk")) { err = -EIO; - goto not_jbd; + goto drop; } /* keep track of whether or not this transaction modified us */ @@ -1598,10 +1600,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); } else { __jbd2_journal_unfile_buffer(jh); - if (!buffer_jbd(bh)) { - spin_unlock(&journal->j_list_lock); - goto not_jbd; - } + jbd2_journal_put_journal_head(jh); } spin_unlock(&journal->j_list_lock); } else if (jh->b_transaction) { @@ -1643,7 +1642,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (!jh->b_cp_transaction) { JBUFFER_TRACE(jh, "belongs to none transaction"); spin_unlock(&journal->j_list_lock); - goto not_jbd; + goto drop; } /* @@ -1653,7 +1652,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (!buffer_dirty(bh)) { __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); - goto not_jbd; + goto drop; } /* @@ -1666,20 +1665,15 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); spin_unlock(&journal->j_list_lock); } - - jbd_unlock_bh_state(bh); - __brelse(bh); drop: + __brelse(bh); + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); if (drop_reserve) { /* no need to reserve log space for this block -bzzz */ handle->h_buffer_credits++; } return err; - -not_jbd: - jbd_unlock_bh_state(bh); - __bforget(bh); - goto drop; } /** @@ -1878,7 +1872,7 @@ free_and_exit: * * j_list_lock is held. * - * jbd_lock_bh_state(jh2bh(jh)) is held. + * jh->b_state_lock is held. */ static inline void @@ -1902,7 +1896,7 @@ __blist_add_buffer(struct journal_head **list, struct journal_head *jh) * * Called with j_list_lock held, and the journal may not be locked. * - * jbd_lock_bh_state(jh2bh(jh)) is held. + * jh->b_state_lock is held. */ static inline void @@ -1934,7 +1928,7 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) transaction_t *transaction; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + lockdep_assert_held(&jh->b_state_lock); transaction = jh->b_transaction; if (transaction) assert_spin_locked(&transaction->t_journal->j_list_lock); @@ -1971,17 +1965,15 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) } /* - * Remove buffer from all transactions. + * Remove buffer from all transactions. The caller is responsible for dropping + * the jh reference that belonged to the transaction. * * Called with bh_state lock and j_list_lock - * - * jh and bh may be already freed when this function returns. */ static void __jbd2_journal_unfile_buffer(struct journal_head *jh) { __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; - jbd2_journal_put_journal_head(jh); } void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) @@ -1990,18 +1982,19 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) /* Get reference so that buffer cannot be freed before we unlock it */ get_bh(bh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); spin_lock(&journal->j_list_lock); __jbd2_journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); + jbd2_journal_put_journal_head(jh); __brelse(bh); } /* * Called from jbd2_journal_try_to_free_buffers(). * - * Called under jbd_lock_bh_state(bh) + * Called under jh->b_state_lock */ static void __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) @@ -2088,10 +2081,10 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal, if (!jh) continue; - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); __journal_try_to_free_buffer(journal, bh); + spin_unlock(&jh->b_state_lock); jbd2_journal_put_journal_head(jh); - jbd_unlock_bh_state(bh); if (buffer_jbd(bh)) goto busy; } while ((bh = bh->b_this_page) != head); @@ -2112,7 +2105,7 @@ busy: * * Called under j_list_lock. * - * Called under jbd_lock_bh_state(bh). + * Called under jh->b_state_lock. */ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) { @@ -2133,6 +2126,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) } else { JBUFFER_TRACE(jh, "on running transaction"); __jbd2_journal_unfile_buffer(jh); + jbd2_journal_put_journal_head(jh); } return may_free; } @@ -2199,18 +2193,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, * holding the page lock. --sct */ - if (!buffer_jbd(bh)) + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) goto zap_buffer_unlocked; /* OK, we have data buffer in journaled mode */ write_lock(&journal->j_state_lock); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); spin_lock(&journal->j_list_lock); - jh = jbd2_journal_grab_journal_head(bh); - if (!jh) - goto zap_buffer_no_jh; - /* * We cannot remove the buffer from checkpoint lists until the * transaction adding inode to orphan list (let's call it T) @@ -2289,10 +2280,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, * for commit and try again. */ if (partial_page) { - jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); write_unlock(&journal->j_state_lock); + jbd2_journal_put_journal_head(jh); return -EBUSY; } /* @@ -2304,10 +2295,10 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, set_buffer_freed(bh); if (journal->j_running_transaction && buffer_jbddirty(bh)) jh->b_next_transaction = journal->j_running_transaction; - jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); write_unlock(&journal->j_state_lock); + jbd2_journal_put_journal_head(jh); return 0; } else { /* Good, the buffer belongs to the running transaction. @@ -2331,11 +2322,10 @@ zap_buffer: * here. */ jh->b_modified = 0; - jbd2_journal_put_journal_head(jh); -zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(bh); + spin_unlock(&jh->b_state_lock); write_unlock(&journal->j_state_lock); + jbd2_journal_put_journal_head(jh); zap_buffer_unlocked: clear_buffer_dirty(bh); J_ASSERT_BH(bh, !buffer_jbddirty(bh)); @@ -2422,7 +2412,7 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, int was_dirty = 0; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + lockdep_assert_held(&jh->b_state_lock); assert_spin_locked(&transaction->t_journal->j_list_lock); J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); @@ -2484,11 +2474,11 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, void jbd2_journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { - jbd_lock_bh_state(jh2bh(jh)); + spin_lock(&jh->b_state_lock); spin_lock(&transaction->t_journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, jlist); spin_unlock(&transaction->t_journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + spin_unlock(&jh->b_state_lock); } /* @@ -2498,23 +2488,25 @@ void jbd2_journal_file_buffer(struct journal_head *jh, * buffer on that transaction's metadata list. * * Called under j_list_lock - * Called under jbd_lock_bh_state(jh2bh(jh)) + * Called under jh->b_state_lock * - * jh and bh may be already free when this function returns + * When this function returns true, there's no next transaction to refile to + * and the caller has to drop jh reference through + * jbd2_journal_put_journal_head(). */ -void __jbd2_journal_refile_buffer(struct journal_head *jh) +bool __jbd2_journal_refile_buffer(struct journal_head *jh) { int was_dirty, jlist; struct buffer_head *bh = jh2bh(jh); - J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + lockdep_assert_held(&jh->b_state_lock); if (jh->b_transaction) assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); /* If the buffer is now unused, just drop it. */ if (jh->b_next_transaction == NULL) { __jbd2_journal_unfile_buffer(jh); - return; + return true; } /* @@ -2542,6 +2534,7 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) if (was_dirty) set_buffer_jbddirty(bh); + return false; } /* @@ -2552,16 +2545,15 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh) */ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) { - struct buffer_head *bh = jh2bh(jh); + bool drop; - /* Get reference so that buffer cannot be freed before we unlock it */ - get_bh(bh); - jbd_lock_bh_state(bh); + spin_lock(&jh->b_state_lock); spin_lock(&journal->j_list_lock); - __jbd2_journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); + drop = __jbd2_journal_refile_buffer(jh); + spin_unlock(&jh->b_state_lock); spin_unlock(&journal->j_list_lock); - __brelse(bh); + if (drop) + jbd2_journal_put_journal_head(jh); } /* diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 69c21a3843af..4180c3ef0a68 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1252,6 +1252,7 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, int nr) { struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; + struct journal_head *jh; int ret; if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap)) @@ -1260,13 +1261,14 @@ static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, if (!buffer_jbd(bg_bh)) return 1; - jbd_lock_bh_state(bg_bh); - bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data; + jh = bh2jh(bg_bh); + spin_lock(&jh->b_state_lock); + bg = (struct ocfs2_group_desc *) jh->b_committed_data; if (bg) ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap); else ret = 1; - jbd_unlock_bh_state(bg_bh); + spin_unlock(&jh->b_state_lock); return ret; } @@ -2387,6 +2389,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, int status; unsigned int tmp; struct ocfs2_group_desc *undo_bg = NULL; + struct journal_head *jh; /* The caller got this descriptor from * ocfs2_read_group_descriptor(). Any corruption is a code bug. */ @@ -2405,10 +2408,10 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, goto bail; } + jh = bh2jh(group_bh); if (undo_fn) { - jbd_lock_bh_state(group_bh); - undo_bg = (struct ocfs2_group_desc *) - bh2jh(group_bh)->b_committed_data; + spin_lock(&jh->b_state_lock); + undo_bg = (struct ocfs2_group_desc *) jh->b_committed_data; BUG_ON(!undo_bg); } @@ -2423,7 +2426,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, le16_add_cpu(&bg->bg_free_bits_count, num_bits); if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { if (undo_fn) - jbd_unlock_bh_state(group_bh); + spin_unlock(&jh->b_state_lock); return ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit count %u but claims %u are freed. num_bits %d\n", (unsigned long long)le64_to_cpu(bg->bg_blkno), le16_to_cpu(bg->bg_bits), @@ -2432,7 +2435,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, } if (undo_fn) - jbd_unlock_bh_state(group_bh); + spin_unlock(&jh->b_state_lock); ocfs2_journal_dirty(handle, group_bh); bail: |