Diffstat (limited to 'fs/buffer.c')
-rw-r--r-- | fs/buffer.c | 138
1 file changed, 82 insertions(+), 56 deletions(-)
diff --git a/fs/buffer.c b/fs/buffer.c
index b144fc367b8b..09bb80c479d8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2274,51 +2274,64 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 	struct inode *inode = page->mapping->host;
 	const unsigned blkbits = inode->i_blkbits;
 	const unsigned blocksize = 1 << blkbits;
-	struct buffer_head map_bh;
-	struct buffer_head *read_bh[MAX_BUF_PER_PAGE];
+	struct buffer_head *head, *bh;
 	unsigned block_in_page;
-	unsigned block_start;
+	unsigned block_start, block_end;
 	sector_t block_in_file;
 	char *kaddr;
 	int nr_reads = 0;
-	int i;
 	int ret = 0;
 	int is_mapped_to_disk = 1;
 
+	if (page_has_buffers(page))
+		return block_prepare_write(page, from, to, get_block);
+
 	if (PageMappedToDisk(page))
 		return 0;
 
+	/*
+	 * Allocate buffers so that we can keep track of state, and potentially
+	 * attach them to the page if an error occurs. In the common case of
+	 * no error, they will just be freed again without ever being attached
+	 * to the page (which is all OK, because we're under the page lock).
+	 *
+	 * Be careful: the buffer linked list is a NULL terminated one, rather
+	 * than the circular one we're used to.
+	 */
+	head = alloc_page_buffers(page, blocksize, 0);
+	if (!head)
+		return -ENOMEM;
+
 	block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
-	map_bh.b_page = page;
 
 	/*
 	 * We loop across all blocks in the page, whether or not they are
 	 * part of the affected region.  This is so we can discover if the
 	 * page is fully mapped-to-disk.
 	 */
-	for (block_start = 0, block_in_page = 0;
+	for (block_start = 0, block_in_page = 0, bh = head;
 		  block_start < PAGE_CACHE_SIZE;
-		  block_in_page++, block_start += blocksize) {
-		unsigned block_end = block_start + blocksize;
+		  block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
 		int create;
 
-		map_bh.b_state = 0;
+		block_end = block_start + blocksize;
+		bh->b_state = 0;
 		create = 1;
 		if (block_start >= to)
 			create = 0;
-		map_bh.b_size = blocksize;
 		ret = get_block(inode, block_in_file + block_in_page,
-					&map_bh, create);
+					bh, create);
 		if (ret)
 			goto failed;
-		if (!buffer_mapped(&map_bh))
+		if (!buffer_mapped(bh))
 			is_mapped_to_disk = 0;
-		if (buffer_new(&map_bh))
-			unmap_underlying_metadata(map_bh.b_bdev,
-						map_bh.b_blocknr);
-		if (PageUptodate(page))
+		if (buffer_new(bh))
+			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+		if (PageUptodate(page)) {
+			set_buffer_uptodate(bh);
 			continue;
-		if (buffer_new(&map_bh) || !buffer_mapped(&map_bh)) {
+		}
+		if (buffer_new(bh) || !buffer_mapped(bh)) {
 			kaddr = kmap_atomic(page, KM_USER0);
 			if (block_start < from)
 				memset(kaddr+block_start, 0, from-block_start);
@@ -2328,49 +2341,26 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 			kunmap_atomic(kaddr, KM_USER0);
 			continue;
 		}
-		if (buffer_uptodate(&map_bh))
+		if (buffer_uptodate(bh))
 			continue;	/* reiserfs does this */
 		if (block_start < from || block_end > to) {
-			struct buffer_head *bh = alloc_buffer_head(GFP_NOFS);
-
-			if (!bh) {
-				ret = -ENOMEM;
-				goto failed;
-			}
-			bh->b_state = map_bh.b_state;
-			atomic_set(&bh->b_count, 0);
-			bh->b_this_page = NULL;
-			bh->b_page = page;
-			bh->b_blocknr = map_bh.b_blocknr;
-			bh->b_size = blocksize;
-			bh->b_data = (char *)(long)block_start;
-			bh->b_bdev = map_bh.b_bdev;
-			bh->b_private = NULL;
-			read_bh[nr_reads++] = bh;
+			lock_buffer(bh);
+			bh->b_end_io = end_buffer_read_nobh;
+			submit_bh(READ, bh);
+			nr_reads++;
 		}
 	}
 
 	if (nr_reads) {
-		struct buffer_head *bh;
-
 		/*
 		 * The page is locked, so these buffers are protected from
 		 * any VM or truncate activity.  Hence we don't need to care
 		 * for the buffer_head refcounts.
 		 */
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
-			lock_buffer(bh);
-			bh->b_end_io = end_buffer_read_nobh;
-			submit_bh(READ, bh);
-		}
-		for (i = 0; i < nr_reads; i++) {
-			bh = read_bh[i];
+		for (bh = head; bh; bh = bh->b_this_page) {
 			wait_on_buffer(bh);
 			if (!buffer_uptodate(bh))
 				ret = -EIO;
-			free_buffer_head(bh);
-			read_bh[i] = NULL;
 		}
 		if (ret)
 			goto failed;
@@ -2379,21 +2369,54 @@ int nobh_prepare_write(struct page *page, unsigned from, unsigned to,
 	if (is_mapped_to_disk)
 		SetPageMappedToDisk(page);
 
+	do {
+		bh = head;
+		head = head->b_this_page;
+		free_buffer_head(bh);
+	} while (head);
+
 	return 0;
 
 failed:
-	for (i = 0; i < nr_reads; i++) {
-		if (read_bh[i])
-			free_buffer_head(read_bh[i]);
-	}
-
 	/*
-	 * Error recovery is pretty slack.  Clear the page and mark it dirty
-	 * so we'll later zero out any blocks which _were_ allocated.
+	 * Error recovery is a bit difficult. We need to zero out blocks that
+	 * were newly allocated, and dirty them to ensure they get written out.
+	 * Buffers need to be attached to the page at this point, otherwise
+	 * the handling of potential IO errors during writeout would be hard
+	 * (could try doing synchronous writeout, but what if that fails too?)
 	 */
-	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
-	SetPageUptodate(page);
-	set_page_dirty(page);
+	spin_lock(&page->mapping->private_lock);
+	bh = head;
+	block_start = 0;
+	do {
+		if (PageUptodate(page))
+			set_buffer_uptodate(bh);
+		if (PageDirty(page))
+			set_buffer_dirty(bh);
+
+		block_end = block_start+blocksize;
+		if (block_end <= from)
+			goto next;
+		if (block_start >= to)
+			goto next;
+
+		if (buffer_new(bh)) {
+			clear_buffer_new(bh);
+			if (!buffer_uptodate(bh)) {
+				zero_user_page(page, block_start, bh->b_size, KM_USER0);
+				set_buffer_uptodate(bh);
+			}
+			mark_buffer_dirty(bh);
+		}
+next:
+		block_start = block_end;
+		if (!bh->b_this_page)
+			bh->b_this_page = head;
+		bh = bh->b_this_page;
+	} while (bh != head);
+	attach_page_buffers(page, head);
+	spin_unlock(&page->mapping->private_lock);
+
 	return ret;
 }
 EXPORT_SYMBOL(nobh_prepare_write);
@@ -2408,6 +2431,9 @@ int nobh_commit_write(struct file *file, struct page *page,
 	struct inode *inode = page->mapping->host;
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
+	if (page_has_buffers(page))
+		return generic_commit_write(file, page, from, to);
+
 	SetPageUptodate(page);
 	set_page_dirty(page);
 	if (pos > inode->i_size) {