diff options
author | Qu Wenruo <wqu@suse.com> | 2024-09-02 14:29:06 +0930 |
---|---|---|
committer | David Sterba <dsterba@suse.com> | 2024-09-10 16:51:22 +0200 |
commit | bd610c0937aaf03b2835638ada1fab8b0524c61a (patch) | |
tree | 9445683fc1ea1b80aa8bea50b43b48285021ca71 | |
parent | 49a9907368a4633fe19c477159da7a3199c808ee (diff) |
btrfs: only unlock the to-be-submitted ranges inside a folio
[SUBPAGE COMPRESSION LIMITS]
Currently inside writepage_delalloc(), if a delalloc range is going to
be submitted asynchronously (inline or compression, the page
dirty/writeback/unlock are all handled in at different time, not at the
submission time), then we return 1 and extent_writepage() will skip the
submission.
This is fine if every sector matches page size, but if a sector is
smaller than page size (aka, subpage case), then it can be very
problematic, for example for the following 64K page:
0 16K 32K 48K 64K
|/| |///////| |/|
| |
4K 52K
Where |/| is the dirty range we need to submit.
In the above case, we need the following different handling for the 3
ranges:
- [0, 4K) needs to be submitted for regular write
A single sector cannot be compressed.
- [16K, 32K) needs to be submitted for compressed write
- [48K, 52K) needs to be submitted for regular write.
Above, if we try to submit [16K, 32K) for compressed write, we will
return 1 and immediately, and without submitting the remaining
[48K, 52K) range.
Furthermore, since extent_writepage() will exit without unlocking any
sectors, the submitted range [0, 4K) will not have sector unlocked.
That's the reason why for now subpage is only allowed for full page
range.
[ENHANCEMENT]
- Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap
This records which sectors will be submitted by extent_writepage_io().
This allows us to track which sectors needs to be submitted thus later
to be properly unlocked.
For asynchronously submitted range (inline/compression), the
corresponding bits will be cleared from that bitmap.
- Only return 1 if no sector needs to be submitted in
writepage_delalloc()
- Only submit sectors marked by submission bitmap inside
extent_writepage_io()
So we won't touch the asynchronously submitted part.
- Introduce btrfs_folio_end_writer_lock_bitmap() helper
This will only unlock the involved sectors specified by @bitmap
parameter, to avoid touching the range asynchronously submitted.
Please note that, since subpage compression is still limited to page
aligned range, this change is only a preparation for future sector
perfect compression support for subpage.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- | fs/btrfs/extent_io.c | 89 | ||||
-rw-r--r-- | fs/btrfs/subpage.c | 33 | ||||
-rw-r--r-- | fs/btrfs/subpage.h | 2 |
3 files changed, 86 insertions, 38 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 70be1150c34e..39c9677c47d5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -101,6 +101,13 @@ struct btrfs_bio_ctrl { blk_opf_t opf; btrfs_bio_end_io_t end_io_func; struct writeback_control *wbc; + + /* + * The sectors of the page which are going to be submitted by + * extent_writepage_io(). + * This is to avoid touching ranges covered by compression/inline. + */ + unsigned long submit_bitmap; }; static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) @@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio) */ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, struct folio *folio, - struct writeback_control *wbc) + struct btrfs_bio_ctrl *bio_ctrl) { struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode); + struct writeback_control *wbc = bio_ctrl->wbc; const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping); const u64 page_start = folio_pos(folio); const u64 page_end = page_start + folio_size(folio) - 1; @@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, u64 delalloc_to_write = 0; int ret = 0; + /* Save the dirty bitmap as our submission bitmap will be a subset of it. */ + if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) { + ASSERT(fs_info->sectors_per_page > 1); + btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap); + } else { + bio_ctrl->submit_bitmap = 1; + } + /* Lock all (subpage) delalloc ranges inside the folio first. */ while (delalloc_start < page_end) { delalloc_end = page_end; @@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode, } /* - * We can hit btrfs_run_delalloc_range() with >0 return value. - * - * This happens when either the IO is already done and folio - * unlocked (inline) or the IO submission and folio unlock would - * be handled as async (compression). - * - * Inline is only possible for regular sectorsize for now. - * - * Compression is possible for both subpage and regular cases, - * but even for subpage compression only happens for page aligned - * range, thus the found delalloc range must go beyond current - * folio. + * We have some ranges that's going to be submitted asynchronously + * (compression or inline). These range have their own control + * on when to unlock the pages. We should not touch them + * anymore, so clear the range from the submission bitmap. */ - if (ret > 0) - ASSERT(!is_subpage || found_start + found_len >= page_end); - + if (ret > 0) { + unsigned int start_bit = (found_start - page_start) >> + fs_info->sectorsize_bits; + unsigned int end_bit = (min(page_end + 1, found_start + found_len) - + page_start) >> fs_info->sectorsize_bits; + bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit); + } /* * Above btrfs_run_delalloc_range() may have unlocked the folio, * thus for the last range, we cannot touch the folio anymore. @@ -1230,10 +1242,10 @@ out: DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE); /* - * If btrfs_run_dealloc_range() already started I/O and unlocked - * the folios, we just need to account for them here. + * If all ranges are submitted asynchronously, we just need to account + * for them here. */ - if (ret == 1) { + if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) { wbc->nr_to_write -= delalloc_to_write; return 1; } @@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, { struct btrfs_fs_info *fs_info = inode->root->fs_info; unsigned long range_bitmap = 0; - /* - * This is the default value for sectorsize == PAGE_SIZE case. - * We known we need to write the dirty sector (aka the page), - * even if the page is not dirty (we cleared it before entering). - * - * For subpage cases we will get the correct bitmap later. - */ - unsigned long dirty_bitmap = 1; - unsigned int bitmap_size = 1; bool submitted_io = false; const u64 folio_start = folio_pos(folio); u64 cur; @@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, return 1; } - if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) { - ASSERT(fs_info->sectors_per_page > 1); - btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap); - bitmap_size = fs_info->sectors_per_page; - } for (cur = start; cur < start + len; cur += fs_info->sectorsize) set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); - bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size); + bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap, + fs_info->sectors_per_page); bio_ctrl->end_io_func = end_bbio_data_write; - for_each_set_bit(bit, &dirty_bitmap, bitmap_size) { + for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) { cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits); if (cur >= i_size) { @@ -1421,6 +1420,7 @@ out: static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl) { struct inode *inode = folio->mapping->host; + struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); const u64 page_start = folio_pos(folio); int ret; size_t pg_offset; @@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl if (folio->index == end_index) folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset); + /* + * Default to unlock the whole folio. + * The proper bitmap can only be initialized until writepage_delalloc(). + */ + bio_ctrl->submit_bitmap = (unsigned long)-1; ret = set_folio_extent_mapped(folio); if (ret < 0) goto done; - ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc); + ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl); if (ret == 1) return 0; if (ret) @@ -1466,8 +1471,11 @@ done: mapping_set_error(folio->mapping, ret); } - btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio, - page_start, PAGE_SIZE); + /* + * Only unlock ranges that are submitted. As there can be some async + * submitted ranges inside the folio. + */ + btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap); ASSERT(ret <= 0); return ret; } @@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f if (pages_dirty && folio != locked_folio) ASSERT(folio_test_dirty(folio)); + /* + * Set the submission bitmap to submit all sectors. + * extent_writepage_io() will do the truncation correctly. + */ + bio_ctrl.submit_bitmap = (unsigned long)-1; ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len, &bio_ctrl, i_size); if (ret == 1) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index 83660fa82c32..fe4d719d506b 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, folio_unlock(folio); } +void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, + struct folio *folio, unsigned long bitmap) +{ + struct btrfs_subpage *subpage = folio_get_private(folio); + const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked; + unsigned long flags; + bool last = false; + int cleared = 0; + int bit; + + if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) { + folio_unlock(folio); + return; + } + + if (atomic_read(&subpage->writers) == 0) { + /* No writers, locked by plain lock_page(). */ + folio_unlock(folio); + return; + } + + spin_lock_irqsave(&subpage->lock, flags); + for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) { + if (test_and_clear_bit(bit + start_bit, subpage->bitmaps)) + cleared++; + } + ASSERT(atomic_read(&subpage->writers) >= cleared); + last = atomic_sub_and_test(cleared, &subpage->writers); + spin_unlock_irqrestore(&subpage->lock, flags); + if (last) + folio_unlock(folio); +} + #define subpage_test_bitmap_all_set(fs_info, subpage, name) \ bitmap_test_range_all_set(subpage->bitmaps, \ fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \ diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h index f805261e0999..4b85d91d0e18 100644 --- a/fs/btrfs/subpage.h +++ b/fs/btrfs/subpage.h @@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len); void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 start, u32 len); +void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info, + struct folio *folio, unsigned long bitmap); bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info, struct folio *folio, u64 search_start, u64 *found_start_ret, u32 *found_len_ret); |