btrfs: only unlock the to-be-submitted ranges inside a folio

[SUBPAGE COMPRESSION LIMITS]
Currently inside writepage_delalloc(), if a delalloc range is going to
be submitted asynchronously (inline or compression, the page
dirty/writeback/unlock are all handled in at different time, not at the
submission time), then we return 1 and extent_writepage() will skip the
submission.

This is fine if every sector matches page size, but if a sector is
smaller than page size (aka, subpage case), then it can be very
problematic, for example for the following 64K page:

     0     16K     32K    48K     64K
     |/|   |///////|      |/|
       |                    |
       4K                   52K

Where |/| is the dirty range we need to submit.

In the above case, we need the following different handling for the 3
ranges:

- [0, 4K) needs to be submitted for regular write
  A single sector cannot be compressed.

- [16K, 32K) needs to be submitted for compressed write

- [48K, 52K) needs to be submitted for regular write.

Above, if we try to submit [16K, 32K) for compressed write, we will
return 1 and immediately, and without submitting the remaining
[48K, 52K) range.

Furthermore, since extent_writepage() will exit without unlocking any
sectors, the submitted range [0, 4K) will not have sector unlocked.

That's the reason why for now subpage is only allowed for full page
range.

[ENHANCEMENT]
- Introduce a submission bitmap at btrfs_bio_ctrl::submit_bitmap
  This records which sectors will be submitted by extent_writepage_io().
  This allows us to track which sectors needs to be submitted thus later
  to be properly unlocked.

  For asynchronously submitted range (inline/compression), the
  corresponding bits will be cleared from that bitmap.

- Only return 1 if no sector needs to be submitted in
  writepage_delalloc()

- Only submit sectors marked by submission bitmap inside
  extent_writepage_io()
  So we won't touch the asynchronously submitted part.

- Introduce btrfs_folio_end_writer_lock_bitmap() helper
  This will only unlock the involved sectors specified by @bitmap
  parameter, to avoid touching the range asynchronously submitted.

Please note that, since subpage compression is still limited to page
aligned range, this change is only a preparation for future sector
perfect compression support for subpage.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Qu Wenruo 2024-09-02 14:29:06 +09:30 committed by David Sterba
parent 49a9907368
commit bd610c0937
3 changed files with 86 additions and 38 deletions

View File

@ -101,6 +101,13 @@ struct btrfs_bio_ctrl {
blk_opf_t opf;
btrfs_bio_end_io_t end_io_func;
struct writeback_control *wbc;
/*
* The sectors of the page which are going to be submitted by
* extent_writepage_io().
* This is to avoid touching ranges covered by compression/inline.
*/
unsigned long submit_bitmap;
};
static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
@ -1106,9 +1113,10 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
*/
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
struct folio *folio,
struct writeback_control *wbc)
struct btrfs_bio_ctrl *bio_ctrl)
{
struct btrfs_fs_info *fs_info = inode_to_fs_info(&inode->vfs_inode);
struct writeback_control *wbc = bio_ctrl->wbc;
const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
const u64 page_start = folio_pos(folio);
const u64 page_end = page_start + folio_size(folio) - 1;
@ -1123,6 +1131,14 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
u64 delalloc_to_write = 0;
int ret = 0;
/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
ASSERT(fs_info->sectors_per_page > 1);
btrfs_get_subpage_dirty_bitmap(fs_info, folio, &bio_ctrl->submit_bitmap);
} else {
bio_ctrl->submit_bitmap = 1;
}
/* Lock all (subpage) delalloc ranges inside the folio first. */
while (delalloc_start < page_end) {
delalloc_end = page_end;
@ -1190,22 +1206,18 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
}
/*
* We can hit btrfs_run_delalloc_range() with >0 return value.
*
* This happens when either the IO is already done and folio
* unlocked (inline) or the IO submission and folio unlock would
* be handled as async (compression).
*
* Inline is only possible for regular sectorsize for now.
*
* Compression is possible for both subpage and regular cases,
* but even for subpage compression only happens for page aligned
* range, thus the found delalloc range must go beyond current
* folio.
* We have some ranges that's going to be submitted asynchronously
* (compression or inline). These range have their own control
* on when to unlock the pages. We should not touch them
* anymore, so clear the range from the submission bitmap.
*/
if (ret > 0)
ASSERT(!is_subpage || found_start + found_len >= page_end);
if (ret > 0) {
unsigned int start_bit = (found_start - page_start) >>
fs_info->sectorsize_bits;
unsigned int end_bit = (min(page_end + 1, found_start + found_len) -
page_start) >> fs_info->sectorsize_bits;
bitmap_clear(&bio_ctrl->submit_bitmap, start_bit, end_bit - start_bit);
}
/*
* Above btrfs_run_delalloc_range() may have unlocked the folio,
* thus for the last range, we cannot touch the folio anymore.
@ -1230,10 +1242,10 @@ out:
DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
/*
* If btrfs_run_dealloc_range() already started I/O and unlocked
* the folios, we just need to account for them here.
* If all ranges are submitted asynchronously, we just need to account
* for them here.
*/
if (ret == 1) {
if (bitmap_empty(&bio_ctrl->submit_bitmap, fs_info->sectors_per_page)) {
wbc->nr_to_write -= delalloc_to_write;
return 1;
}
@ -1331,15 +1343,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
unsigned long range_bitmap = 0;
/*
* This is the default value for sectorsize == PAGE_SIZE case.
* We known we need to write the dirty sector (aka the page),
* even if the page is not dirty (we cleared it before entering).
*
* For subpage cases we will get the correct bitmap later.
*/
unsigned long dirty_bitmap = 1;
unsigned int bitmap_size = 1;
bool submitted_io = false;
const u64 folio_start = folio_pos(folio);
u64 cur;
@ -1357,18 +1360,14 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
return 1;
}
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
ASSERT(fs_info->sectors_per_page > 1);
btrfs_get_subpage_dirty_bitmap(fs_info, folio, &dirty_bitmap);
bitmap_size = fs_info->sectors_per_page;
}
for (cur = start; cur < start + len; cur += fs_info->sectorsize)
set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap);
bitmap_and(&dirty_bitmap, &dirty_bitmap, &range_bitmap, bitmap_size);
bitmap_and(&bio_ctrl->submit_bitmap, &bio_ctrl->submit_bitmap, &range_bitmap,
fs_info->sectors_per_page);
bio_ctrl->end_io_func = end_bbio_data_write;
for_each_set_bit(bit, &dirty_bitmap, bitmap_size) {
for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
cur = folio_pos(folio) + (bit << fs_info->sectorsize_bits);
if (cur >= i_size) {
@ -1421,6 +1420,7 @@ out:
static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
{
struct inode *inode = folio->mapping->host;
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
const u64 page_start = folio_pos(folio);
int ret;
size_t pg_offset;
@ -1442,11 +1442,16 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
if (folio->index == end_index)
folio_zero_range(folio, pg_offset, folio_size(folio) - pg_offset);
/*
* Default to unlock the whole folio.
* The proper bitmap can only be initialized until writepage_delalloc().
*/
bio_ctrl->submit_bitmap = (unsigned long)-1;
ret = set_folio_extent_mapped(folio);
if (ret < 0)
goto done;
ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl->wbc);
ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
if (ret == 1)
return 0;
if (ret)
@ -1466,8 +1471,11 @@ done:
mapping_set_error(folio->mapping, ret);
}
btrfs_folio_end_writer_lock(inode_to_fs_info(inode), folio,
page_start, PAGE_SIZE);
/*
* Only unlock ranges that are submitted. As there can be some async
* submitted ranges inside the folio.
*/
btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
ASSERT(ret <= 0);
return ret;
}
@ -2210,6 +2218,11 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
if (pages_dirty && folio != locked_folio)
ASSERT(folio_test_dirty(folio));
/*
* Set the submission bitmap to submit all sectors.
* extent_writepage_io() will do the truncation correctly.
*/
bio_ctrl.submit_bitmap = (unsigned long)-1;
ret = extent_writepage_io(BTRFS_I(inode), folio, cur, cur_len,
&bio_ctrl, i_size);
if (ret == 1)

View File

@ -424,6 +424,39 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
folio_unlock(folio);
}
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap)
{
struct btrfs_subpage *subpage = folio_get_private(folio);
const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
unsigned long flags;
bool last = false;
int cleared = 0;
int bit;
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
folio_unlock(folio);
return;
}
if (atomic_read(&subpage->writers) == 0) {
/* No writers, locked by plain lock_page(). */
folio_unlock(folio);
return;
}
spin_lock_irqsave(&subpage->lock, flags);
for_each_set_bit(bit, &bitmap, fs_info->sectors_per_page) {
if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
cleared++;
}
ASSERT(atomic_read(&subpage->writers) >= cleared);
last = atomic_sub_and_test(cleared, &subpage->writers);
spin_unlock_irqrestore(&subpage->lock, flags);
if (last)
folio_unlock(folio);
}
#define subpage_test_bitmap_all_set(fs_info, subpage, name) \
bitmap_test_range_all_set(subpage->bitmaps, \
fs_info->sectors_per_page * btrfs_bitmap_nr_##name, \

View File

@ -106,6 +106,8 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 start, u32 len);
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
struct folio *folio, unsigned long bitmap);
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
struct folio *folio, u64 search_start,
u64 *found_start_ret, u32 *found_len_ret);