btrfs: use bios instead of buffer_heads for super block writeout
Similar to the superblock read path, change the write path to use bios and pages instead of buffer_heads. This allows us to skip the buffer_head code when writing the superblock to disk. This is based on a patch originally authored by Nikolay Borisov. Co-developed-by: Nikolay Borisov <nborisov@suse.com> Signed-off-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: Nikolay Borisov <nborisov@suse.com> Reviewed-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
8f32380d3f
commit
314b6dd0ee
@ -7,7 +7,6 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/radix-tree.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/kthread.h>
|
||||
#include <linux/slab.h>
|
||||
@ -3395,25 +3394,34 @@ fail:
|
||||
}
|
||||
ALLOW_ERROR_INJECTION(open_ctree, ERRNO);
|
||||
|
||||
static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
|
||||
static void btrfs_end_super_write(struct bio *bio)
|
||||
{
|
||||
if (uptodate) {
|
||||
set_buffer_uptodate(bh);
|
||||
} else {
|
||||
struct btrfs_device *device = (struct btrfs_device *)
|
||||
bh->b_private;
|
||||
struct btrfs_device *device = bio->bi_private;
|
||||
struct bio_vec *bvec;
|
||||
struct bvec_iter_all iter_all;
|
||||
struct page *page;
|
||||
|
||||
btrfs_warn_rl_in_rcu(device->fs_info,
|
||||
"lost page write due to IO error on %s",
|
||||
rcu_str_deref(device->name));
|
||||
/* note, we don't set_buffer_write_io_error because we have
|
||||
* our own ways of dealing with the IO errors
|
||||
*/
|
||||
clear_buffer_uptodate(bh);
|
||||
btrfs_dev_stat_inc_and_print(device, BTRFS_DEV_STAT_WRITE_ERRS);
|
||||
bio_for_each_segment_all(bvec, bio, iter_all) {
|
||||
page = bvec->bv_page;
|
||||
|
||||
if (bio->bi_status) {
|
||||
btrfs_warn_rl_in_rcu(device->fs_info,
|
||||
"lost page write due to IO error on %s (%d)",
|
||||
rcu_str_deref(device->name),
|
||||
blk_status_to_errno(bio->bi_status));
|
||||
ClearPageUptodate(page);
|
||||
SetPageError(page);
|
||||
btrfs_dev_stat_inc_and_print(device,
|
||||
BTRFS_DEV_STAT_WRITE_ERRS);
|
||||
} else {
|
||||
SetPageUptodate(page);
|
||||
}
|
||||
|
||||
put_page(page);
|
||||
unlock_page(page);
|
||||
}
|
||||
unlock_buffer(bh);
|
||||
put_bh(bh);
|
||||
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
|
||||
@ -3473,25 +3481,23 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
|
||||
|
||||
/*
|
||||
* Write superblock @sb to the @device. Do not wait for completion, all the
|
||||
* buffer heads we write are pinned.
|
||||
* pages we use for writing are locked.
|
||||
*
|
||||
* Write @max_mirrors copies of the superblock, where 0 means default that fit
|
||||
* the expected device size at commit time. Note that max_mirrors must be
|
||||
* same for write and wait phases.
|
||||
*
|
||||
* Return number of errors when buffer head is not found or submission fails.
|
||||
* Return number of errors when page is not found or submission fails.
|
||||
*/
|
||||
static int write_dev_supers(struct btrfs_device *device,
|
||||
struct btrfs_super_block *sb, int max_mirrors)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = device->fs_info;
|
||||
struct address_space *mapping = device->bdev->bd_inode->i_mapping;
|
||||
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
|
||||
struct buffer_head *bh;
|
||||
int i;
|
||||
int ret;
|
||||
int errors = 0;
|
||||
u64 bytenr;
|
||||
int op_flags;
|
||||
|
||||
if (max_mirrors == 0)
|
||||
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
|
||||
@ -3499,6 +3505,10 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
shash->tfm = fs_info->csum_shash;
|
||||
|
||||
for (i = 0; i < max_mirrors; i++) {
|
||||
struct page *page;
|
||||
struct bio *bio;
|
||||
struct btrfs_super_block *disk_super;
|
||||
|
||||
bytenr = btrfs_sb_offset(i);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
|
||||
device->commit_total_bytes)
|
||||
@ -3511,37 +3521,45 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
|
||||
crypto_shash_final(shash, sb->csum);
|
||||
|
||||
/* One reference for us, and we leave it for the caller */
|
||||
bh = __getblk(device->bdev, bytenr / BTRFS_BDEV_BLOCKSIZE,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (!bh) {
|
||||
page = find_or_create_page(mapping, bytenr >> PAGE_SHIFT,
|
||||
GFP_NOFS);
|
||||
if (!page) {
|
||||
btrfs_err(device->fs_info,
|
||||
"couldn't get super buffer head for bytenr %llu",
|
||||
"couldn't get super block page for bytenr %llu",
|
||||
bytenr);
|
||||
errors++;
|
||||
continue;
|
||||
}
|
||||
|
||||
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
|
||||
/* Bump the refcount for wait_dev_supers() */
|
||||
get_page(page);
|
||||
|
||||
/* one reference for submit_bh */
|
||||
get_bh(bh);
|
||||
|
||||
set_buffer_uptodate(bh);
|
||||
lock_buffer(bh);
|
||||
bh->b_end_io = btrfs_end_buffer_write_sync;
|
||||
bh->b_private = device;
|
||||
disk_super = page_address(page);
|
||||
memcpy(disk_super, sb, BTRFS_SUPER_INFO_SIZE);
|
||||
|
||||
/*
|
||||
* we fua the first super. The others we allow
|
||||
* to go down lazy.
|
||||
* Directly use bios here instead of relying on the page cache
|
||||
* to do I/O, so we don't lose the ability to do integrity
|
||||
* checking.
|
||||
*/
|
||||
op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
|
||||
bio = bio_alloc(GFP_NOFS, 1);
|
||||
bio_set_dev(bio, device->bdev);
|
||||
bio->bi_iter.bi_sector = bytenr >> SECTOR_SHIFT;
|
||||
bio->bi_private = device;
|
||||
bio->bi_end_io = btrfs_end_super_write;
|
||||
__bio_add_page(bio, page, BTRFS_SUPER_INFO_SIZE,
|
||||
offset_in_page(bytenr));
|
||||
|
||||
/*
|
||||
* We FUA only the first super block. The others we allow to
|
||||
* go down lazy and there's a short window where the on-disk
|
||||
* copies might still contain the older version.
|
||||
*/
|
||||
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO;
|
||||
if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
|
||||
op_flags |= REQ_FUA;
|
||||
ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
|
||||
if (ret)
|
||||
errors++;
|
||||
bio->bi_opf |= REQ_FUA;
|
||||
|
||||
btrfsic_submit_bio(bio);
|
||||
}
|
||||
return errors < i ? 0 : -1;
|
||||
}
|
||||
@ -3550,12 +3568,11 @@ static int write_dev_supers(struct btrfs_device *device,
|
||||
* Wait for write completion of superblocks done by write_dev_supers,
|
||||
* @max_mirrors same for write and wait phases.
|
||||
*
|
||||
* Return number of errors when buffer head is not found or not marked up to
|
||||
* Return number of errors when page is not found or not marked up to
|
||||
* date.
|
||||
*/
|
||||
static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
|
||||
{
|
||||
struct buffer_head *bh;
|
||||
int i;
|
||||
int errors = 0;
|
||||
bool primary_failed = false;
|
||||
@ -3565,32 +3582,34 @@ static int wait_dev_supers(struct btrfs_device *device, int max_mirrors)
|
||||
max_mirrors = BTRFS_SUPER_MIRROR_MAX;
|
||||
|
||||
for (i = 0; i < max_mirrors; i++) {
|
||||
struct page *page;
|
||||
|
||||
bytenr = btrfs_sb_offset(i);
|
||||
if (bytenr + BTRFS_SUPER_INFO_SIZE >=
|
||||
device->commit_total_bytes)
|
||||
break;
|
||||
|
||||
bh = __find_get_block(device->bdev,
|
||||
bytenr / BTRFS_BDEV_BLOCKSIZE,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (!bh) {
|
||||
page = find_get_page(device->bdev->bd_inode->i_mapping,
|
||||
bytenr >> PAGE_SHIFT);
|
||||
if (!page) {
|
||||
errors++;
|
||||
if (i == 0)
|
||||
primary_failed = true;
|
||||
continue;
|
||||
}
|
||||
wait_on_buffer(bh);
|
||||
if (!buffer_uptodate(bh)) {
|
||||
/* Page is submitted locked and unlocked once the IO completes */
|
||||
wait_on_page_locked(page);
|
||||
if (PageError(page)) {
|
||||
errors++;
|
||||
if (i == 0)
|
||||
primary_failed = true;
|
||||
}
|
||||
|
||||
/* drop our reference */
|
||||
brelse(bh);
|
||||
/* Drop our reference */
|
||||
put_page(page);
|
||||
|
||||
/* drop the reference from the writing run */
|
||||
brelse(bh);
|
||||
/* Drop the reference from the writing run */
|
||||
put_page(page);
|
||||
}
|
||||
|
||||
/* log error, force error return */
|
||||
|
Loading…
Reference in New Issue
Block a user