btrfs: unify buffered and direct I/O read repair
Currently, direct I/O has its own versions of bio_readpage_error() and btrfs_check_repairable() (dio_read_error() and btrfs_check_dio_repairable(), respectively). The main difference is that the direct I/O version doesn't do read validation. The rework of direct I/O repair makes it possible to do validation, so we can get rid of btrfs_check_dio_repairable() and combine bio_readpage_error() and dio_read_error() into a new helper, btrfs_submit_read_repair(). Reviewed-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: Omar Sandoval <osandov@fb.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
committed by
David Sterba
parent
5c047a699a
commit
77d5d68931
@@ -2601,46 +2601,10 @@ static bool btrfs_check_repairable(struct inode *inode, bool needs_validation,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
|
|
||||||
struct io_failure_record *failrec,
|
|
||||||
struct page *page, int pg_offset, int icsum,
|
|
||||||
bio_end_io_t *endio_func, void *data)
|
|
||||||
{
|
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
|
||||||
struct bio *bio;
|
|
||||||
struct btrfs_io_bio *btrfs_failed_bio;
|
|
||||||
struct btrfs_io_bio *btrfs_bio;
|
|
||||||
|
|
||||||
bio = btrfs_io_bio_alloc(1);
|
|
||||||
bio->bi_end_io = endio_func;
|
|
||||||
bio->bi_iter.bi_sector = failrec->logical >> 9;
|
|
||||||
bio->bi_iter.bi_size = 0;
|
|
||||||
bio->bi_private = data;
|
|
||||||
|
|
||||||
btrfs_failed_bio = btrfs_io_bio(failed_bio);
|
|
||||||
if (btrfs_failed_bio->csum) {
|
|
||||||
u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
|
||||||
|
|
||||||
btrfs_bio = btrfs_io_bio(bio);
|
|
||||||
btrfs_bio->csum = btrfs_bio->csum_inline;
|
|
||||||
icsum *= csum_size;
|
|
||||||
memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + icsum,
|
|
||||||
csum_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
bio_add_page(bio, page, failrec->len, pg_offset);
|
|
||||||
btrfs_io_bio(bio)->logical = failrec->start;
|
|
||||||
btrfs_io_bio(bio)->iter = bio->bi_iter;
|
|
||||||
|
|
||||||
return bio;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
|
static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
|
||||||
{
|
{
|
||||||
struct bio_vec *bvec;
|
|
||||||
u64 len = 0;
|
u64 len = 0;
|
||||||
int i;
|
const u32 blocksize = inode->i_sb->s_blocksize;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If bi_status is BLK_STS_OK, then this was a checksum error, not an
|
* If bi_status is BLK_STS_OK, then this was a checksum error, not an
|
||||||
@@ -2653,72 +2617,99 @@ static bool btrfs_io_needs_validation(struct inode *inode, struct bio *bio)
|
|||||||
/*
|
/*
|
||||||
* We need to validate each sector individually if the failed I/O was
|
* We need to validate each sector individually if the failed I/O was
|
||||||
* for multiple sectors.
|
* for multiple sectors.
|
||||||
|
*
|
||||||
|
* There are a few possible bios that can end up here:
|
||||||
|
* 1. A buffered read bio, which is not cloned.
|
||||||
|
* 2. A direct I/O read bio, which is cloned.
|
||||||
|
* 3. A (buffered or direct) repair bio, which is not cloned.
|
||||||
|
*
|
||||||
|
* For cloned bios (case 2), we can get the size from
|
||||||
|
* btrfs_io_bio->iter; for non-cloned bios (cases 1 and 3), we can get
|
||||||
|
* it from the bvecs.
|
||||||
*/
|
*/
|
||||||
bio_for_each_bvec_all(bvec, bio, i) {
|
if (bio_flagged(bio, BIO_CLONED)) {
|
||||||
len += bvec->bv_len;
|
if (btrfs_io_bio(bio)->iter.bi_size > blocksize)
|
||||||
if (len > inode->i_sb->s_blocksize)
|
|
||||||
return true;
|
return true;
|
||||||
|
} else {
|
||||||
|
struct bio_vec *bvec;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
bio_for_each_bvec_all(bvec, bio, i) {
|
||||||
|
len += bvec->bv_len;
|
||||||
|
if (len > blocksize)
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
blk_status_t btrfs_submit_read_repair(struct inode *inode,
|
||||||
* This is a generic handler for readpage errors. If other copies exist, read
|
struct bio *failed_bio, u64 phy_offset,
|
||||||
* those and write back good data to the failed position. Does not investigate
|
struct page *page, unsigned int pgoff,
|
||||||
* in remapping the failed extent elsewhere, hoping the device will be smart
|
u64 start, u64 end, int failed_mirror,
|
||||||
* enough to do this as needed
|
submit_bio_hook_t *submit_bio_hook)
|
||||||
*/
|
|
||||||
static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
|
|
||||||
struct page *page, u64 start, u64 end,
|
|
||||||
int failed_mirror)
|
|
||||||
{
|
{
|
||||||
struct io_failure_record *failrec;
|
struct io_failure_record *failrec;
|
||||||
struct inode *inode = page->mapping->host;
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||||
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
||||||
|
struct btrfs_io_bio *failed_io_bio = btrfs_io_bio(failed_bio);
|
||||||
|
const int icsum = phy_offset >> inode->i_sb->s_blocksize_bits;
|
||||||
bool need_validation;
|
bool need_validation;
|
||||||
struct bio *bio;
|
struct bio *repair_bio;
|
||||||
int read_mode = 0;
|
struct btrfs_io_bio *repair_io_bio;
|
||||||
blk_status_t status;
|
blk_status_t status;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
btrfs_debug(fs_info,
|
||||||
|
"repair read error: read error at %llu", start);
|
||||||
|
|
||||||
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
|
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
|
||||||
|
|
||||||
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
|
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return errno_to_blk_status(ret);
|
||||||
|
|
||||||
need_validation = btrfs_io_needs_validation(inode, failed_bio);
|
need_validation = btrfs_io_needs_validation(inode, failed_bio);
|
||||||
|
|
||||||
if (!btrfs_check_repairable(inode, need_validation, failrec,
|
if (!btrfs_check_repairable(inode, need_validation, failrec,
|
||||||
failed_mirror)) {
|
failed_mirror)) {
|
||||||
free_io_failure(failure_tree, tree, failrec);
|
free_io_failure(failure_tree, tree, failrec);
|
||||||
return -EIO;
|
return BLK_STS_IOERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
repair_bio = btrfs_io_bio_alloc(1);
|
||||||
|
repair_io_bio = btrfs_io_bio(repair_bio);
|
||||||
|
repair_bio->bi_opf = REQ_OP_READ;
|
||||||
if (need_validation)
|
if (need_validation)
|
||||||
read_mode |= REQ_FAILFAST_DEV;
|
repair_bio->bi_opf |= REQ_FAILFAST_DEV;
|
||||||
|
repair_bio->bi_end_io = failed_bio->bi_end_io;
|
||||||
|
repair_bio->bi_iter.bi_sector = failrec->logical >> 9;
|
||||||
|
repair_bio->bi_private = failed_bio->bi_private;
|
||||||
|
|
||||||
phy_offset >>= inode->i_sb->s_blocksize_bits;
|
if (failed_io_bio->csum) {
|
||||||
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
|
const u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
|
||||||
start - page_offset(page),
|
|
||||||
(int)phy_offset, failed_bio->bi_end_io,
|
repair_io_bio->csum = repair_io_bio->csum_inline;
|
||||||
NULL);
|
memcpy(repair_io_bio->csum,
|
||||||
bio->bi_opf = REQ_OP_READ | read_mode;
|
failed_io_bio->csum + csum_size * icsum, csum_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
bio_add_page(repair_bio, page, failrec->len, pgoff);
|
||||||
|
repair_io_bio->logical = failrec->start;
|
||||||
|
repair_io_bio->iter = repair_bio->bi_iter;
|
||||||
|
|
||||||
btrfs_debug(btrfs_sb(inode->i_sb),
|
btrfs_debug(btrfs_sb(inode->i_sb),
|
||||||
"Repair Read Error: submitting new read[%#x] to this_mirror=%d, in_validation=%d",
|
"repair read error: submitting new read to mirror %d, in_validation=%d",
|
||||||
read_mode, failrec->this_mirror, failrec->in_validation);
|
failrec->this_mirror, failrec->in_validation);
|
||||||
|
|
||||||
status = tree->ops->submit_bio_hook(tree->private_data, bio, failrec->this_mirror,
|
status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
|
||||||
failrec->bio_flags);
|
failrec->bio_flags);
|
||||||
if (status) {
|
if (status) {
|
||||||
free_io_failure(failure_tree, tree, failrec);
|
free_io_failure(failure_tree, tree, failrec);
|
||||||
bio_put(bio);
|
bio_put(repair_bio);
|
||||||
ret = blk_status_to_errno(status);
|
|
||||||
}
|
}
|
||||||
|
return status;
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* lots and lots of room for performance fixes in the end_bio funcs */
|
/* lots and lots of room for performance fixes in the end_bio funcs */
|
||||||
@@ -2890,9 +2881,10 @@ static void end_bio_extent_readpage(struct bio *bio)
|
|||||||
* If it can't handle the error it will return -EIO and
|
* If it can't handle the error it will return -EIO and
|
||||||
* we remain responsible for that page.
|
* we remain responsible for that page.
|
||||||
*/
|
*/
|
||||||
ret = bio_readpage_error(bio, offset, page, start, end,
|
if (!btrfs_submit_read_repair(inode, bio, offset, page,
|
||||||
mirror);
|
start - page_offset(page),
|
||||||
if (ret == 0) {
|
start, end, mirror,
|
||||||
|
tree->ops->submit_bio_hook)) {
|
||||||
uptodate = !bio->bi_status;
|
uptodate = !bio->bi_status;
|
||||||
offset += len;
|
offset += len;
|
||||||
continue;
|
continue;
|
||||||
|
|||||||
@@ -66,6 +66,10 @@ struct btrfs_io_bio;
|
|||||||
struct io_failure_record;
|
struct io_failure_record;
|
||||||
struct extent_io_tree;
|
struct extent_io_tree;
|
||||||
|
|
||||||
|
typedef blk_status_t (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
|
||||||
|
int mirror_num,
|
||||||
|
unsigned long bio_flags);
|
||||||
|
|
||||||
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
|
typedef blk_status_t (extent_submit_bio_start_t)(void *private_data,
|
||||||
struct bio *bio, u64 bio_offset);
|
struct bio *bio, u64 bio_offset);
|
||||||
|
|
||||||
@@ -74,8 +78,7 @@ struct extent_io_ops {
|
|||||||
* The following callbacks must be always defined, the function
|
* The following callbacks must be always defined, the function
|
||||||
* pointer will be called unconditionally.
|
* pointer will be called unconditionally.
|
||||||
*/
|
*/
|
||||||
blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio,
|
submit_bio_hook_t *submit_bio_hook;
|
||||||
int mirror_num, unsigned long bio_flags);
|
|
||||||
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
|
int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
|
||||||
struct page *page, u64 start, u64 end,
|
struct page *page, u64 start, u64 end,
|
||||||
int mirror);
|
int mirror);
|
||||||
@@ -312,10 +315,12 @@ struct io_failure_record {
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct bio *btrfs_create_repair_bio(struct inode *inode, struct bio *failed_bio,
|
blk_status_t btrfs_submit_read_repair(struct inode *inode,
|
||||||
struct io_failure_record *failrec,
|
struct bio *failed_bio, u64 phy_offset,
|
||||||
struct page *page, int pg_offset, int icsum,
|
struct page *page, unsigned int pgoff,
|
||||||
bio_end_io_t *endio_func, void *data);
|
u64 start, u64 end, int failed_mirror,
|
||||||
|
submit_bio_hook_t *submit_bio_hook);
|
||||||
|
|
||||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||||
bool find_lock_delalloc_range(struct inode *inode,
|
bool find_lock_delalloc_range(struct inode *inode,
|
||||||
struct page *locked_page, u64 *start,
|
struct page *locked_page, u64 *start,
|
||||||
|
|||||||
105
fs/btrfs/inode.c
105
fs/btrfs/inode.c
@@ -7379,10 +7379,11 @@ static void btrfs_dio_private_put(struct btrfs_dio_private *dip)
|
|||||||
kfree(dip);
|
kfree(dip);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
|
static blk_status_t submit_dio_repair_bio(struct inode *inode, struct bio *bio,
|
||||||
struct bio *bio,
|
int mirror_num,
|
||||||
int mirror_num)
|
unsigned long bio_flags)
|
||||||
{
|
{
|
||||||
|
struct btrfs_dio_private *dip = bio->bi_private;
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||||
blk_status_t ret;
|
blk_status_t ret;
|
||||||
|
|
||||||
@@ -7392,96 +7393,11 @@ static inline blk_status_t submit_dio_repair_bio(struct inode *inode,
|
|||||||
if (ret)
|
if (ret)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int btrfs_check_dio_repairable(struct inode *inode,
|
|
||||||
struct bio *failed_bio,
|
|
||||||
struct io_failure_record *failrec,
|
|
||||||
int failed_mirror)
|
|
||||||
{
|
|
||||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
|
||||||
int num_copies;
|
|
||||||
|
|
||||||
num_copies = btrfs_num_copies(fs_info, failrec->logical, failrec->len);
|
|
||||||
if (num_copies == 1) {
|
|
||||||
/*
|
|
||||||
* we only have a single copy of the data, so don't bother with
|
|
||||||
* all the retry and error correction code that follows. no
|
|
||||||
* matter what the error is, it is very likely to persist.
|
|
||||||
*/
|
|
||||||
btrfs_debug(fs_info,
|
|
||||||
"Check DIO Repairable: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d",
|
|
||||||
num_copies, failrec->this_mirror, failed_mirror);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
failrec->failed_mirror = failed_mirror;
|
|
||||||
failrec->this_mirror++;
|
|
||||||
if (failrec->this_mirror == failed_mirror)
|
|
||||||
failrec->this_mirror++;
|
|
||||||
|
|
||||||
if (failrec->this_mirror > num_copies) {
|
|
||||||
btrfs_debug(fs_info,
|
|
||||||
"Check DIO Repairable: (fail) num_copies=%d, next_mirror %d, failed_mirror %d",
|
|
||||||
num_copies, failrec->this_mirror, failed_mirror);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static blk_status_t dio_read_error(struct inode *inode, struct bio *failed_bio,
|
|
||||||
struct page *page, unsigned int pgoff,
|
|
||||||
u64 start, u64 end, int failed_mirror)
|
|
||||||
{
|
|
||||||
struct btrfs_dio_private *dip = failed_bio->bi_private;
|
|
||||||
struct io_failure_record *failrec;
|
|
||||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
|
||||||
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
|
|
||||||
struct bio *bio;
|
|
||||||
int isector;
|
|
||||||
unsigned int read_mode = 0;
|
|
||||||
int ret;
|
|
||||||
blk_status_t status;
|
|
||||||
|
|
||||||
BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
|
|
||||||
|
|
||||||
ret = btrfs_get_io_failure_record(inode, start, end, &failrec);
|
|
||||||
if (ret)
|
|
||||||
return errno_to_blk_status(ret);
|
|
||||||
|
|
||||||
ret = btrfs_check_dio_repairable(inode, failed_bio, failrec,
|
|
||||||
failed_mirror);
|
|
||||||
if (!ret) {
|
|
||||||
free_io_failure(failure_tree, io_tree, failrec);
|
|
||||||
return BLK_STS_IOERR;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (btrfs_io_bio(failed_bio)->iter.bi_size > inode->i_sb->s_blocksize)
|
|
||||||
read_mode |= REQ_FAILFAST_DEV;
|
|
||||||
|
|
||||||
isector = start - btrfs_io_bio(failed_bio)->logical;
|
|
||||||
isector >>= inode->i_sb->s_blocksize_bits;
|
|
||||||
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page, pgoff,
|
|
||||||
isector, failed_bio->bi_end_io, dip);
|
|
||||||
bio->bi_opf = REQ_OP_READ | read_mode;
|
|
||||||
|
|
||||||
btrfs_debug(BTRFS_I(inode)->root->fs_info,
|
|
||||||
"repair DIO read error: submitting new dio read[%#x] to this_mirror=%d, in_validation=%d",
|
|
||||||
read_mode, failrec->this_mirror, failrec->in_validation);
|
|
||||||
|
|
||||||
refcount_inc(&dip->refs);
|
refcount_inc(&dip->refs);
|
||||||
status = submit_dio_repair_bio(inode, bio, failrec->this_mirror);
|
ret = btrfs_map_bio(fs_info, bio, mirror_num);
|
||||||
if (status) {
|
if (ret)
|
||||||
free_io_failure(failure_tree, io_tree, failrec);
|
|
||||||
bio_put(bio);
|
|
||||||
refcount_dec(&dip->refs);
|
refcount_dec(&dip->refs);
|
||||||
}
|
return ret;
|
||||||
|
|
||||||
return status;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
|
static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
|
||||||
@@ -7517,11 +7433,14 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
|
|||||||
} else {
|
} else {
|
||||||
blk_status_t status;
|
blk_status_t status;
|
||||||
|
|
||||||
status = dio_read_error(inode, &io_bio->bio,
|
status = btrfs_submit_read_repair(inode,
|
||||||
|
&io_bio->bio,
|
||||||
|
start - io_bio->logical,
|
||||||
bvec.bv_page, pgoff,
|
bvec.bv_page, pgoff,
|
||||||
start,
|
start,
|
||||||
start + sectorsize - 1,
|
start + sectorsize - 1,
|
||||||
io_bio->mirror_num);
|
io_bio->mirror_num,
|
||||||
|
submit_dio_repair_bio);
|
||||||
if (status)
|
if (status)
|
||||||
err = status;
|
err = status;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user