btrfs: convert the io_failure_tree to a plain rb_tree

We still have this oddity of stashing the io_failure_record in the
extent state for the io_failure_tree, which is leftover from when we
used to stuff private pointers in extent_io_trees.

However, this doesn't make a lot of sense for the io failure records; we
can simply use a normal rb_tree for this.  This will allow us to further
simplify the extent_io_tree code by removing the io_failure_rec pointer
from the extent state.

Convert the io_failure_tree to an rb tree + spinlock in the inode, and
then use our rb tree simple helpers to insert and find failed records.
This greatly cleans up this code and makes it easier to separate out the
extent_io_tree code.

Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Josef Bacik 2022-09-09 17:53:16 -04:00 committed by David Sterba
parent a206174805
commit 87c11705cc
7 changed files with 96 additions and 120 deletions

View File

@ -94,7 +94,8 @@ struct btrfs_inode {
/* special utility tree used to record which mirrors have already been
* tried when checksums fail for a given block
*/
struct extent_io_tree io_failure_tree;
struct rb_root io_failure_tree;
spinlock_t io_failure_lock;
/*
* Keep track of where the inode has extent items mapped in order to

View File

@ -56,7 +56,6 @@ enum {
IO_TREE_FS_EXCLUDED_EXTENTS,
IO_TREE_BTREE_INODE_IO,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
@ -89,8 +88,6 @@ struct extent_state {
refcount_t refs;
u32 state;
struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif

View File

@ -326,7 +326,6 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
if (!state)
return state;
state->state = 0;
state->failrec = NULL;
RB_CLEAR_NODE(&state->rb_node);
btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
refcount_set(&state->refs, 1);
@ -2159,64 +2158,29 @@ out:
return total_bytes;
}
/*
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
static int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec)
static int insert_failrec(struct btrfs_inode *inode,
struct io_failure_record *failrec)
{
struct rb_node *node;
struct extent_state *state;
int ret = 0;
struct rb_node *exist;
spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
if (!node) {
ret = -ENOENT;
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
if (state->start != start) {
ret = -ENOENT;
goto out;
}
state->failrec = failrec;
out:
spin_unlock(&tree->lock);
return ret;
spin_lock(&inode->io_failure_lock);
exist = rb_simple_insert(&inode->io_failure_tree, failrec->bytenr,
&failrec->rb_node);
spin_unlock(&inode->io_failure_lock);
return (exist == NULL) ? 0 : -EEXIST;
}
static struct io_failure_record *get_state_failrec(struct extent_io_tree *tree,
u64 start)
static struct io_failure_record *get_failrec(struct btrfs_inode *inode, u64 start)
{
struct rb_node *node;
struct extent_state *state;
struct io_failure_record *failrec;
struct io_failure_record *failrec = ERR_PTR(-ENOENT);
spin_lock(&tree->lock);
/*
* this search will find all the extents that end after
* our range starts.
*/
node = tree_search(tree, start);
if (!node) {
failrec = ERR_PTR(-ENOENT);
goto out;
}
state = rb_entry(node, struct extent_state, rb_node);
if (state->start != start) {
failrec = ERR_PTR(-ENOENT);
goto out;
}
failrec = state->failrec;
out:
spin_unlock(&tree->lock);
spin_lock(&inode->io_failure_lock);
node = rb_simple_search(&inode->io_failure_tree, start);
if (node)
failrec = rb_entry(node, struct io_failure_record, rb_node);
spin_unlock(&inode->io_failure_lock);
return failrec;
}
@ -2276,28 +2240,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
return bitset;
}
static int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
static int free_io_failure(struct btrfs_inode *inode,
struct io_failure_record *rec)
{
int ret;
int err = 0;
set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_LOCKED | EXTENT_DIRTY);
if (ret)
err = ret;
spin_lock(&inode->io_failure_lock);
rb_erase(&rec->rb_node, &inode->io_failure_tree);
spin_unlock(&inode->io_failure_lock);
ret = clear_extent_bits(io_tree, rec->start,
rec->start + rec->len - 1,
ret = clear_extent_bits(&inode->io_tree, rec->bytenr,
rec->bytenr + rec->len - 1,
EXTENT_DAMAGED);
if (ret && !err)
err = ret;
kfree(rec);
return err;
return ret;
}
/*
@ -2436,22 +2392,13 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
struct page *page, unsigned int pg_offset)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_io_tree *failure_tree = &inode->io_failure_tree;
struct extent_io_tree *io_tree = &inode->io_tree;
u64 ino = btrfs_ino(inode);
u64 private;
struct io_failure_record *failrec;
struct extent_state *state;
int mirror;
int ret;
private = 0;
ret = count_range_bits(failure_tree, &private, (u64)-1, 1,
EXTENT_DIRTY, 0);
if (!ret)
return 0;
failrec = get_state_failrec(failure_tree, start);
failrec = get_failrec(inode, start);
if (IS_ERR(failrec))
return 0;
@ -2462,12 +2409,12 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
spin_lock(&io_tree->lock);
state = find_first_extent_bit_state(io_tree,
failrec->start,
failrec->bytenr,
EXTENT_LOCKED);
spin_unlock(&io_tree->lock);
if (!state || state->start > failrec->start ||
state->end < failrec->start + failrec->len - 1)
if (!state || state->start > failrec->bytenr ||
state->end < failrec->bytenr + failrec->len - 1)
goto out;
mirror = failrec->this_mirror;
@ -2478,7 +2425,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
} while (mirror != failrec->failed_mirror);
out:
free_io_failure(failure_tree, io_tree, failrec);
free_io_failure(inode, failrec);
return 0;
}
@ -2490,30 +2437,26 @@ out:
*/
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end)
{
struct extent_io_tree *failure_tree = &inode->io_failure_tree;
struct io_failure_record *failrec;
struct extent_state *state, *next;
struct rb_node *node, *next;
if (RB_EMPTY_ROOT(&failure_tree->state))
if (RB_EMPTY_ROOT(&inode->io_failure_tree))
return;
spin_lock(&failure_tree->lock);
state = find_first_extent_bit_state(failure_tree, start, EXTENT_DIRTY);
while (state) {
if (state->start > end)
spin_lock(&inode->io_failure_lock);
node = rb_simple_search_first(&inode->io_failure_tree, start);
while (node) {
failrec = rb_entry(node, struct io_failure_record, rb_node);
if (failrec->bytenr > end)
break;
ASSERT(state->end <= end);
next = next_state(state);
failrec = state->failrec;
free_extent_state(state);
next = rb_next(node);
rb_erase(&failrec->rb_node, &inode->io_failure_tree);
kfree(failrec);
state = next;
node = next;
}
spin_unlock(&failure_tree->lock);
spin_unlock(&inode->io_failure_lock);
}
static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode,
@ -2523,16 +2466,15 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
u64 start = bbio->file_offset + bio_offset;
struct io_failure_record *failrec;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
const u32 sectorsize = fs_info->sectorsize;
int ret;
failrec = get_state_failrec(failure_tree, start);
failrec = get_failrec(BTRFS_I(inode), start);
if (!IS_ERR(failrec)) {
btrfs_debug(fs_info,
"Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu",
failrec->logical, failrec->start, failrec->len);
failrec->logical, failrec->bytenr, failrec->len);
/*
* when data can be on disk more than twice, add to failrec here
* (e.g. with a list for failed_mirror) to make
@ -2547,7 +2489,8 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
if (!failrec)
return ERR_PTR(-ENOMEM);
failrec->start = start;
RB_CLEAR_NODE(&failrec->rb_node);
failrec->bytenr = start;
failrec->len = sectorsize;
failrec->failed_mirror = bbio->mirror_num;
failrec->this_mirror = bbio->mirror_num;
@ -2572,17 +2515,17 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
}
/* Set the bits in the private failure tree */
ret = set_extent_bits(failure_tree, start, start + sectorsize - 1,
EXTENT_LOCKED | EXTENT_DIRTY);
if (ret >= 0) {
ret = set_state_failrec(failure_tree, start, failrec);
/* Set the bits in the inode's tree */
ret = set_extent_bits(tree, start, start + sectorsize - 1,
EXTENT_DAMAGED);
} else if (ret < 0) {
ret = insert_failrec(BTRFS_I(inode), failrec);
if (ret) {
kfree(failrec);
return ERR_PTR(ret);
}
ret = set_extent_bits(tree, start, start + sectorsize - 1,
EXTENT_DAMAGED);
if (ret) {
free_io_failure(BTRFS_I(inode), failrec);
return ERR_PTR(ret);
}
return failrec;
}
@ -2594,8 +2537,6 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
u64 start = failed_bbio->file_offset + bio_offset;
struct io_failure_record *failrec;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct bio *failed_bio = &failed_bbio->bio;
const int icsum = bio_offset >> fs_info->sectorsize_bits;
struct bio *repair_bio;
@ -2624,7 +2565,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
btrfs_debug(fs_info,
"failed to repair num_copies %d this_mirror %d failed_mirror %d",
failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
free_io_failure(failure_tree, tree, failrec);
free_io_failure(BTRFS_I(inode), failrec);
return -EIO;
}

View File

@ -254,8 +254,12 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
/* Use rb_simple_node for search/insert */
struct {
struct rb_node rb_node;
u64 bytenr;
};
struct page *page;
u64 start;
u64 len;
u64 logical;
int this_mirror;

View File

@ -8790,6 +8790,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_log_commit = 0;
spin_lock_init(&ei->lock);
spin_lock_init(&ei->io_failure_lock);
ei->outstanding_extents = 0;
if (sb->s_magic != BTRFS_TEST_MAGIC)
btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv,
@ -8806,12 +8807,10 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
inode = &ei->vfs_inode;
extent_map_tree_init(&ei->extent_tree);
extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
extent_io_tree_init(fs_info, &ei->io_failure_tree,
IO_TREE_INODE_IO_FAILURE, inode);
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT, inode);
ei->io_failure_tree = RB_ROOT;
ei->io_tree.track_uptodate = true;
ei->io_failure_tree.track_uptodate = true;
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);

View File

@ -88,6 +88,41 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
return NULL;
}
/*
 * Find the first entry in @root that starts at or after @bytenr.
 *
 * @root:   the root to search.
 * @bytenr: bytenr to search from.
 *
 * Every node is interpreted as a struct rb_simple_node.  If a node whose
 * bytenr equals @bytenr is met on the way down it is returned immediately.
 * Otherwise return the node with the smallest bytenr greater than @bytenr
 * that was visited, or NULL if there is no entry at or after @bytenr.
 */
static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
						     u64 bytenr)
{
	struct rb_simple_node *best = NULL;
	struct rb_node *cur;

	for (cur = root->rb_node; cur; ) {
		struct rb_simple_node *candidate;

		candidate = rb_entry(cur, struct rb_simple_node, rb_node);

		/* Exact hit: nothing can be a better answer. */
		if (candidate->bytenr == bytenr)
			return cur;

		/* Entry is before @bytenr, only the right subtree can match. */
		if (candidate->bytenr < bytenr) {
			cur = cur->rb_right;
			continue;
		}

		/*
		 * Entry is after @bytenr: remember it if it is the closest one
		 * seen so far, then keep looking to the left for a closer one.
		 */
		if (!best || candidate->bytenr < best->bytenr)
			best = candidate;
		cur = cur->rb_left;
	}

	return best ? &best->rb_node : NULL;
}
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
struct rb_node *node)
{

View File

@ -84,7 +84,6 @@ struct raid56_bio_trace_info;
EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
EM( IO_TREE_BTREE_INODE_IO, "BTREE_INODE_IO") \
EM( IO_TREE_INODE_IO, "INODE_IO") \
EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \