btrfs: avoid allocating and running pointless delayed extent operations

We always allocate a delayed extent op structure when allocating a tree
block (except for log trees), but most of the time we don't need it as
we only need to set the BTRFS_BLOCK_FLAG_FULL_BACKREF if we're dealing
with a relocation tree and we only need to set the key of a tree block
in a btrfs_tree_block_info structure if we are not using skinny metadata
(feature enabled by default since btrfs-progs 3.18 and available as of
kernel 3.10).

In these cases, where we don't need neither to update flags nor to set
the key, we only use the delayed extent op structure to set the tree
block's level. This is a waste of memory and besides that, the memory
allocation can fail and can add additional latency.

Instead of using a delayed extent op structure to store the level of
the tree block, use the delayed ref head to store it. This doesn't
change the size of neither structure and helps us avoid allocating
delayed extent ops structures when using the skinny metadata feature
and there's no relocation going on. This also gets rid of a BUG_ON().

For example, for a fs_mark run, with 5 iterations, 8 threads and 100K
files per iteration, before this patch there were 118109 allocations
of delayed extent op structures and after it there were none.

Reviewed-by: Boris Burkov <boris@bur.io>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
Filipe Manana 2024-06-20 19:51:32 +01:00 committed by David Sterba
parent 33336c1805
commit eba1469f8f
3 changed files with 39 additions and 39 deletions

View File

@ -819,6 +819,12 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
/* If not metadata set an impossible level to help debugging. */
if (generic_ref->type == BTRFS_REF_METADATA)
head_ref->level = generic_ref->tree_ref.level;
else
head_ref->level = U8_MAX;
if (qrecord) {
if (generic_ref->ref_root && reserved) {
qrecord->data_rsv = reserved;
@ -1072,7 +1078,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
}
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 bytenr, u64 num_bytes, u8 level,
struct btrfs_delayed_extent_op *extent_op)
{
struct btrfs_delayed_ref_head *head_ref;
@ -1082,6 +1088,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
.action = BTRFS_UPDATE_DELAYED_HEAD,
.bytenr = bytenr,
.num_bytes = num_bytes,
.tree_ref.level = level,
};
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);

View File

@ -108,7 +108,6 @@ struct btrfs_delayed_ref_node {
struct btrfs_delayed_extent_op {
struct btrfs_disk_key key;
u8 level;
bool update_key;
bool update_flags;
u64 flags_to_set;
@ -172,6 +171,9 @@ struct btrfs_delayed_ref_head {
*/
u64 reserved_bytes;
/* Tree block level, for metadata only. */
u8 level;
/*
* when a new extent is allocated, it is just reserved in memory
* The actual extent isn't inserted into the extent allocation tree
@ -355,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_ref *generic_ref,
u64 reserved);
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
u64 bytenr, u64 num_bytes,
u64 bytenr, u64 num_bytes, u8 level,
struct btrfs_delayed_extent_op *extent_op);
void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,

View File

@ -200,19 +200,8 @@ search_again:
goto search_again;
}
spin_lock(&head->lock);
if (head->extent_op && head->extent_op->update_flags) {
if (head->extent_op && head->extent_op->update_flags)
extent_flags |= head->extent_op->flags_to_set;
} else if (unlikely(num_refs == 0)) {
spin_unlock(&head->lock);
mutex_unlock(&head->mutex);
spin_unlock(&delayed_refs->lock);
ret = -EUCLEAN;
btrfs_err(fs_info,
"unexpected zero reference count for extent %llu (%s)",
bytenr, metadata ? "metadata" : "data");
btrfs_abort_transaction(trans, ret);
goto out_free;
}
num_refs += head->ref_mod;
spin_unlock(&head->lock);
@ -1632,7 +1621,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
if (metadata) {
key.type = BTRFS_METADATA_ITEM_KEY;
key.offset = extent_op->level;
key.offset = head->level;
} else {
key.type = BTRFS_EXTENT_ITEM_KEY;
key.offset = head->num_bytes;
@ -1667,7 +1656,7 @@ again:
ret = -EUCLEAN;
btrfs_err(fs_info,
"missing extent item for extent %llu num_bytes %llu level %d",
head->bytenr, head->num_bytes, extent_op->level);
head->bytenr, head->num_bytes, head->level);
goto out;
}
}
@ -1726,7 +1715,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
.generation = trans->transid,
};
BUG_ON(!extent_op || !extent_op->update_flags);
ret = alloc_reserved_tree_block(trans, node, extent_op);
if (!ret)
btrfs_record_squota_delta(fs_info, &delta);
@ -2233,7 +2221,6 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags)
{
struct btrfs_delayed_extent_op *extent_op;
int level = btrfs_header_level(eb);
int ret;
extent_op = btrfs_alloc_delayed_extent_op();
@ -2243,9 +2230,9 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
extent_op->flags_to_set = flags;
extent_op->update_flags = true;
extent_op->update_key = false;
extent_op->level = level;
ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len,
btrfs_header_level(eb), extent_op);
if (ret)
btrfs_free_delayed_extent_op(extent_op);
return ret;
@ -4866,7 +4853,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_path *path;
struct extent_buffer *leaf;
u32 size = sizeof(*extent_item) + sizeof(*iref);
u64 flags = extent_op->flags_to_set;
const u64 flags = (extent_op ? extent_op->flags_to_set : 0);
/* The owner of a tree block is the level. */
int level = btrfs_delayed_ref_owner(node);
bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
@ -5123,7 +5110,6 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
struct btrfs_key ins;
struct btrfs_block_rsv *block_rsv;
struct extent_buffer *buf;
struct btrfs_delayed_extent_op *extent_op;
u64 flags = 0;
int ret;
u32 blocksize = fs_info->nodesize;
@ -5166,6 +5152,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
BUG_ON(parent > 0);
if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_ref generic_ref = {
.action = BTRFS_ADD_DELAYED_EXTENT,
.bytenr = ins.objectid,
@ -5174,30 +5161,34 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
.owning_root = owning_root,
.ref_root = root_objectid,
};
extent_op = btrfs_alloc_delayed_extent_op();
if (!extent_op) {
ret = -ENOMEM;
goto out_free_buf;
if (!skinny_metadata || flags != 0) {
extent_op = btrfs_alloc_delayed_extent_op();
if (!extent_op) {
ret = -ENOMEM;
goto out_free_buf;
}
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = (skinny_metadata ? false : true);
extent_op->update_flags = (flags != 0);
} else {
extent_op = NULL;
}
if (key)
memcpy(&extent_op->key, key, sizeof(extent_op->key));
else
memset(&extent_op->key, 0, sizeof(extent_op->key));
extent_op->flags_to_set = flags;
extent_op->update_key = skinny_metadata ? false : true;
extent_op->update_flags = true;
extent_op->level = level;
btrfs_init_tree_ref(&generic_ref, level, btrfs_root_id(root), false);
btrfs_ref_tree_mod(fs_info, &generic_ref);
ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op);
if (ret)
goto out_free_delayed;
if (ret) {
btrfs_free_delayed_extent_op(extent_op);
goto out_free_buf;
}
}
return buf;
out_free_delayed:
btrfs_free_delayed_extent_op(extent_op);
out_free_buf:
btrfs_tree_unlock(buf);
free_extent_buffer(buf);