for-5.7-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAl6CDIMACgkQxWXV+ddt WDuJ9g/+NTVt+OXAX3G4VLAIR6EjugREAmiHPlojM7scKsmkBuH9BN35+2EPj+yS rSmdL01nOH3gyqe+RzAc1EEiujH/9uDpkNf4zE1tGtj9m5Useqj8ZNmiG/BN0PmR OJZkVb8DXUHEXIFscHjQJPP60kFZoqIovS7qZbDh4992+p98lTiUUEI6SPanVYeR QysXxmafty03hQMFW93ohFZemwAELVVI44nHxxcmOHT5BbIIopXrkInkkchB9I6b l+tIJx1gjL6k0D3v/TTqRuD+wGCE8InJgtiuEOf0WkHp2YXUlSDaKAnF/j9Le4oe eOgc50LtA3YNGmZ2m5vTeRjBeU9qUPWjJWJ2urp87oIrxX5x7B5Hsjxdnn28P0yZ dl/dt9HxeCKFgaRrMZYETYq9VBt0IMxiOIG9w5fukB9qnC6Dd05dXyQB0slg0+l1 chn5p0FtMS74cvXB32jW7N0fwxWNt6KI4zBvomabJGYZQd6+dyDO8l8Od86vvve/ w7KgRy7CFBjc9JOCyLTvS8eEhu/qAVc07phSblpdNnyzPFjWWTdZySON/qQYvUCf cGDiq+5+1d1+kWuEjtYNzvxon2AaAfg7UBZm5FrjN735ojTQXqm2vi3rrurcU5AZ ItmiU6DMre5EGZ+hfWgSPXDkeqx/JYbtDuUwWbNg6svTXaKKnmI= =1m9l -----END PGP SIGNATURE----- Merge tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "A number of core changes that make things work better in general, code is simpler and cleaner. 
Core changes: - per-inode file extent tree, for in-memory tracking of contiguous extent ranges to make sure i_size adjustments are accurate - tree root structures are protected by reference counts, replacing SRCU that did not cover some cases - leak detector for tree root structures - per-transaction pinned extent tracking - buffer heads are replaced by bios for super block access - speedup of extent back reference resolution, on an example test scenario the runtime of send went down from an hour to minutes - factor out locking scheme used for subvolume writer and NOCOW exclusion, abstracted as DREW lock, double reader-writer exclusion (allow either readers or writers) - cleanup and abstract extent allocation policies, preparation for zoned device support - make reflink/clone_range work on inline extents - add more cancellation points for relocation, improves long response from 'balance cancel' - add page migration callback for data pages - switch to guid for uuids, with additional cleanups of the interface - make ranged full fsyncs more efficient - removal of obsolete ioctl flag BTRFS_SUBVOL_CREATE_ASYNC - remove b-tree readahead from delayed refs paths, avoiding seeks and reads of unnecessary blocks Features: - v2 of ioctl to delete subvolumes, allowing to delete by id and more future extensions Fixes: - fix qgroup rescan worker that could block umount - fix crash during unmount due to race with delayed inode workers - fix delalloc flushing logic that could create unnecessary chunks under heavy load - fix missing file extent item for hole after ranged fsync - several fixes in relocation error handling Other: - more documentation of relocation, device replace, space reservations - many random cleanups" * tag 'for-5.7-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (210 commits) btrfs: fix missing semaphore unlock in btrfs_sync_file btrfs: use nofs allocations for running delayed items btrfs: sysfs: Use scnprintf() instead of snprintf() btrfs: do not
resolve backrefs for roots that are being deleted btrfs: track reloc roots based on their commit root bytenr btrfs: restart relocate_tree_blocks properly btrfs: reloc: reorder reservation before root selection btrfs: do not readahead in build_backref_tree btrfs: do not use readahead for running delayed refs btrfs: Remove async_transid from btrfs_mksubvol/create_subvol/create_snapshot btrfs: Remove transid argument from btrfs_ioctl_snap_create_transid btrfs: Remove BTRFS_SUBVOL_CREATE_ASYNC support btrfs: kill the subvol_srcu btrfs: make btrfs_cleanup_fs_roots use the radix tree lock btrfs: don't take an extra root ref at allocation time btrfs: hold a ref on the root on the dead roots list btrfs: make inodes hold a ref on their roots btrfs: move the root freeing stuff into btrfs_put_root btrfs: move ino_cache_inode dropping out of btrfs_free_fs_root btrfs: make the extent buffer leak check per fs info ...
This commit is contained in:
commit
15c981d16d
@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
|
||||
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
|
||||
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
||||
block-rsv.o delalloc-space.o block-group.o discard.o
|
||||
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
|
||||
|
@ -395,3 +395,11 @@ void btrfs_set_work_high_priority(struct btrfs_work *work)
|
||||
{
|
||||
set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
|
||||
}
|
||||
|
||||
void btrfs_flush_workqueue(struct btrfs_workqueue *wq)
|
||||
{
|
||||
if (wq->high)
|
||||
flush_workqueue(wq->high->normal_wq);
|
||||
|
||||
flush_workqueue(wq->normal->normal_wq);
|
||||
}
|
||||
|
@ -44,5 +44,6 @@ void btrfs_set_work_high_priority(struct btrfs_work *work);
|
||||
struct btrfs_fs_info * __pure btrfs_work_owner(const struct btrfs_work *work);
|
||||
struct btrfs_fs_info * __pure btrfs_workqueue_owner(const struct __btrfs_workqueue *wq);
|
||||
bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq);
|
||||
void btrfs_flush_workqueue(struct btrfs_workqueue *wq);
|
||||
|
||||
#endif
|
||||
|
@ -347,33 +347,10 @@ static int add_prelim_ref(const struct btrfs_fs_info *fs_info,
|
||||
return -ENOMEM;
|
||||
|
||||
ref->root_id = root_id;
|
||||
if (key) {
|
||||
if (key)
|
||||
ref->key_for_search = *key;
|
||||
/*
|
||||
* We can often find data backrefs with an offset that is too
|
||||
* large (>= LLONG_MAX, maximum allowed file offset) due to
|
||||
* underflows when subtracting a file's offset with the data
|
||||
* offset of its corresponding extent data item. This can
|
||||
* happen for example in the clone ioctl.
|
||||
* So if we detect such case we set the search key's offset to
|
||||
* zero to make sure we will find the matching file extent item
|
||||
* at add_all_parents(), otherwise we will miss it because the
|
||||
* offset taken from the backref is much larger than the offset
|
||||
* of the file extent item. This can make us scan a very large
|
||||
* number of file extent items, but at least it will not make
|
||||
* us miss any.
|
||||
* This is an ugly workaround for a behaviour that should have
|
||||
* never existed, but it does and a fix for the clone ioctl
|
||||
* would touch a lot of places, cause backwards incompatibility
|
||||
* and would not fix the problem for extents cloned with older
|
||||
* kernels.
|
||||
*/
|
||||
if (ref->key_for_search.type == BTRFS_EXTENT_DATA_KEY &&
|
||||
ref->key_for_search.offset >= LLONG_MAX)
|
||||
ref->key_for_search.offset = 0;
|
||||
} else {
|
||||
else
|
||||
memset(&ref->key_for_search, 0, sizeof(ref->key_for_search));
|
||||
}
|
||||
|
||||
ref->inode_list = NULL;
|
||||
ref->level = level;
|
||||
@ -409,10 +386,36 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
|
||||
wanted_disk_byte, count, sc, gfp_mask);
|
||||
}
|
||||
|
||||
/*
 * Search the direct preftree of @preftrees for an entry matching @bytenr.
 *
 * The search key is a prelim_ref whose only non-zero field is ->parent
 * (set to @bytenr); entries are ordered with prelim_ref_compare().
 *
 * Returns 1 when an entry comparing equal to the key exists, 0 otherwise.
 */
static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr)
{
	struct prelim_ref target = { .parent = bytenr };
	struct rb_node *node = preftrees->direct.root.rb_root.rb_node;

	while (node) {
		struct prelim_ref *entry;
		int cmp;

		entry = rb_entry(node, struct prelim_ref, rbnode);
		cmp = prelim_ref_compare(entry, &target);
		if (cmp == 0)
			return 1;

		/*
		 * Descend left when the entry compares lower than the key,
		 * right when it compares higher.
		 */
		node = (cmp < 0) ? node->rb_left : node->rb_right;
	}

	return 0;
}
|
||||
|
||||
static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
struct ulist *parents, struct prelim_ref *ref,
|
||||
struct ulist *parents,
|
||||
struct preftrees *preftrees, struct prelim_ref *ref,
|
||||
int level, u64 time_seq, const u64 *extent_item_pos,
|
||||
u64 total_refs, bool ignore_offset)
|
||||
bool ignore_offset)
|
||||
{
|
||||
int ret = 0;
|
||||
int slot;
|
||||
@ -424,6 +427,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
u64 disk_byte;
|
||||
u64 wanted_disk_byte = ref->wanted_disk_byte;
|
||||
u64 count = 0;
|
||||
u64 data_offset;
|
||||
|
||||
if (level != 0) {
|
||||
eb = path->nodes[level];
|
||||
@ -434,18 +438,26 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
}
|
||||
|
||||
/*
|
||||
* We normally enter this function with the path already pointing to
|
||||
* the first item to check. But sometimes, we may enter it with
|
||||
* slot==nritems. In that case, go to the next leaf before we continue.
|
||||
* 1. We normally enter this function with the path already pointing to
|
||||
* the first item to check. But sometimes, we may enter it with
|
||||
* slot == nritems.
|
||||
* 2. We are searching for normal backref but bytenr of this leaf
|
||||
* matches shared data backref
|
||||
* 3. The leaf owner is not equal to the root we are searching
|
||||
*
|
||||
* For these cases, go to the next leaf before we continue.
|
||||
*/
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
eb = path->nodes[0];
|
||||
if (path->slots[0] >= btrfs_header_nritems(eb) ||
|
||||
is_shared_data_backref(preftrees, eb->start) ||
|
||||
ref->root_id != btrfs_header_owner(eb)) {
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
else
|
||||
ret = btrfs_next_old_leaf(root, path, time_seq);
|
||||
}
|
||||
|
||||
while (!ret && count < total_refs) {
|
||||
while (!ret && count < ref->count) {
|
||||
eb = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
|
||||
@ -455,13 +467,31 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
|
||||
key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We are searching for normal backref but bytenr of this leaf
|
||||
* matches shared data backref, OR
|
||||
* the leaf owner is not equal to the root we are searching for
|
||||
*/
|
||||
if (slot == 0 &&
|
||||
(is_shared_data_backref(preftrees, eb->start) ||
|
||||
ref->root_id != btrfs_header_owner(eb))) {
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
else
|
||||
ret = btrfs_next_old_leaf(root, path, time_seq);
|
||||
continue;
|
||||
}
|
||||
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
|
||||
disk_byte = btrfs_file_extent_disk_bytenr(eb, fi);
|
||||
data_offset = btrfs_file_extent_offset(eb, fi);
|
||||
|
||||
if (disk_byte == wanted_disk_byte) {
|
||||
eie = NULL;
|
||||
old = NULL;
|
||||
count++;
|
||||
if (ref->key_for_search.offset == key.offset - data_offset)
|
||||
count++;
|
||||
else
|
||||
goto next;
|
||||
if (extent_item_pos) {
|
||||
ret = check_extent_in_eb(&key, eb, fi,
|
||||
*extent_item_pos,
|
||||
@ -502,9 +532,9 @@ next:
|
||||
*/
|
||||
static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, u64 time_seq,
|
||||
struct preftrees *preftrees,
|
||||
struct prelim_ref *ref, struct ulist *parents,
|
||||
const u64 *extent_item_pos, u64 total_refs,
|
||||
bool ignore_offset)
|
||||
const u64 *extent_item_pos, bool ignore_offset)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_key root_key;
|
||||
@ -512,23 +542,25 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
int ret = 0;
|
||||
int root_level;
|
||||
int level = ref->level;
|
||||
int index;
|
||||
struct btrfs_key search_key = ref->key_for_search;
|
||||
|
||||
root_key.objectid = ref->root_id;
|
||||
root_key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
root_key.offset = (u64)-1;
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
|
||||
root = btrfs_get_fs_root(fs_info, &root_key, false);
|
||||
if (IS_ERR(root)) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = PTR_ERR(root);
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (!path->search_commit_root &&
|
||||
test_bit(BTRFS_ROOT_DELETING, &root->state)) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (btrfs_is_testing(fs_info)) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
@ -540,21 +572,36 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
else
|
||||
root_level = btrfs_old_root_level(root, time_seq);
|
||||
|
||||
if (root_level + 1 == level) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
if (root_level + 1 == level)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can often find data backrefs with an offset that is too large
|
||||
* (>= LLONG_MAX, maximum allowed file offset) due to underflows when
|
||||
* subtracting a file's offset with the data offset of its
|
||||
* corresponding extent data item. This can happen for example in the
|
||||
* clone ioctl.
|
||||
*
|
||||
* So if we detect such case we set the search key's offset to zero to
|
||||
* make sure we will find the matching file extent item at
|
||||
* add_all_parents(), otherwise we will miss it because the offset
|
||||
* taken from the backref is much larger than the offset of the file
|
||||
* extent item. This can make us scan a very large number of file
|
||||
* extent items, but at least it will not make us miss any.
|
||||
*
|
||||
* This is an ugly workaround for a behaviour that should have never
|
||||
* existed, but it does and a fix for the clone ioctl would touch a lot
|
||||
* of places, cause backwards incompatibility and would not fix the
|
||||
* problem for extents cloned with older kernels.
|
||||
*/
|
||||
if (search_key.type == BTRFS_EXTENT_DATA_KEY &&
|
||||
search_key.offset >= LLONG_MAX)
|
||||
search_key.offset = 0;
|
||||
path->lowest_level = level;
|
||||
if (time_seq == SEQ_LAST)
|
||||
ret = btrfs_search_slot(NULL, root, &ref->key_for_search, path,
|
||||
0, 0);
|
||||
ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
|
||||
else
|
||||
ret = btrfs_search_old_slot(root, &ref->key_for_search, path,
|
||||
time_seq);
|
||||
|
||||
/* root node has been locked, we can release @subvol_srcu safely here */
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = btrfs_search_old_slot(root, &search_key, path, time_seq);
|
||||
|
||||
btrfs_debug(fs_info,
|
||||
"search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)",
|
||||
@ -574,9 +621,11 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
|
||||
eb = path->nodes[level];
|
||||
}
|
||||
|
||||
ret = add_all_parents(root, path, parents, ref, level, time_seq,
|
||||
extent_item_pos, total_refs, ignore_offset);
|
||||
ret = add_all_parents(root, path, parents, preftrees, ref, level,
|
||||
time_seq, extent_item_pos, ignore_offset);
|
||||
out:
|
||||
btrfs_put_root(root);
|
||||
out_free:
|
||||
path->lowest_level = 0;
|
||||
btrfs_release_path(path);
|
||||
return ret;
|
||||
@ -609,7 +658,7 @@ unode_aux_to_inode_list(struct ulist_node *node)
|
||||
static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, u64 time_seq,
|
||||
struct preftrees *preftrees,
|
||||
const u64 *extent_item_pos, u64 total_refs,
|
||||
const u64 *extent_item_pos,
|
||||
struct share_check *sc, bool ignore_offset)
|
||||
{
|
||||
int err;
|
||||
@ -653,9 +702,9 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
|
||||
ret = BACKREF_FOUND_SHARED;
|
||||
goto out;
|
||||
}
|
||||
err = resolve_indirect_ref(fs_info, path, time_seq, ref,
|
||||
parents, extent_item_pos,
|
||||
total_refs, ignore_offset);
|
||||
err = resolve_indirect_ref(fs_info, path, time_seq, preftrees,
|
||||
ref, parents, extent_item_pos,
|
||||
ignore_offset);
|
||||
/*
|
||||
* we can only tolerate ENOENT; otherwise, we should catch the error
|
||||
* and return directly.
|
||||
@ -758,8 +807,7 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_head *head, u64 seq,
|
||||
struct preftrees *preftrees, u64 *total_refs,
|
||||
struct share_check *sc)
|
||||
struct preftrees *preftrees, struct share_check *sc)
|
||||
{
|
||||
struct btrfs_delayed_ref_node *node;
|
||||
struct btrfs_delayed_extent_op *extent_op = head->extent_op;
|
||||
@ -793,7 +841,6 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
*total_refs += count;
|
||||
switch (node->type) {
|
||||
case BTRFS_TREE_BLOCK_REF_KEY: {
|
||||
/* NORMAL INDIRECT METADATA backref */
|
||||
@ -876,7 +923,7 @@ out:
|
||||
static int add_inline_refs(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path, u64 bytenr,
|
||||
int *info_level, struct preftrees *preftrees,
|
||||
u64 *total_refs, struct share_check *sc)
|
||||
struct share_check *sc)
|
||||
{
|
||||
int ret = 0;
|
||||
int slot;
|
||||
@ -900,7 +947,6 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info,
|
||||
|
||||
ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
|
||||
flags = btrfs_extent_flags(leaf, ei);
|
||||
*total_refs += btrfs_extent_refs(leaf, ei);
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, slot);
|
||||
|
||||
ptr = (unsigned long)(ei + 1);
|
||||
@ -1125,8 +1171,6 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
|
||||
struct prelim_ref *ref;
|
||||
struct rb_node *node;
|
||||
struct extent_inode_elem *eie = NULL;
|
||||
/* total of both direct AND indirect refs! */
|
||||
u64 total_refs = 0;
|
||||
struct preftrees preftrees = {
|
||||
.direct = PREFTREE_INIT,
|
||||
.indirect = PREFTREE_INIT,
|
||||
@ -1195,7 +1239,7 @@ again:
|
||||
}
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
ret = add_delayed_refs(fs_info, head, time_seq,
|
||||
&preftrees, &total_refs, sc);
|
||||
&preftrees, sc);
|
||||
mutex_unlock(&head->mutex);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -1216,8 +1260,7 @@ again:
|
||||
(key.type == BTRFS_EXTENT_ITEM_KEY ||
|
||||
key.type == BTRFS_METADATA_ITEM_KEY)) {
|
||||
ret = add_inline_refs(fs_info, path, bytenr,
|
||||
&info_level, &preftrees,
|
||||
&total_refs, sc);
|
||||
&info_level, &preftrees, sc);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = add_keyed_refs(fs_info, path, bytenr, info_level,
|
||||
@ -1236,7 +1279,7 @@ again:
|
||||
WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root));
|
||||
|
||||
ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
|
||||
extent_item_pos, total_refs, sc, ignore_offset);
|
||||
extent_item_pos, sc, ignore_offset);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1362,10 +1405,10 @@ static void free_leaf_list(struct ulist *blocks)
|
||||
*
|
||||
* returns 0 on success, <0 on error
|
||||
*/
|
||||
static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **leafs,
|
||||
const u64 *extent_item_pos, bool ignore_offset)
|
||||
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **leafs,
|
||||
const u64 *extent_item_pos, bool ignore_offset)
|
||||
{
|
||||
int ret;
|
||||
|
||||
|
@ -40,6 +40,10 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
|
||||
|
||||
int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
|
||||
|
||||
int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **leafs,
|
||||
const u64 *extent_item_pos, bool ignore_offset);
|
||||
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 time_seq, struct ulist **roots, bool ignore_offset);
|
||||
|
@ -460,7 +460,7 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
|
||||
int ret;
|
||||
|
||||
while (start < end) {
|
||||
ret = find_first_extent_bit(info->pinned_extents, start,
|
||||
ret = find_first_extent_bit(&info->excluded_extents, start,
|
||||
&extent_start, &extent_end,
|
||||
EXTENT_DIRTY | EXTENT_UPTODATE,
|
||||
NULL);
|
||||
@ -1248,6 +1248,55 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool clean_pinned_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bg->fs_info;
|
||||
struct btrfs_transaction *prev_trans = NULL;
|
||||
const u64 start = bg->start;
|
||||
const u64 end = start + bg->length - 1;
|
||||
int ret;
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (trans->transaction->list.prev != &fs_info->trans_list) {
|
||||
prev_trans = list_last_entry(&trans->transaction->list,
|
||||
struct btrfs_transaction, list);
|
||||
refcount_inc(&prev_trans->use_count);
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
/*
|
||||
* Hold the unused_bg_unpin_mutex lock to avoid racing with
|
||||
* btrfs_finish_extent_commit(). If we are at transaction N, another
|
||||
* task might be running finish_extent_commit() for the previous
|
||||
* transaction N - 1, and have seen a range belonging to the block
|
||||
* group in pinned_extents before we were able to clear the whole block
|
||||
* group range from pinned_extents. This means that task can lookup for
|
||||
* the block group after we unpinned it from pinned_extents and removed
|
||||
* it, leading to a BUG_ON() at unpin_extent_range().
|
||||
*/
|
||||
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
||||
if (prev_trans) {
|
||||
ret = clear_extent_bits(&prev_trans->pinned_extents, start, end,
|
||||
EXTENT_DIRTY);
|
||||
if (ret)
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = clear_extent_bits(&trans->transaction->pinned_extents, start, end,
|
||||
EXTENT_DIRTY);
|
||||
if (ret)
|
||||
goto err;
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
|
||||
return true;
|
||||
|
||||
err:
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
btrfs_dec_block_group_ro(bg);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process the unused_bgs list and remove any that don't have any allocated
|
||||
* space inside of them.
|
||||
@ -1265,7 +1314,6 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
||||
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
while (!list_empty(&fs_info->unused_bgs)) {
|
||||
u64 start, end;
|
||||
int trimming;
|
||||
|
||||
block_group = list_first_entry(&fs_info->unused_bgs,
|
||||
@ -1344,35 +1392,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
||||
* We could have pending pinned extents for this block group,
|
||||
* just delete them, we don't care about them anymore.
|
||||
*/
|
||||
start = block_group->start;
|
||||
end = start + block_group->length - 1;
|
||||
/*
|
||||
* Hold the unused_bg_unpin_mutex lock to avoid racing with
|
||||
* btrfs_finish_extent_commit(). If we are at transaction N,
|
||||
* another task might be running finish_extent_commit() for the
|
||||
* previous transaction N - 1, and have seen a range belonging
|
||||
* to the block group in freed_extents[] before we were able to
|
||||
* clear the whole block group range from freed_extents[]. This
|
||||
* means that task can lookup for the block group after we
|
||||
* unpinned it from freed_extents[] and removed it, leading to
|
||||
* a BUG_ON() at btrfs_unpin_extent_range().
|
||||
*/
|
||||
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
||||
ret = clear_extent_bits(&fs_info->freed_extents[0], start, end,
|
||||
EXTENT_DIRTY);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
if (!clean_pinned_extents(trans, block_group))
|
||||
goto end_trans;
|
||||
}
|
||||
ret = clear_extent_bits(&fs_info->freed_extents[1], start, end,
|
||||
EXTENT_DIRTY);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
goto end_trans;
|
||||
}
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
|
||||
/*
|
||||
* At this point, the block_group is read only and should fail
|
||||
@ -1987,6 +2008,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
|
||||
btrfs_release_path(path);
|
||||
}
|
||||
|
||||
rcu_read_lock();
|
||||
list_for_each_entry_rcu(space_info, &info->space_info, list) {
|
||||
if (!(btrfs_get_alloc_profile(info, space_info->flags) &
|
||||
(BTRFS_BLOCK_GROUP_RAID10 |
|
||||
@ -2007,6 +2029,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
|
||||
list)
|
||||
inc_block_group_ro(cache, 1);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
|
||||
btrfs_init_global_block_rsv(info);
|
||||
ret = check_chunk_block_group_mappings(info);
|
||||
@ -2345,7 +2368,7 @@ static int cache_save_setup(struct btrfs_block_group *block_group,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (trans->aborted)
|
||||
if (TRANS_ABORTED(trans))
|
||||
return 0;
|
||||
again:
|
||||
inode = lookup_free_space_inode(block_group, path);
|
||||
@ -2881,7 +2904,7 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
&cache->space_info->total_bytes_pinned,
|
||||
num_bytes,
|
||||
BTRFS_TOTAL_BYTES_PINNED_BATCH);
|
||||
set_extent_dirty(info->pinned_extents,
|
||||
set_extent_dirty(&trans->transaction->pinned_extents,
|
||||
bytenr, bytenr + num_bytes - 1,
|
||||
GFP_NOFS | __GFP_NOFAIL);
|
||||
}
|
||||
|
@ -6,6 +6,98 @@
|
||||
#include "space-info.h"
|
||||
#include "transaction.h"
|
||||
|
||||
/*
|
||||
* HOW DO BLOCK RESERVES WORK
|
||||
*
|
||||
* Think of block_rsv's as buckets for logically grouped metadata
|
||||
* reservations. Each block_rsv has a ->size and a ->reserved. ->size is
|
||||
* how large we want our block rsv to be, ->reserved is how much space is
|
||||
* currently reserved for this block reserve.
|
||||
*
|
||||
* ->failfast exists for the truncate case, and is described below.
|
||||
*
|
||||
* NORMAL OPERATION
|
||||
*
|
||||
* -> Reserve
|
||||
* Entrance: btrfs_block_rsv_add, btrfs_block_rsv_refill
|
||||
*
|
||||
* We call into btrfs_reserve_metadata_bytes() with our bytes, which is
|
||||
* accounted for in space_info->bytes_may_use, and then add the bytes to
|
||||
* ->reserved, and ->size in the case of btrfs_block_rsv_add.
|
||||
*
|
||||
* ->size is an over-estimation of how much we may use for a particular
|
||||
* operation.
|
||||
*
|
||||
* -> Use
|
||||
* Entrance: btrfs_use_block_rsv
|
||||
*
|
||||
* When we do a btrfs_alloc_tree_block() we call into btrfs_use_block_rsv()
|
||||
* to determine the appropriate block_rsv to use, and then verify that
|
||||
* ->reserved has enough space for our tree block allocation. Once
|
||||
* successful we subtract fs_info->nodesize from ->reserved.
|
||||
*
|
||||
* -> Finish
|
||||
* Entrance: btrfs_block_rsv_release
|
||||
*
|
||||
* We are finished with our operation, subtract our individual reservation
|
||||
* from ->size, and then subtract ->size from ->reserved and free up the
|
||||
* excess if there is any.
|
||||
*
|
||||
* There is some logic here to refill the delayed refs rsv or the global rsv
|
||||
* as needed, otherwise the excess is subtracted from
|
||||
* space_info->bytes_may_use.
|
||||
*
|
||||
* TYPES OF BLOCK RESERVES
|
||||
*
|
||||
* BLOCK_RSV_TRANS, BLOCK_RSV_DELOPS, BLOCK_RSV_CHUNK
|
||||
* These behave normally, as described above, just within the confines of the
|
||||
* lifetime of their particular operation (transaction for the whole trans
|
||||
* handle lifetime, for example).
|
||||
*
|
||||
* BLOCK_RSV_GLOBAL
|
||||
* It is impossible to properly account for all the space that may be required
|
||||
* to make our extent tree updates. This block reserve acts as an overflow
|
||||
* buffer in case our delayed refs reserve does not reserve enough space to
|
||||
* update the extent tree.
|
||||
*
|
||||
* We can steal from this in some cases as well, notably on evict() or
|
||||
* truncate() in order to help users recover from ENOSPC conditions.
|
||||
*
|
||||
* BLOCK_RSV_DELALLOC
|
||||
* The individual item sizes are determined by the per-inode size
|
||||
* calculations, which are described with the delalloc code. This is pretty
|
||||
* straightforward, it's just the calculation of ->size encodes a lot of
|
||||
* different items, and thus it gets used when updating inodes, inserting file
|
||||
* extents, and inserting checksums.
|
||||
*
|
||||
* BLOCK_RSV_DELREFS
|
||||
* We keep a running tally of how many delayed refs we have on the system.
|
||||
* We assume each one of these delayed refs is going to use a full
|
||||
* reservation. We use the transaction items and pre-reserve space for every
|
||||
* operation, and use this reservation to refill any gap between ->size and
|
||||
* ->reserved that may exist.
|
||||
*
|
||||
* From there it's straightforward, removing a delayed ref means we remove its
|
||||
* count from ->size and free up reservations as necessary. Since this is
|
||||
* the most dynamic block reserve in the system, we will try to refill this
|
||||
* block reserve first with any excess returned by any other block reserve.
|
||||
*
|
||||
* BLOCK_RSV_EMPTY
|
||||
* This is the fallback block reserve to make us try to reserve space if we
|
||||
* don't have a specific bucket for this allocation. It is mostly used for
|
||||
* updating the device tree and such, since that is a separate pool we're
|
||||
* content to just reserve space from the space_info on demand.
|
||||
*
|
||||
* BLOCK_RSV_TEMP
|
||||
* This is used by things like truncate and iput. We will temporarily
|
||||
* allocate a block reserve, set it to some size, and then truncate bytes
|
||||
* until we have no space left. With ->failfast set we'll simply return
|
||||
* ENOSPC from btrfs_use_block_rsv() to signal that we need to unwind and try
|
||||
* to make a new reservation. This is because these operations are
|
||||
* unbounded, so we want to do as much work as we can, and then back off and
|
||||
* re-reserve.
|
||||
*/
|
||||
|
||||
static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
struct btrfs_block_rsv *dest, u64 num_bytes,
|
||||
@ -111,7 +203,7 @@ void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
if (!rsv)
|
||||
return;
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
|
||||
kfree(rsv);
|
||||
}
|
||||
|
||||
@ -178,9 +270,9 @@ int btrfs_block_rsv_refill(struct btrfs_root *root,
|
||||
return ret;
|
||||
}
|
||||
|
||||
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, u64 *qgroup_to_release)
|
||||
u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv, u64 num_bytes,
|
||||
u64 *qgroup_to_release)
|
||||
{
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_refs_rsv;
|
||||
@ -297,9 +389,9 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
|
||||
if (block_rsv->reserved < block_rsv->size) {
|
||||
num_bytes = block_rsv->size - block_rsv->reserved;
|
||||
block_rsv->reserved += num_bytes;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
num_bytes);
|
||||
block_rsv->reserved = block_rsv->size;
|
||||
} else if (block_rsv->reserved > block_rsv->size) {
|
||||
num_bytes = block_rsv->reserved - block_rsv->size;
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, sinfo,
|
||||
@ -344,7 +436,8 @@ void btrfs_init_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
|
||||
void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1);
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->global_block_rsv, (u64)-1,
|
||||
NULL);
|
||||
WARN_ON(fs_info->trans_block_rsv.size > 0);
|
||||
WARN_ON(fs_info->trans_block_rsv.reserved > 0);
|
||||
WARN_ON(fs_info->chunk_block_rsv.size > 0);
|
||||
|
@ -73,7 +73,7 @@ int btrfs_cond_migrate_bytes(struct btrfs_fs_info *fs_info,
|
||||
int min_factor);
|
||||
void btrfs_block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, bool update_size);
|
||||
u64 __btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes, u64 *qgroup_to_release);
|
||||
void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info);
|
||||
@ -82,20 +82,12 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u32 blocksize);
|
||||
|
||||
static inline void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u64 num_bytes)
|
||||
{
|
||||
__btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
|
||||
}
|
||||
|
||||
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
u32 blocksize)
|
||||
{
|
||||
btrfs_block_rsv_add_bytes(block_rsv, blocksize, false);
|
||||
btrfs_block_rsv_release(fs_info, block_rsv, 0);
|
||||
btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL);
|
||||
}
|
||||
|
||||
#endif /* BTRFS_BLOCK_RSV_H */
|
||||
|
@ -60,6 +60,12 @@ struct btrfs_inode {
|
||||
*/
|
||||
struct extent_io_tree io_failure_tree;
|
||||
|
||||
/*
|
||||
* Keep track of where the inode has extent items mapped in order to
|
||||
* make sure the i_size adjustments are accurate
|
||||
*/
|
||||
struct extent_io_tree file_extent_tree;
|
||||
|
||||
/* held while logging the inode in tree-log.c */
|
||||
struct mutex log_mutex;
|
||||
|
||||
|
@ -77,7 +77,6 @@
|
||||
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/genhd.h>
|
||||
#include <linux/blkdev.h>
|
||||
@ -152,11 +151,8 @@ struct btrfsic_block {
|
||||
struct list_head ref_to_list; /* list */
|
||||
struct list_head ref_from_list; /* list */
|
||||
struct btrfsic_block *next_in_same_bio;
|
||||
void *orig_bio_bh_private;
|
||||
union {
|
||||
bio_end_io_t *bio;
|
||||
bh_end_io_t *bh;
|
||||
} orig_bio_bh_end_io;
|
||||
void *orig_bio_private;
|
||||
bio_end_io_t *orig_bio_end_io;
|
||||
int submit_bio_bh_rw;
|
||||
u64 flush_gen; /* only valid if !never_written */
|
||||
};
|
||||
@ -325,14 +321,12 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
|
||||
u64 dev_bytenr, char **mapped_datav,
|
||||
unsigned int num_pages,
|
||||
struct bio *bio, int *bio_is_patched,
|
||||
struct buffer_head *bh,
|
||||
int submit_bio_bh_rw);
|
||||
static int btrfsic_process_written_superblock(
|
||||
struct btrfsic_state *state,
|
||||
struct btrfsic_block *const block,
|
||||
struct btrfs_super_block *const super_hdr);
|
||||
static void btrfsic_bio_end_io(struct bio *bp);
|
||||
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
|
||||
static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
|
||||
const struct btrfsic_block *block,
|
||||
int recursion_level);
|
||||
@ -399,8 +393,8 @@ static void btrfsic_block_init(struct btrfsic_block *b)
|
||||
b->never_written = 0;
|
||||
b->mirror_num = 0;
|
||||
b->next_in_same_bio = NULL;
|
||||
b->orig_bio_bh_private = NULL;
|
||||
b->orig_bio_bh_end_io.bio = NULL;
|
||||
b->orig_bio_private = NULL;
|
||||
b->orig_bio_end_io = NULL;
|
||||
INIT_LIST_HEAD(&b->collision_resolving_node);
|
||||
INIT_LIST_HEAD(&b->all_blocks_node);
|
||||
INIT_LIST_HEAD(&b->ref_to_list);
|
||||
@ -767,29 +761,31 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
struct btrfs_fs_info *fs_info = state->fs_info;
|
||||
struct btrfs_super_block *super_tmp;
|
||||
u64 dev_bytenr;
|
||||
struct buffer_head *bh;
|
||||
struct btrfsic_block *superblock_tmp;
|
||||
int pass;
|
||||
struct block_device *const superblock_bdev = device->bdev;
|
||||
struct page *page;
|
||||
struct address_space *mapping = superblock_bdev->bd_inode->i_mapping;
|
||||
int ret = 0;
|
||||
|
||||
/* super block bytenr is always the unmapped device bytenr */
|
||||
dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
|
||||
if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->commit_total_bytes)
|
||||
return -1;
|
||||
bh = __bread(superblock_bdev, dev_bytenr / BTRFS_BDEV_BLOCKSIZE,
|
||||
BTRFS_SUPER_INFO_SIZE);
|
||||
if (NULL == bh)
|
||||
|
||||
page = read_cache_page_gfp(mapping, dev_bytenr >> PAGE_SHIFT, GFP_NOFS);
|
||||
if (IS_ERR(page))
|
||||
return -1;
|
||||
super_tmp = (struct btrfs_super_block *)
|
||||
(bh->b_data + (dev_bytenr & (BTRFS_BDEV_BLOCKSIZE - 1)));
|
||||
|
||||
super_tmp = page_address(page);
|
||||
|
||||
if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
|
||||
btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
|
||||
memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
|
||||
btrfs_super_nodesize(super_tmp) != state->metablock_size ||
|
||||
btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
|
||||
brelse(bh);
|
||||
return 0;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
superblock_tmp =
|
||||
@ -800,8 +796,8 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
superblock_tmp = btrfsic_block_alloc();
|
||||
if (NULL == superblock_tmp) {
|
||||
pr_info("btrfsic: error, kmalloc failed!\n");
|
||||
brelse(bh);
|
||||
return -1;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
/* for superblock, only the dev_bytenr makes sense */
|
||||
superblock_tmp->dev_bytenr = dev_bytenr;
|
||||
@ -885,8 +881,8 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
mirror_num)) {
|
||||
pr_info("btrfsic: btrfsic_map_block(bytenr @%llu, mirror %d) failed!\n",
|
||||
next_bytenr, mirror_num);
|
||||
brelse(bh);
|
||||
return -1;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
next_block = btrfsic_block_lookup_or_add(
|
||||
@ -895,8 +891,8 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
mirror_num, NULL);
|
||||
if (NULL == next_block) {
|
||||
btrfsic_release_block_ctx(&tmp_next_block_ctx);
|
||||
brelse(bh);
|
||||
return -1;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
next_block->disk_key = tmp_disk_key;
|
||||
@ -907,16 +903,17 @@ static int btrfsic_process_superblock_dev_mirror(
|
||||
BTRFSIC_GENERATION_UNKNOWN);
|
||||
btrfsic_release_block_ctx(&tmp_next_block_ctx);
|
||||
if (NULL == l) {
|
||||
brelse(bh);
|
||||
return -1;
|
||||
ret = -1;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
|
||||
btrfsic_dump_tree_sub(state, superblock_tmp, 0);
|
||||
|
||||
brelse(bh);
|
||||
return 0;
|
||||
out:
|
||||
put_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
|
||||
@ -1743,7 +1740,6 @@ static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
|
||||
u64 dev_bytenr, char **mapped_datav,
|
||||
unsigned int num_pages,
|
||||
struct bio *bio, int *bio_is_patched,
|
||||
struct buffer_head *bh,
|
||||
int submit_bio_bh_rw)
|
||||
{
|
||||
int is_metadata;
|
||||
@ -1902,9 +1898,9 @@ again:
|
||||
block->is_iodone = 0;
|
||||
BUG_ON(NULL == bio_is_patched);
|
||||
if (!*bio_is_patched) {
|
||||
block->orig_bio_bh_private =
|
||||
block->orig_bio_private =
|
||||
bio->bi_private;
|
||||
block->orig_bio_bh_end_io.bio =
|
||||
block->orig_bio_end_io =
|
||||
bio->bi_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bio->bi_private = block;
|
||||
@ -1916,25 +1912,17 @@ again:
|
||||
bio->bi_private;
|
||||
|
||||
BUG_ON(NULL == chained_block);
|
||||
block->orig_bio_bh_private =
|
||||
chained_block->orig_bio_bh_private;
|
||||
block->orig_bio_bh_end_io.bio =
|
||||
chained_block->orig_bio_bh_end_io.
|
||||
bio;
|
||||
block->orig_bio_private =
|
||||
chained_block->orig_bio_private;
|
||||
block->orig_bio_end_io =
|
||||
chained_block->orig_bio_end_io;
|
||||
block->next_in_same_bio = chained_block;
|
||||
bio->bi_private = block;
|
||||
}
|
||||
} else if (NULL != bh) {
|
||||
block->is_iodone = 0;
|
||||
block->orig_bio_bh_private = bh->b_private;
|
||||
block->orig_bio_bh_end_io.bh = bh->b_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bh->b_private = block;
|
||||
bh->b_end_io = btrfsic_bh_end_io;
|
||||
} else {
|
||||
block->is_iodone = 1;
|
||||
block->orig_bio_bh_private = NULL;
|
||||
block->orig_bio_bh_end_io.bio = NULL;
|
||||
block->orig_bio_private = NULL;
|
||||
block->orig_bio_end_io = NULL;
|
||||
block->next_in_same_bio = NULL;
|
||||
}
|
||||
}
|
||||
@ -2042,8 +2030,8 @@ again:
|
||||
block->is_iodone = 0;
|
||||
BUG_ON(NULL == bio_is_patched);
|
||||
if (!*bio_is_patched) {
|
||||
block->orig_bio_bh_private = bio->bi_private;
|
||||
block->orig_bio_bh_end_io.bio = bio->bi_end_io;
|
||||
block->orig_bio_private = bio->bi_private;
|
||||
block->orig_bio_end_io = bio->bi_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bio->bi_private = block;
|
||||
bio->bi_end_io = btrfsic_bio_end_io;
|
||||
@ -2054,24 +2042,17 @@ again:
|
||||
bio->bi_private;
|
||||
|
||||
BUG_ON(NULL == chained_block);
|
||||
block->orig_bio_bh_private =
|
||||
chained_block->orig_bio_bh_private;
|
||||
block->orig_bio_bh_end_io.bio =
|
||||
chained_block->orig_bio_bh_end_io.bio;
|
||||
block->orig_bio_private =
|
||||
chained_block->orig_bio_private;
|
||||
block->orig_bio_end_io =
|
||||
chained_block->orig_bio_end_io;
|
||||
block->next_in_same_bio = chained_block;
|
||||
bio->bi_private = block;
|
||||
}
|
||||
} else if (NULL != bh) {
|
||||
block->is_iodone = 0;
|
||||
block->orig_bio_bh_private = bh->b_private;
|
||||
block->orig_bio_bh_end_io.bh = bh->b_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bh->b_private = block;
|
||||
bh->b_end_io = btrfsic_bh_end_io;
|
||||
} else {
|
||||
block->is_iodone = 1;
|
||||
block->orig_bio_bh_private = NULL;
|
||||
block->orig_bio_bh_end_io.bio = NULL;
|
||||
block->orig_bio_private = NULL;
|
||||
block->orig_bio_end_io = NULL;
|
||||
block->next_in_same_bio = NULL;
|
||||
}
|
||||
if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
|
||||
@ -2112,8 +2093,8 @@ static void btrfsic_bio_end_io(struct bio *bp)
|
||||
iodone_w_error = 1;
|
||||
|
||||
BUG_ON(NULL == block);
|
||||
bp->bi_private = block->orig_bio_bh_private;
|
||||
bp->bi_end_io = block->orig_bio_bh_end_io.bio;
|
||||
bp->bi_private = block->orig_bio_private;
|
||||
bp->bi_end_io = block->orig_bio_end_io;
|
||||
|
||||
do {
|
||||
struct btrfsic_block *next_block;
|
||||
@ -2146,38 +2127,6 @@ static void btrfsic_bio_end_io(struct bio *bp)
|
||||
bp->bi_end_io(bp);
|
||||
}
|
||||
|
||||
static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
|
||||
{
|
||||
struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
|
||||
int iodone_w_error = !uptodate;
|
||||
struct btrfsic_dev_state *dev_state;
|
||||
|
||||
BUG_ON(NULL == block);
|
||||
dev_state = block->dev_state;
|
||||
if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
|
||||
pr_info("bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
|
||||
iodone_w_error,
|
||||
btrfsic_get_block_type(dev_state->state, block),
|
||||
block->logical_bytenr, block->dev_state->name,
|
||||
block->dev_bytenr, block->mirror_num);
|
||||
|
||||
block->iodone_w_error = iodone_w_error;
|
||||
if (block->submit_bio_bh_rw & REQ_PREFLUSH) {
|
||||
dev_state->last_flush_gen++;
|
||||
if ((dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
|
||||
pr_info("bh_end_io() new %s flush_gen=%llu\n",
|
||||
dev_state->name, dev_state->last_flush_gen);
|
||||
}
|
||||
if (block->submit_bio_bh_rw & REQ_FUA)
|
||||
block->flush_gen = 0; /* FUA completed means block is on disk */
|
||||
|
||||
bh->b_private = block->orig_bio_bh_private;
|
||||
bh->b_end_io = block->orig_bio_bh_end_io.bh;
|
||||
block->is_iodone = 1; /* for FLUSH, this releases the block */
|
||||
bh->b_end_io(bh, uptodate);
|
||||
}
|
||||
|
||||
static int btrfsic_process_written_superblock(
|
||||
struct btrfsic_state *state,
|
||||
struct btrfsic_block *const superblock,
|
||||
@ -2730,63 +2679,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_lookup(dev_t dev)
|
||||
&btrfsic_dev_state_hashtable);
|
||||
}
|
||||
|
||||
int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh)
|
||||
{
|
||||
struct btrfsic_dev_state *dev_state;
|
||||
|
||||
if (!btrfsic_is_initialized)
|
||||
return submit_bh(op, op_flags, bh);
|
||||
|
||||
mutex_lock(&btrfsic_mutex);
|
||||
/* since btrfsic_submit_bh() might also be called before
|
||||
* btrfsic_mount(), this might return NULL */
|
||||
dev_state = btrfsic_dev_state_lookup(bh->b_bdev->bd_dev);
|
||||
|
||||
/* Only called to write the superblock (incl. FLUSH/FUA) */
|
||||
if (NULL != dev_state &&
|
||||
(op == REQ_OP_WRITE) && bh->b_size > 0) {
|
||||
u64 dev_bytenr;
|
||||
|
||||
dev_bytenr = BTRFS_BDEV_BLOCKSIZE * bh->b_blocknr;
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info("submit_bh(op=0x%x,0x%x, blocknr=%llu (bytenr %llu), size=%zu, data=%p, bdev=%p)\n",
|
||||
op, op_flags, (unsigned long long)bh->b_blocknr,
|
||||
dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
|
||||
btrfsic_process_written_block(dev_state, dev_bytenr,
|
||||
&bh->b_data, 1, NULL,
|
||||
NULL, bh, op_flags);
|
||||
} else if (NULL != dev_state && (op_flags & REQ_PREFLUSH)) {
|
||||
if (dev_state->state->print_mask &
|
||||
BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
|
||||
pr_info("submit_bh(op=0x%x,0x%x FLUSH, bdev=%p)\n",
|
||||
op, op_flags, bh->b_bdev);
|
||||
if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
|
||||
if ((dev_state->state->print_mask &
|
||||
(BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
|
||||
BTRFSIC_PRINT_MASK_VERBOSE)))
|
||||
pr_info("btrfsic_submit_bh(%s) with FLUSH but dummy block already in use (ignored)!\n",
|
||||
dev_state->name);
|
||||
} else {
|
||||
struct btrfsic_block *const block =
|
||||
&dev_state->dummy_block_for_bio_bh_flush;
|
||||
|
||||
block->is_iodone = 0;
|
||||
block->never_written = 0;
|
||||
block->iodone_w_error = 0;
|
||||
block->flush_gen = dev_state->last_flush_gen + 1;
|
||||
block->submit_bio_bh_rw = op_flags;
|
||||
block->orig_bio_bh_private = bh->b_private;
|
||||
block->orig_bio_bh_end_io.bh = bh->b_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bh->b_private = block;
|
||||
bh->b_end_io = btrfsic_bh_end_io;
|
||||
}
|
||||
}
|
||||
mutex_unlock(&btrfsic_mutex);
|
||||
return submit_bh(op, op_flags, bh);
|
||||
}
|
||||
|
||||
static void __btrfsic_submit_bio(struct bio *bio)
|
||||
{
|
||||
struct btrfsic_dev_state *dev_state;
|
||||
@ -2838,7 +2730,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
|
||||
btrfsic_process_written_block(dev_state, dev_bytenr,
|
||||
mapped_datav, segs,
|
||||
bio, &bio_is_patched,
|
||||
NULL, bio->bi_opf);
|
||||
bio->bi_opf);
|
||||
bio_for_each_segment(bvec, bio, iter)
|
||||
kunmap(bvec.bv_page);
|
||||
kfree(mapped_datav);
|
||||
@ -2862,8 +2754,8 @@ static void __btrfsic_submit_bio(struct bio *bio)
|
||||
block->iodone_w_error = 0;
|
||||
block->flush_gen = dev_state->last_flush_gen + 1;
|
||||
block->submit_bio_bh_rw = bio->bi_opf;
|
||||
block->orig_bio_bh_private = bio->bi_private;
|
||||
block->orig_bio_bh_end_io.bio = bio->bi_end_io;
|
||||
block->orig_bio_private = bio->bi_private;
|
||||
block->orig_bio_end_io = bio->bi_end_io;
|
||||
block->next_in_same_bio = NULL;
|
||||
bio->bi_private = block;
|
||||
bio->bi_end_io = btrfsic_bio_end_io;
|
||||
|
@ -7,11 +7,9 @@
|
||||
#define BTRFS_CHECK_INTEGRITY_H
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
int btrfsic_submit_bh(int op, int op_flags, struct buffer_head *bh);
|
||||
void btrfsic_submit_bio(struct bio *bio);
|
||||
int btrfsic_submit_bio_wait(struct bio *bio);
|
||||
#else
|
||||
#define btrfsic_submit_bh submit_bh
|
||||
#define btrfsic_submit_bio submit_bio
|
||||
#define btrfsic_submit_bio_wait submit_bio_wait
|
||||
#endif
|
||||
|
@ -31,8 +31,8 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
|
||||
|
||||
static const struct btrfs_csums {
|
||||
u16 size;
|
||||
const char *name;
|
||||
const char *driver;
|
||||
const char name[10];
|
||||
const char driver[12];
|
||||
} btrfs_csums[] = {
|
||||
[BTRFS_CSUM_TYPE_CRC32] = { .size = 4, .name = "crc32c" },
|
||||
[BTRFS_CSUM_TYPE_XXHASH] = { .size = 8, .name = "xxhash64" },
|
||||
@ -63,7 +63,8 @@ const char *btrfs_super_csum_name(u16 csum_type)
|
||||
const char *btrfs_super_csum_driver(u16 csum_type)
|
||||
{
|
||||
/* csum type is validated at mount time */
|
||||
return btrfs_csums[csum_type].driver ?:
|
||||
return btrfs_csums[csum_type].driver[0] ?
|
||||
btrfs_csums[csum_type].driver :
|
||||
btrfs_csums[csum_type].name;
|
||||
}
|
||||
|
||||
@ -143,44 +144,6 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root)
|
||||
return eb;
|
||||
}
|
||||
|
||||
/* loop around taking references on and locking the root node of the
|
||||
* tree until you end up with a lock on the root. A locked buffer
|
||||
* is returned, with a reference held.
|
||||
*/
|
||||
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
while (1) {
|
||||
eb = btrfs_root_node(root);
|
||||
btrfs_tree_lock(eb);
|
||||
if (eb == root->node)
|
||||
break;
|
||||
btrfs_tree_unlock(eb);
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return eb;
|
||||
}
|
||||
|
||||
/* loop around taking references on and locking the root node of the
|
||||
* tree until you end up with a lock on the root. A locked buffer
|
||||
* is returned, with a reference held.
|
||||
*/
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
while (1) {
|
||||
eb = btrfs_root_node(root);
|
||||
btrfs_tree_read_lock(eb);
|
||||
if (eb == root->node)
|
||||
break;
|
||||
btrfs_tree_read_unlock(eb);
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return eb;
|
||||
}
|
||||
|
||||
/* cowonly root (everything not a reference counted cow subvolume), just get
|
||||
* put onto a simple dirty list. transaction.c walks this to make sure they
|
||||
* get properly updated on disk.
|
||||
@ -341,7 +304,6 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
|
||||
struct rb_root *tm_root;
|
||||
struct rb_node *node;
|
||||
struct rb_node *next;
|
||||
struct seq_list *cur_elem;
|
||||
struct tree_mod_elem *tm;
|
||||
u64 min_seq = (u64)-1;
|
||||
u64 seq_putting = elem->seq;
|
||||
@ -353,18 +315,20 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
|
||||
list_del(&elem->list);
|
||||
elem->seq = 0;
|
||||
|
||||
list_for_each_entry(cur_elem, &fs_info->tree_mod_seq_list, list) {
|
||||
if (cur_elem->seq < min_seq) {
|
||||
if (seq_putting > cur_elem->seq) {
|
||||
/*
|
||||
* blocker with lower sequence number exists, we
|
||||
* cannot remove anything from the log
|
||||
*/
|
||||
write_unlock(&fs_info->tree_mod_log_lock);
|
||||
return;
|
||||
}
|
||||
min_seq = cur_elem->seq;
|
||||
if (!list_empty(&fs_info->tree_mod_seq_list)) {
|
||||
struct seq_list *first;
|
||||
|
||||
first = list_first_entry(&fs_info->tree_mod_seq_list,
|
||||
struct seq_list, list);
|
||||
if (seq_putting > first->seq) {
|
||||
/*
|
||||
* Blocker with lower sequence number exists, we
|
||||
* cannot remove anything from the log.
|
||||
*/
|
||||
write_unlock(&fs_info->tree_mod_log_lock);
|
||||
return;
|
||||
}
|
||||
min_seq = first->seq;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -962,9 +926,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
if (new_flags != 0) {
|
||||
int level = btrfs_header_level(buf);
|
||||
|
||||
ret = btrfs_set_disk_extent_flags(trans,
|
||||
buf->start,
|
||||
buf->len,
|
||||
ret = btrfs_set_disk_extent_flags(trans, buf,
|
||||
new_flags, level, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include "extent_map.h"
|
||||
#include "async-thread.h"
|
||||
#include "block-rsv.h"
|
||||
#include "locking.h"
|
||||
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_transaction;
|
||||
@ -596,8 +597,8 @@ struct btrfs_fs_info {
|
||||
/* keep track of unallocated space */
|
||||
atomic64_t free_chunk_space;
|
||||
|
||||
struct extent_io_tree freed_extents[2];
|
||||
struct extent_io_tree *pinned_extents;
|
||||
/* Track ranges which are used by log trees blocks/logged data extents */
|
||||
struct extent_io_tree excluded_extents;
|
||||
|
||||
/* logical->physical extent mapping */
|
||||
struct extent_map_tree mapping_tree;
|
||||
@ -696,7 +697,6 @@ struct btrfs_fs_info {
|
||||
struct rw_semaphore cleanup_work_sem;
|
||||
|
||||
struct rw_semaphore subvol_sem;
|
||||
struct srcu_struct subvol_srcu;
|
||||
|
||||
spinlock_t trans_lock;
|
||||
/*
|
||||
@ -947,6 +947,10 @@ struct btrfs_fs_info {
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct kobject *debug_kobj;
|
||||
struct kobject *discard_debug_kobj;
|
||||
struct list_head allocated_roots;
|
||||
|
||||
spinlock_t eb_leak_lock;
|
||||
struct list_head allocated_ebs;
|
||||
#endif
|
||||
};
|
||||
|
||||
@ -955,11 +959,6 @@ static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
|
||||
return sb->s_fs_info;
|
||||
}
|
||||
|
||||
struct btrfs_subvolume_writers {
|
||||
struct percpu_counter counter;
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
/*
|
||||
* The state of btrfs root
|
||||
*/
|
||||
@ -1131,8 +1130,9 @@ struct btrfs_root {
|
||||
* root_item_lock.
|
||||
*/
|
||||
int dedupe_in_progress;
|
||||
struct btrfs_subvolume_writers *subv_writers;
|
||||
atomic_t will_be_snapshotted;
|
||||
/* For exclusion of snapshot creation and nocow writes */
|
||||
struct btrfs_drew_lock snapshot_lock;
|
||||
|
||||
atomic_t snapshot_force_cow;
|
||||
|
||||
/* For qgroup metadata reserved space */
|
||||
@ -1149,6 +1149,10 @@ struct btrfs_root {
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
u64 alloc_bytenr;
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct list_head leak_list;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct btrfs_clone_extent_info {
|
||||
@ -1971,16 +1975,6 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
|
||||
btrfs_set_header_flags(eb, flags);
|
||||
}
|
||||
|
||||
static inline unsigned long btrfs_header_fsid(void)
|
||||
{
|
||||
return offsetof(struct btrfs_header, fsid);
|
||||
}
|
||||
|
||||
static inline unsigned long btrfs_header_chunk_tree_uuid(const struct extent_buffer *eb)
|
||||
{
|
||||
return offsetof(struct btrfs_header, chunk_tree_uuid);
|
||||
}
|
||||
|
||||
static inline int btrfs_is_leaf(const struct extent_buffer *eb)
|
||||
{
|
||||
return btrfs_header_level(eb) == 0;
|
||||
@ -2458,9 +2452,9 @@ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len);
|
||||
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
u64 offset, int metadata, u64 *refs, u64 *flags);
|
||||
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 num, int reserved);
|
||||
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
|
||||
int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num,
|
||||
int reserved);
|
||||
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes);
|
||||
int btrfs_exclude_logged_extents(struct extent_buffer *eb);
|
||||
int btrfs_cross_ref_exist(struct btrfs_root *root,
|
||||
@ -2490,13 +2484,13 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct extent_buffer *buf, int full_backref);
|
||||
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 flags,
|
||||
struct extent_buffer *eb, u64 flags,
|
||||
int level, int is_data);
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
|
||||
|
||||
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len, int delalloc);
|
||||
int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start,
|
||||
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
|
||||
u64 len);
|
||||
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
|
||||
@ -2665,9 +2659,8 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
|
||||
return btrfs_next_old_item(root, p, 0);
|
||||
}
|
||||
int btrfs_leaf_free_space(struct extent_buffer *leaf);
|
||||
int __must_check btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv,
|
||||
int update_ref, int for_reloc);
|
||||
int __must_check btrfs_drop_snapshot(struct btrfs_root *root, int update_ref,
|
||||
int for_reloc);
|
||||
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct extent_buffer *node,
|
||||
@ -2695,23 +2688,6 @@ static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
|
||||
return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
|
||||
}
|
||||
|
||||
static inline void free_fs_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
kfree(fs_info->balance_ctl);
|
||||
kfree(fs_info->delayed_root);
|
||||
kfree(fs_info->extent_root);
|
||||
kfree(fs_info->tree_root);
|
||||
kfree(fs_info->chunk_root);
|
||||
kfree(fs_info->dev_root);
|
||||
kfree(fs_info->csum_root);
|
||||
kfree(fs_info->quota_root);
|
||||
kfree(fs_info->uuid_root);
|
||||
kfree(fs_info->free_space_root);
|
||||
kfree(fs_info->super_copy);
|
||||
kfree(fs_info->super_for_commit);
|
||||
kvfree(fs_info);
|
||||
}
|
||||
|
||||
/* tree mod log functions from ctree.c */
|
||||
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
|
||||
struct seq_list *elem);
|
||||
@ -2750,9 +2726,7 @@ int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
u64 subid);
|
||||
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
u64 subid);
|
||||
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
|
||||
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
|
||||
u64));
|
||||
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* dir-item.c */
|
||||
int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
||||
@ -2859,6 +2833,12 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
struct btrfs_file_extent_item *fi,
|
||||
const bool new_inline,
|
||||
struct extent_map *em);
|
||||
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len);
|
||||
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len);
|
||||
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size);
|
||||
u64 btrfs_file_extent_end(const struct btrfs_path *path);
|
||||
|
||||
/* inode.c */
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
@ -2996,9 +2976,6 @@ int btrfs_dirty_pages(struct inode *inode, struct page **pages,
|
||||
size_t num_pages, loff_t pos, size_t write_bytes,
|
||||
struct extent_state **cached);
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
|
||||
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out,
|
||||
loff_t len, unsigned int remap_flags);
|
||||
|
||||
/* tree-defrag.c */
|
||||
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
@ -3008,6 +2985,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
unsigned long new_flags);
|
||||
int btrfs_sync_fs(struct super_block *sb, int wait);
|
||||
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
u64 subvol_objectid);
|
||||
|
||||
static inline __printf(2, 3) __cold
|
||||
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
|
||||
@ -3401,6 +3380,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending,
|
||||
u64 *bytes_to_reserve);
|
||||
int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_pending_snapshot *pending);
|
||||
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* scrub.c */
|
||||
int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
|
||||
|
@ -9,6 +9,108 @@
|
||||
#include "qgroup.h"
|
||||
#include "block-group.h"
|
||||
|
||||
/*
|
||||
* HOW DOES THIS WORK
|
||||
*
|
||||
* There are two stages to data reservations, one for data and one for metadata
|
||||
* to handle the new extents and checksums generated by writing data.
|
||||
*
|
||||
*
|
||||
* DATA RESERVATION
|
||||
* The general flow of the data reservation is as follows
|
||||
*
|
||||
* -> Reserve
|
||||
* We call into btrfs_reserve_data_bytes() for the user request bytes that
|
||||
* they wish to write. We make this reservation and add it to
|
||||
* space_info->bytes_may_use. We set EXTENT_DELALLOC on the inode io_tree
|
||||
* for the range and carry on if this is buffered, or follow up trying to
|
||||
* make a real allocation if we are pre-allocating or doing O_DIRECT.
|
||||
*
|
||||
* -> Use
|
||||
* At writepages()/prealloc/O_DIRECT time we will call into
|
||||
* btrfs_reserve_extent() for some part or all of this range of bytes. We
|
||||
* will make the allocation and subtract space_info->bytes_may_use by the
|
||||
* original requested length and increase the space_info->bytes_reserved by
|
||||
* the allocated length. This distinction is important because compression
|
||||
* may allocate a smaller on disk extent than we previously reserved.
|
||||
*
|
||||
* -> Allocation
|
||||
* finish_ordered_io() will insert the new file extent item for this range,
|
||||
* and then add a delayed ref update for the extent tree. Once that delayed
|
||||
* ref is written the extent size is subtracted from
|
||||
* space_info->bytes_reserved and added to space_info->bytes_used.
|
||||
*
|
||||
* Error handling
|
||||
*
|
||||
* -> By the reservation maker
|
||||
* This is the simplest case, we haven't completed our operation and we know
|
||||
* how much we reserved, we can simply call
|
||||
* btrfs_free_reserved_data_space*() and it will be removed from
|
||||
* space_info->bytes_may_use.
|
||||
*
|
||||
* -> After the reservation has been made, but before cow_file_range()
|
||||
* This is specifically for the delalloc case. You must clear
|
||||
* EXTENT_DELALLOC with the EXTENT_CLEAR_DATA_RESV bit, and the range will
|
||||
* be subtracted from space_info->bytes_may_use.
|
||||
*
|
||||
* METADATA RESERVATION
|
||||
* The general metadata reservation lifetimes are discussed elsewhere, this
|
||||
* will just focus on how it is used for delalloc space.
|
||||
*
|
||||
* We keep track of two things on a per inode basis
|
||||
*
|
||||
* ->outstanding_extents
|
||||
* This is the number of file extent items we'll need to handle all of the
|
||||
* outstanding DELALLOC space we have in this inode. We limit the maximum
|
||||
* size of an extent, so a large contiguous dirty area may require more than
|
||||
* one outstanding_extent, which is why count_max_extents() is used to
|
||||
* determine how many outstanding_extents get added.
|
||||
*
|
||||
* ->csum_bytes
|
||||
* This is essentially how many dirty bytes we have for this inode, so we
|
||||
* can calculate the number of checksum items we would have to add in order
|
||||
* to checksum our outstanding data.
|
||||
*
|
||||
* We keep a per-inode block_rsv in order to make it easier to keep track of
|
||||
* our reservation. We use btrfs_calculate_inode_block_rsv_size() to
|
||||
* calculate the current theoretical maximum reservation we would need for the
|
||||
* metadata for this inode. We call this and then adjust our reservation as
|
||||
* necessary, either by attempting to reserve more space, or freeing up excess
|
||||
* space.
|
||||
*
|
||||
* OUTSTANDING_EXTENTS HANDLING
|
||||
*
|
||||
* ->outstanding_extents is used for keeping track of how many extents we will
|
||||
* need to use for this inode, and it will fluctuate depending on where you are
|
||||
* in the life cycle of the dirty data. Consider the following normal case for
|
||||
* a completely clean inode, with a num_bytes < our maximum allowed extent size
|
||||
*
|
||||
* -> reserve
|
||||
* ->outstanding_extents += 1 (current value is 1)
|
||||
*
|
||||
* -> set_delalloc
|
||||
* ->outstanding_extents += 1 (current value is 2)
|
||||
*
|
||||
* -> btrfs_delalloc_release_extents()
|
||||
* ->outstanding_extents -= 1 (current value is 1)
|
||||
*
|
||||
* We must call this once we are done, as we hold our reservation for the
|
||||
* duration of our operation, and then assume set_delalloc will update the
|
||||
* counter appropriately.
|
||||
*
|
||||
* -> add ordered extent
|
||||
* ->outstanding_extents += 1 (current value is 2)
|
||||
*
|
||||
* -> btrfs_clear_delalloc_extent
|
||||
* ->outstanding_extents -= 1 (current value is 1)
|
||||
*
|
||||
* -> finish_ordered_io/btrfs_remove_ordered_extent
|
||||
* ->outstanding_extents -= 1 (current value is 0)
|
||||
*
|
||||
* Each stage is responsible for their own accounting of the extent, thus
|
||||
* making error handling and cleanup easier.
|
||||
*/
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
@ -228,8 +330,8 @@ static void btrfs_inode_rsv_release(struct btrfs_inode *inode, bool qgroup_free)
|
||||
* are releasing 0 bytes, and then we'll just get the reservation over
|
||||
* the size free'd.
|
||||
*/
|
||||
released = __btrfs_block_rsv_release(fs_info, block_rsv, 0,
|
||||
&qgroup_to_release);
|
||||
released = btrfs_block_rsv_release(fs_info, block_rsv, 0,
|
||||
&qgroup_to_release);
|
||||
if (released > 0)
|
||||
trace_btrfs_space_reservation(fs_info, "delalloc",
|
||||
btrfs_ino(inode), released, 0);
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "misc.h"
|
||||
#include "delayed-inode.h"
|
||||
#include "disk-io.h"
|
||||
@ -595,8 +596,7 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid, item->bytes_reserved,
|
||||
0);
|
||||
btrfs_block_rsv_release(fs_info, rsv,
|
||||
item->bytes_reserved);
|
||||
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
|
||||
}
|
||||
|
||||
static int btrfs_delayed_inode_reserve_metadata(
|
||||
@ -677,8 +677,7 @@ static void btrfs_delayed_inode_release_metadata(struct btrfs_fs_info *fs_info,
|
||||
rsv = &fs_info->delayed_block_rsv;
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_inode",
|
||||
node->inode_id, node->bytes_reserved, 0);
|
||||
btrfs_block_rsv_release(fs_info, rsv,
|
||||
node->bytes_reserved);
|
||||
btrfs_block_rsv_release(fs_info, rsv, node->bytes_reserved, NULL);
|
||||
if (qgroup_free)
|
||||
btrfs_qgroup_free_meta_prealloc(node->root,
|
||||
node->bytes_reserved);
|
||||
@ -805,11 +804,14 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_item *delayed_item)
|
||||
{
|
||||
struct extent_buffer *leaf;
|
||||
unsigned int nofs_flag;
|
||||
char *ptr;
|
||||
int ret;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ret = btrfs_insert_empty_item(trans, root, path, &delayed_item->key,
|
||||
delayed_item->data_len);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (ret < 0 && ret != -EEXIST)
|
||||
return ret;
|
||||
|
||||
@ -937,6 +939,7 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_node *node)
|
||||
{
|
||||
struct btrfs_delayed_item *curr, *prev;
|
||||
unsigned int nofs_flag;
|
||||
int ret = 0;
|
||||
|
||||
do_again:
|
||||
@ -945,7 +948,9 @@ do_again:
|
||||
if (!curr)
|
||||
goto delete_fail;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ret = btrfs_search_slot(trans, root, &curr->key, path, -1, 1);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (ret < 0)
|
||||
goto delete_fail;
|
||||
else if (ret > 0) {
|
||||
@ -1012,6 +1017,7 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_key key;
|
||||
struct btrfs_inode_item *inode_item;
|
||||
struct extent_buffer *leaf;
|
||||
unsigned int nofs_flag;
|
||||
int mod;
|
||||
int ret;
|
||||
|
||||
@ -1024,7 +1030,9 @@ static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
|
||||
else
|
||||
mod = 1;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ret = btrfs_lookup_inode(trans, root, path, &key, mod);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (ret > 0) {
|
||||
btrfs_release_path(path);
|
||||
return -ENOENT;
|
||||
@ -1075,7 +1083,10 @@ search:
|
||||
|
||||
key.type = BTRFS_INODE_EXTREF_KEY;
|
||||
key.offset = -1;
|
||||
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (ret < 0)
|
||||
goto err_out;
|
||||
ASSERT(ret);
|
||||
@ -1139,7 +1150,7 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, int nr)
|
||||
int ret = 0;
|
||||
bool count = (nr > 0);
|
||||
|
||||
if (trans->aborted)
|
||||
if (TRANS_ABORTED(trans))
|
||||
return -EIO;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
@ -1760,6 +1771,7 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
|
||||
|
||||
int btrfs_fill_inode(struct inode *inode, u32 *rdev)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
struct btrfs_inode_item *inode_item;
|
||||
|
||||
@ -1779,6 +1791,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
|
||||
i_uid_write(inode, btrfs_stack_inode_uid(inode_item));
|
||||
i_gid_write(inode, btrfs_stack_inode_gid(inode_item));
|
||||
btrfs_i_size_write(BTRFS_I(inode), btrfs_stack_inode_size(inode_item));
|
||||
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
|
||||
round_up(i_size_read(inode), fs_info->sectorsize));
|
||||
inode->i_mode = btrfs_stack_inode_mode(inode_item);
|
||||
set_nlink(inode, btrfs_stack_inode_nlink(inode_item));
|
||||
inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
|
||||
|
@ -70,7 +70,7 @@ struct btrfs_delayed_item {
|
||||
refcount_t refs;
|
||||
int ins_or_del;
|
||||
u32 data_len;
|
||||
char data[0];
|
||||
char data[];
|
||||
};
|
||||
|
||||
static inline void btrfs_init_delayed_root(
|
||||
|
@ -82,8 +82,7 @@ void btrfs_delayed_refs_rsv_release(struct btrfs_fs_info *fs_info, int nr)
|
||||
u64 num_bytes = btrfs_calc_insert_metadata_size(fs_info, nr);
|
||||
u64 released = 0;
|
||||
|
||||
released = __btrfs_block_rsv_release(fs_info, block_rsv, num_bytes,
|
||||
NULL);
|
||||
released = btrfs_block_rsv_release(fs_info, block_rsv, num_bytes, NULL);
|
||||
if (released)
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
|
||||
0, released, 0);
|
||||
|
@ -22,6 +22,46 @@
|
||||
#include "dev-replace.h"
|
||||
#include "sysfs.h"
|
||||
|
||||
/*
|
||||
* Device replace overview
|
||||
*
|
||||
* [Objective]
|
||||
* To copy all extents (both new and on-disk) from source device to target
|
||||
* device, while still keeping the filesystem read-write.
|
||||
*
|
||||
* [Method]
|
||||
* There are two main methods involved:
|
||||
*
|
||||
* - Write duplication
|
||||
*
|
||||
* All new writes will be written to both target and source devices, so even
|
||||
* if replace gets canceled, the source device still contains up-to-date data.
|
||||
*
|
||||
* Location: handle_ops_on_dev_replace() from __btrfs_map_block()
|
||||
* Start: btrfs_dev_replace_start()
|
||||
* End: btrfs_dev_replace_finishing()
|
||||
* Content: Latest data/metadata
|
||||
*
|
||||
* - Copy existing extents
|
||||
*
|
||||
* This happens by re-using scrub facility, as scrub also iterates through
|
||||
* existing extents from commit root.
|
||||
*
|
||||
* Location: scrub_write_block_to_dev_replace() from
|
||||
* scrub_block_complete()
|
||||
* Content: Data/meta from commit root.
|
||||
*
|
||||
* Due to the content difference, we need to avoid nocow write when dev-replace
|
||||
* is happening. This is done by marking the block group read-only and waiting
|
||||
* for NOCOW writes.
|
||||
*
|
||||
* After replace is done, the finishing part is done by swapping the target and
|
||||
* source devices.
|
||||
*
|
||||
* Location: btrfs_dev_replace_update_device_in_mapping_tree() from
|
||||
* btrfs_dev_replace_finishing()
|
||||
*/
|
||||
|
||||
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
int scrub_ret);
|
||||
static void btrfs_dev_replace_update_device_in_mapping_tree(
|
||||
@ -472,7 +512,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
|
||||
up_write(&dev_replace->rwsem);
|
||||
|
||||
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
|
||||
ret = btrfs_sysfs_add_devices_dir(tgt_device->fs_devices, tgt_device);
|
||||
if (ret)
|
||||
btrfs_err(fs_info, "kobj add dev failed %d", ret);
|
||||
|
||||
@ -703,7 +743,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
|
||||
|
||||
/* replace the sysfs entry */
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
|
||||
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, src_device);
|
||||
btrfs_sysfs_update_devid(tgt_device);
|
||||
btrfs_rm_dev_replace_free_srcdev(src_device);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -39,6 +39,8 @@ static inline u64 btrfs_sb_offset(int mirror)
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
|
||||
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
|
||||
struct btrfs_key *first_key, u64 parent_transid);
|
||||
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
@ -54,15 +56,12 @@ int __cold open_ctree(struct super_block *sb,
|
||||
char *options);
|
||||
void __cold close_ctree(struct btrfs_fs_info *fs_info);
|
||||
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
|
||||
struct buffer_head *btrfs_read_dev_super(struct block_device *bdev);
|
||||
int btrfs_read_dev_one_super(struct block_device *bdev, int copy_num,
|
||||
struct buffer_head **bh_ret);
|
||||
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
|
||||
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
|
||||
int copy_num);
|
||||
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
|
||||
struct btrfs_key *location);
|
||||
int btrfs_init_fs_root(struct btrfs_root *root);
|
||||
struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
|
||||
u64 root_id);
|
||||
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
|
||||
struct btrfs_key *key);
|
||||
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
@ -70,19 +69,13 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key *key,
|
||||
bool check_ref);
|
||||
static inline struct btrfs_root *
|
||||
btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key *location)
|
||||
{
|
||||
return btrfs_get_fs_root(fs_info, location, true);
|
||||
}
|
||||
|
||||
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_free_fs_root(struct btrfs_root *root);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
@ -95,19 +88,16 @@ struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
* If you want to ensure the whole tree is safe, you should use
|
||||
* fs_info->subvol_srcu
|
||||
*/
|
||||
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
|
||||
static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
|
||||
{
|
||||
if (!root)
|
||||
return NULL;
|
||||
if (refcount_inc_not_zero(&root->refs))
|
||||
return root;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void btrfs_put_fs_root(struct btrfs_root *root)
|
||||
{
|
||||
if (refcount_dec_and_test(&root->refs))
|
||||
kfree(root);
|
||||
}
|
||||
|
||||
void btrfs_put_root(struct btrfs_root *root);
|
||||
void btrfs_mark_buffer_dirty(struct extent_buffer *buf);
|
||||
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
|
@ -57,16 +57,14 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
|
||||
return type;
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
u64 root_objectid, u32 generation,
|
||||
int check_generation)
|
||||
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
u64 root_objectid, u32 generation,
|
||||
int check_generation)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_root *root;
|
||||
struct inode *inode;
|
||||
struct btrfs_key key;
|
||||
int index;
|
||||
int err = 0;
|
||||
|
||||
if (objectid < BTRFS_FIRST_FREE_OBJECTID)
|
||||
return ERR_PTR(-ESTALE);
|
||||
@ -75,25 +73,18 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
|
||||
root = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
if (IS_ERR(root)) {
|
||||
err = PTR_ERR(root);
|
||||
goto fail;
|
||||
}
|
||||
root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(root))
|
||||
return ERR_CAST(root);
|
||||
|
||||
key.objectid = objectid;
|
||||
key.type = BTRFS_INODE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
|
||||
inode = btrfs_iget(sb, &key, root);
|
||||
if (IS_ERR(inode)) {
|
||||
err = PTR_ERR(inode);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
btrfs_put_root(root);
|
||||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
|
||||
if (check_generation && generation != inode->i_generation) {
|
||||
iput(inode);
|
||||
@ -101,9 +92,6 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
}
|
||||
|
||||
return d_obtain_alias(inode);
|
||||
fail:
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_fh_to_parent(struct super_block *sb, struct fid *fh,
|
||||
@ -152,7 +140,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||
return btrfs_get_dentry(sb, objectid, root_objectid, generation, 1);
|
||||
}
|
||||
|
||||
static struct dentry *btrfs_get_parent(struct dentry *child)
|
||||
struct dentry *btrfs_get_parent(struct dentry *child)
|
||||
{
|
||||
struct inode *dir = d_inode(child);
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
|
||||
|
@ -18,4 +18,9 @@ struct btrfs_fid {
|
||||
u64 parent_root_objectid;
|
||||
} __attribute__ ((packed));
|
||||
|
||||
struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
u64 root_objectid, u32 generation,
|
||||
int check_generation);
|
||||
struct dentry *btrfs_get_parent(struct dentry *child);
|
||||
|
||||
#endif
|
||||
|
@ -36,13 +36,14 @@ struct io_failure_record;
|
||||
#define CHUNK_TRIMMED EXTENT_DEFRAG
|
||||
|
||||
enum {
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS0,
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS1,
|
||||
IO_TREE_FS_PINNED_EXTENTS,
|
||||
IO_TREE_FS_EXCLUDED_EXTENTS,
|
||||
IO_TREE_INODE_IO,
|
||||
IO_TREE_INODE_IO_FAILURE,
|
||||
IO_TREE_RELOC_BLOCKS,
|
||||
IO_TREE_TRANS_DIRTY_PAGES,
|
||||
IO_TREE_ROOT_DIRTY_LOG_PAGES,
|
||||
IO_TREE_INODE_FILE_EXTENT,
|
||||
IO_TREE_SELFTEST,
|
||||
};
|
||||
|
||||
@ -222,6 +223,8 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
struct extent_state **cached_state);
|
||||
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, unsigned bits);
|
||||
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, unsigned bits);
|
||||
int extent_invalidatepage(struct extent_io_tree *tree,
|
||||
struct page *page, unsigned long offset);
|
||||
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
|
||||
|
@ -64,10 +64,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 num_bytes)
|
||||
{
|
||||
u64 end = start + num_bytes - 1;
|
||||
set_extent_bits(&fs_info->freed_extents[0],
|
||||
start, end, EXTENT_UPTODATE);
|
||||
set_extent_bits(&fs_info->freed_extents[1],
|
||||
start, end, EXTENT_UPTODATE);
|
||||
set_extent_bits(&fs_info->excluded_extents, start, end,
|
||||
EXTENT_UPTODATE);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -79,10 +77,8 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
|
||||
start = cache->start;
|
||||
end = start + cache->length - 1;
|
||||
|
||||
clear_extent_bits(&fs_info->freed_extents[0],
|
||||
start, end, EXTENT_UPTODATE);
|
||||
clear_extent_bits(&fs_info->freed_extents[1],
|
||||
start, end, EXTENT_UPTODATE);
|
||||
clear_extent_bits(&fs_info->excluded_extents, start, end,
|
||||
EXTENT_UPTODATE);
|
||||
}
|
||||
|
||||
static u64 generic_ref_to_space_flags(struct btrfs_ref *ref)
|
||||
@ -1193,24 +1189,6 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int insert_extent_backref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
u64 bytenr, u64 parent, u64 root_objectid,
|
||||
u64 owner, u64 offset, int refs_to_add)
|
||||
{
|
||||
int ret;
|
||||
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
|
||||
BUG_ON(refs_to_add != 1);
|
||||
ret = insert_tree_block_ref(trans, path, bytenr, parent,
|
||||
root_objectid);
|
||||
} else {
|
||||
ret = insert_extent_data_ref(trans, path, bytenr, parent,
|
||||
root_objectid, owner, offset,
|
||||
refs_to_add);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int remove_extent_backref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_extent_inline_ref *iref,
|
||||
@ -1469,7 +1447,6 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
path->leave_spinning = 1;
|
||||
/* this will setup the path even if it fails to insert the back ref */
|
||||
ret = insert_inline_extent_backref(trans, path, bytenr, num_bytes,
|
||||
@ -1494,11 +1471,17 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
path->leave_spinning = 1;
|
||||
/* now insert the actual backref */
|
||||
ret = insert_extent_backref(trans, path, bytenr, parent, root_objectid,
|
||||
owner, offset, refs_to_add);
|
||||
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
|
||||
BUG_ON(refs_to_add != 1);
|
||||
ret = insert_tree_block_ref(trans, path, bytenr, parent,
|
||||
root_objectid);
|
||||
} else {
|
||||
ret = insert_extent_data_ref(trans, path, bytenr, parent,
|
||||
root_objectid, owner, offset,
|
||||
refs_to_add);
|
||||
}
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
out:
|
||||
@ -1583,7 +1566,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
int err = 0;
|
||||
int metadata = !extent_op->is_data;
|
||||
|
||||
if (trans->aborted)
|
||||
if (TRANS_ABORTED(trans))
|
||||
return 0;
|
||||
|
||||
if (metadata && !btrfs_fs_incompat(fs_info, SKINNY_METADATA))
|
||||
@ -1604,7 +1587,6 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
again:
|
||||
path->reada = READA_FORWARD;
|
||||
path->leave_spinning = 1;
|
||||
ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 1);
|
||||
if (ret < 0) {
|
||||
@ -1703,10 +1685,9 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (trans->aborted) {
|
||||
if (TRANS_ABORTED(trans)) {
|
||||
if (insert_reserved)
|
||||
btrfs_pin_extent(trans->fs_info, node->bytenr,
|
||||
node->num_bytes, 1);
|
||||
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1721,8 +1702,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
else
|
||||
BUG();
|
||||
if (ret && insert_reserved)
|
||||
btrfs_pin_extent(trans->fs_info, node->bytenr,
|
||||
node->num_bytes, 1);
|
||||
btrfs_pin_extent(trans, node->bytenr, node->num_bytes, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1867,8 +1847,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
if (head->must_insert_reserved) {
|
||||
btrfs_pin_extent(fs_info, head->bytenr,
|
||||
head->num_bytes, 1);
|
||||
btrfs_pin_extent(trans, head->bytenr, head->num_bytes, 1);
|
||||
if (head->is_data) {
|
||||
ret = btrfs_del_csums(trans, fs_info->csum_root,
|
||||
head->bytenr, head->num_bytes);
|
||||
@ -2191,7 +2170,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
|
||||
int run_all = count == (unsigned long)-1;
|
||||
|
||||
/* We'll clean this up in btrfs_cleanup_transaction */
|
||||
if (trans->aborted)
|
||||
if (TRANS_ABORTED(trans))
|
||||
return 0;
|
||||
|
||||
if (test_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags))
|
||||
@ -2238,7 +2217,7 @@ out:
|
||||
}
|
||||
|
||||
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u64 flags,
|
||||
struct extent_buffer *eb, u64 flags,
|
||||
int level, int is_data)
|
||||
{
|
||||
struct btrfs_delayed_extent_op *extent_op;
|
||||
@ -2254,7 +2233,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
|
||||
extent_op->is_data = is_data ? true : false;
|
||||
extent_op->level = level;
|
||||
|
||||
ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op);
|
||||
ret = btrfs_add_delayed_extent_op(trans, eb->start, eb->len, extent_op);
|
||||
if (ret)
|
||||
btrfs_free_delayed_extent_op(extent_op);
|
||||
return ret;
|
||||
@ -2588,7 +2567,8 @@ static u64 first_logical_byte(struct btrfs_fs_info *fs_info, u64 search_start)
|
||||
return bytenr;
|
||||
}
|
||||
|
||||
static int pin_down_extent(struct btrfs_block_group *cache,
|
||||
static int pin_down_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group *cache,
|
||||
u64 bytenr, u64 num_bytes, int reserved)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
@ -2607,22 +2587,20 @@ static int pin_down_extent(struct btrfs_block_group *cache,
|
||||
|
||||
percpu_counter_add_batch(&cache->space_info->total_bytes_pinned,
|
||||
num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH);
|
||||
set_extent_dirty(fs_info->pinned_extents, bytenr,
|
||||
set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
|
||||
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
|
||||
int btrfs_pin_extent(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, int reserved)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
|
||||
ASSERT(fs_info->running_transaction);
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
|
||||
BUG_ON(!cache); /* Logic error */
|
||||
|
||||
pin_down_extent(cache, bytenr, num_bytes, reserved);
|
||||
pin_down_extent(trans, cache, bytenr, num_bytes, reserved);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
return 0;
|
||||
@ -2631,13 +2609,15 @@ int btrfs_pin_extent(struct btrfs_fs_info *fs_info,
|
||||
/*
|
||||
* this function must be called within transaction
|
||||
*/
|
||||
int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
|
||||
int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
int ret;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, bytenr);
|
||||
btrfs_add_excluded_extent(trans->fs_info, bytenr, num_bytes);
|
||||
|
||||
cache = btrfs_lookup_block_group(trans->fs_info, bytenr);
|
||||
if (!cache)
|
||||
return -EINVAL;
|
||||
|
||||
@ -2649,7 +2629,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
btrfs_cache_block_group(cache, 1);
|
||||
|
||||
pin_down_extent(cache, bytenr, num_bytes, 0);
|
||||
pin_down_extent(trans, cache, bytenr, num_bytes, 0);
|
||||
|
||||
/* remove us from the free space cache (if we're there at all) */
|
||||
ret = btrfs_remove_free_space(cache, bytenr, num_bytes);
|
||||
@ -2763,11 +2743,6 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
}
|
||||
|
||||
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
|
||||
fs_info->pinned_extents = &fs_info->freed_extents[1];
|
||||
else
|
||||
fs_info->pinned_extents = &fs_info->freed_extents[0];
|
||||
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
@ -2908,12 +2883,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
u64 end;
|
||||
int ret;
|
||||
|
||||
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
|
||||
unpin = &fs_info->freed_extents[1];
|
||||
else
|
||||
unpin = &fs_info->freed_extents[0];
|
||||
unpin = &trans->transaction->pinned_extents;
|
||||
|
||||
while (!trans->aborted) {
|
||||
while (!TRANS_ABORTED(trans)) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
||||
@ -2923,6 +2895,9 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
break;
|
||||
}
|
||||
if (test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags))
|
||||
clear_extent_bits(&fs_info->excluded_extents, start,
|
||||
end, EXTENT_UPTODATE);
|
||||
|
||||
if (btrfs_test_opt(fs_info, DISCARD_SYNC))
|
||||
ret = btrfs_discard_extent(fs_info, start,
|
||||
@ -2950,7 +2925,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
u64 trimmed = 0;
|
||||
|
||||
ret = -EROFS;
|
||||
if (!trans->aborted)
|
||||
if (!TRANS_ABORTED(trans))
|
||||
ret = btrfs_discard_extent(fs_info,
|
||||
block_group->start,
|
||||
block_group->length,
|
||||
@ -3000,7 +2975,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
path->leave_spinning = 1;
|
||||
|
||||
is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID;
|
||||
@ -3301,7 +3275,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
cache = btrfs_lookup_block_group(fs_info, buf->start);
|
||||
|
||||
if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) {
|
||||
pin_down_extent(cache, buf->start, buf->len, 1);
|
||||
pin_down_extent(trans, cache, buf->start, buf->len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
goto out;
|
||||
}
|
||||
@ -3345,7 +3319,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
|
||||
(ref->type == BTRFS_REF_DATA &&
|
||||
ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) {
|
||||
/* unlocks the pinned mutex */
|
||||
btrfs_pin_extent(fs_info, ref->bytenr, ref->len, 1);
|
||||
btrfs_pin_extent(trans, ref->bytenr, ref->len, 1);
|
||||
old_ref_mod = new_ref_mod = 0;
|
||||
ret = 0;
|
||||
} else if (ref->type == BTRFS_REF_METADATA) {
|
||||
@ -3438,6 +3412,10 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
|
||||
btrfs_put_block_group(cache);
|
||||
}
|
||||
|
||||
/*
 * Extent allocation policy, stored in find_free_extent_ctl::policy and used
 * by the find_free_extent helpers to pick the policy-specific implementation.
 */
enum btrfs_extent_allocation_policy {
|
||||
/* Handled by the *_clustered helpers (e.g. do_allocation_clustered()). */
BTRFS_EXTENT_ALLOC_CLUSTERED,
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure used internally for find_free_extent() function. Wraps needed
|
||||
* parameters.
|
||||
@ -3454,6 +3432,8 @@ struct find_free_extent_ctl {
|
||||
|
||||
/* For clustered allocation */
|
||||
u64 empty_cluster;
|
||||
struct btrfs_free_cluster *last_ptr;
|
||||
bool use_cluster;
|
||||
|
||||
bool have_caching_bg;
|
||||
bool orig_have_caching_bg;
|
||||
@ -3489,6 +3469,12 @@ struct find_free_extent_ctl {
|
||||
|
||||
/* Found result */
|
||||
u64 found_offset;
|
||||
|
||||
/* Hint where to start looking for an empty space */
|
||||
u64 hint_byte;
|
||||
|
||||
/* Allocation policy */
|
||||
enum btrfs_extent_allocation_policy policy;
|
||||
};
|
||||
|
||||
|
||||
@ -3501,11 +3487,11 @@ struct find_free_extent_ctl {
|
||||
* Return 0 means we have found a location and set ffe_ctl->found_offset.
|
||||
*/
|
||||
static int find_free_extent_clustered(struct btrfs_block_group *bg,
|
||||
struct btrfs_free_cluster *last_ptr,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_block_group **cluster_bg_ret)
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_block_group **cluster_bg_ret)
|
||||
{
|
||||
struct btrfs_block_group *cluster_bg;
|
||||
struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
|
||||
u64 aligned_cluster;
|
||||
u64 offset;
|
||||
int ret;
|
||||
@ -3605,9 +3591,9 @@ refill_cluster:
|
||||
* Return -EAGAIN to inform caller that we need to re-search this block group
|
||||
*/
|
||||
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
|
||||
struct btrfs_free_cluster *last_ptr,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
|
||||
u64 offset;
|
||||
|
||||
/*
|
||||
@ -3663,16 +3649,101 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int do_allocation_clustered(struct btrfs_block_group *block_group,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_block_group **bg_ret)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* We want to try and use the cluster allocator, so lets look there */
|
||||
if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
|
||||
ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
|
||||
if (ret >= 0 || ret == -EAGAIN)
|
||||
return ret;
|
||||
/* ret == -ENOENT case falls through */
|
||||
}
|
||||
|
||||
return find_free_extent_unclustered(block_group, ffe_ctl);
|
||||
}
|
||||
|
||||
static int do_allocation(struct btrfs_block_group *block_group,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_block_group **bg_ret)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
return do_allocation_clustered(block_group, ffe_ctl, bg_ret);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static void release_block_group(struct btrfs_block_group *block_group,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
int delalloc)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
ffe_ctl->retry_clustered = false;
|
||||
ffe_ctl->retry_unclustered = false;
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
|
||||
BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
|
||||
ffe_ctl->index);
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
}
|
||||
|
||||
static void found_extent_clustered(struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_key *ins)
|
||||
{
|
||||
struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
|
||||
|
||||
if (!ffe_ctl->use_cluster && last_ptr) {
|
||||
spin_lock(&last_ptr->lock);
|
||||
last_ptr->window_start = ins->objectid;
|
||||
spin_unlock(&last_ptr->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void found_extent(struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_key *ins)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
found_extent_clustered(ffe_ctl, ins);
|
||||
break;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
static int chunk_allocation_failed(struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
/*
|
||||
* If we can't allocate a new chunk we've already looped through
|
||||
* at least once, move on to the NO_EMPTY_SIZE case.
|
||||
*/
|
||||
ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Return >0 means caller needs to re-search for free extent
|
||||
* Return 0 means we have the needed free extent.
|
||||
* Return <0 means we failed to locate any free extent.
|
||||
*/
|
||||
static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_free_cluster *last_ptr,
|
||||
struct btrfs_key *ins,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
int full_search, bool use_cluster)
|
||||
bool full_search)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->extent_root;
|
||||
int ret;
|
||||
@ -3689,11 +3760,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
return 1;
|
||||
|
||||
if (ins->objectid) {
|
||||
if (!use_cluster && last_ptr) {
|
||||
spin_lock(&last_ptr->lock);
|
||||
last_ptr->window_start = ins->objectid;
|
||||
spin_unlock(&last_ptr->lock);
|
||||
}
|
||||
found_extent(ffe_ctl, ins);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3739,16 +3806,10 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
ret = btrfs_chunk_alloc(trans, ffe_ctl->flags,
|
||||
CHUNK_ALLOC_FORCE);
|
||||
|
||||
/*
|
||||
* If we can't allocate a new chunk we've already looped
|
||||
* through at least once, move on to the NO_EMPTY_SIZE
|
||||
* case.
|
||||
*/
|
||||
if (ret == -ENOSPC)
|
||||
ffe_ctl->loop = LOOP_NO_EMPTY_SIZE;
|
||||
|
||||
/* Do not bail out on ENOSPC since we can do more. */
|
||||
if (ret < 0 && ret != -ENOSPC)
|
||||
if (ret == -ENOSPC)
|
||||
ret = chunk_allocation_failed(ffe_ctl);
|
||||
else if (ret < 0)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
else
|
||||
ret = 0;
|
||||
@ -3759,6 +3820,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
if (ffe_ctl->loop == LOOP_NO_EMPTY_SIZE) {
|
||||
if (ffe_ctl->policy != BTRFS_EXTENT_ALLOC_CLUSTERED)
|
||||
return -ENOSPC;
|
||||
|
||||
/*
|
||||
* Don't loop again if we already have no empty_size and
|
||||
* no empty_cluster.
|
||||
@ -3774,6 +3838,71 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
static int prepare_allocation_clustered(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_space_info *space_info,
|
||||
struct btrfs_key *ins)
|
||||
{
|
||||
/*
|
||||
* If our free space is heavily fragmented we may not be able to make
|
||||
* big contiguous allocations, so instead of doing the expensive search
|
||||
* for free space, simply return ENOSPC with our max_extent_size so we
|
||||
* can go ahead and search for a more manageable chunk.
|
||||
*
|
||||
* If our max_extent_size is large enough for our allocation simply
|
||||
* disable clustering since we will likely not be able to find enough
|
||||
* space to create a cluster and induce latency trying.
|
||||
*/
|
||||
if (space_info->max_extent_size) {
|
||||
spin_lock(&space_info->lock);
|
||||
if (space_info->max_extent_size &&
|
||||
ffe_ctl->num_bytes > space_info->max_extent_size) {
|
||||
ins->offset = space_info->max_extent_size;
|
||||
spin_unlock(&space_info->lock);
|
||||
return -ENOSPC;
|
||||
} else if (space_info->max_extent_size) {
|
||||
ffe_ctl->use_cluster = false;
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
ffe_ctl->last_ptr = fetch_cluster_info(fs_info, space_info,
|
||||
&ffe_ctl->empty_cluster);
|
||||
if (ffe_ctl->last_ptr) {
|
||||
struct btrfs_free_cluster *last_ptr = ffe_ctl->last_ptr;
|
||||
|
||||
spin_lock(&last_ptr->lock);
|
||||
if (last_ptr->block_group)
|
||||
ffe_ctl->hint_byte = last_ptr->window_start;
|
||||
if (last_ptr->fragmented) {
|
||||
/*
|
||||
* We still set window_start so we can keep track of the
|
||||
* last place we found an allocation to try and save
|
||||
* some time.
|
||||
*/
|
||||
ffe_ctl->hint_byte = last_ptr->window_start;
|
||||
ffe_ctl->use_cluster = false;
|
||||
}
|
||||
spin_unlock(&last_ptr->lock);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int prepare_allocation(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl,
|
||||
struct btrfs_space_info *space_info,
|
||||
struct btrfs_key *ins)
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
return prepare_allocation_clustered(fs_info, ffe_ctl,
|
||||
space_info, ins);
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* walks the btree of allocated extents and find a hole of a given size.
|
||||
* The key ins is changed to record the hole:
|
||||
@ -3801,16 +3930,14 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 ram_bytes, u64 num_bytes, u64 empty_size,
|
||||
u64 hint_byte, struct btrfs_key *ins,
|
||||
u64 hint_byte_orig, struct btrfs_key *ins,
|
||||
u64 flags, int delalloc)
|
||||
{
|
||||
int ret = 0;
|
||||
int cache_block_group_error = 0;
|
||||
struct btrfs_free_cluster *last_ptr = NULL;
|
||||
struct btrfs_block_group *block_group = NULL;
|
||||
struct find_free_extent_ctl ffe_ctl = {0};
|
||||
struct btrfs_space_info *space_info;
|
||||
bool use_cluster = true;
|
||||
bool full_search = false;
|
||||
|
||||
WARN_ON(num_bytes < fs_info->sectorsize);
|
||||
@ -3819,13 +3946,19 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
|
||||
ffe_ctl.empty_size = empty_size;
|
||||
ffe_ctl.flags = flags;
|
||||
ffe_ctl.search_start = 0;
|
||||
ffe_ctl.retry_clustered = false;
|
||||
ffe_ctl.retry_unclustered = false;
|
||||
ffe_ctl.delalloc = delalloc;
|
||||
ffe_ctl.index = btrfs_bg_flags_to_raid_index(flags);
|
||||
ffe_ctl.have_caching_bg = false;
|
||||
ffe_ctl.orig_have_caching_bg = false;
|
||||
ffe_ctl.found_offset = 0;
|
||||
ffe_ctl.hint_byte = hint_byte_orig;
|
||||
ffe_ctl.policy = BTRFS_EXTENT_ALLOC_CLUSTERED;
|
||||
|
||||
/* For clustered allocation */
|
||||
ffe_ctl.retry_clustered = false;
|
||||
ffe_ctl.retry_unclustered = false;
|
||||
ffe_ctl.last_ptr = NULL;
|
||||
ffe_ctl.use_cluster = true;
|
||||
|
||||
ins->type = BTRFS_EXTENT_ITEM_KEY;
|
||||
ins->objectid = 0;
|
||||
@ -3839,51 +3972,14 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
/*
|
||||
* If our free space is heavily fragmented we may not be able to make
|
||||
* big contiguous allocations, so instead of doing the expensive search
|
||||
* for free space, simply return ENOSPC with our max_extent_size so we
|
||||
* can go ahead and search for a more manageable chunk.
|
||||
*
|
||||
* If our max_extent_size is large enough for our allocation simply
|
||||
* disable clustering since we will likely not be able to find enough
|
||||
* space to create a cluster and induce latency trying.
|
||||
*/
|
||||
if (unlikely(space_info->max_extent_size)) {
|
||||
spin_lock(&space_info->lock);
|
||||
if (space_info->max_extent_size &&
|
||||
num_bytes > space_info->max_extent_size) {
|
||||
ins->offset = space_info->max_extent_size;
|
||||
spin_unlock(&space_info->lock);
|
||||
return -ENOSPC;
|
||||
} else if (space_info->max_extent_size) {
|
||||
use_cluster = false;
|
||||
}
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
last_ptr = fetch_cluster_info(fs_info, space_info,
|
||||
&ffe_ctl.empty_cluster);
|
||||
if (last_ptr) {
|
||||
spin_lock(&last_ptr->lock);
|
||||
if (last_ptr->block_group)
|
||||
hint_byte = last_ptr->window_start;
|
||||
if (last_ptr->fragmented) {
|
||||
/*
|
||||
* We still set window_start so we can keep track of the
|
||||
* last place we found an allocation to try and save
|
||||
* some time.
|
||||
*/
|
||||
hint_byte = last_ptr->window_start;
|
||||
use_cluster = false;
|
||||
}
|
||||
spin_unlock(&last_ptr->lock);
|
||||
}
|
||||
ret = prepare_allocation(fs_info, &ffe_ctl, space_info, ins);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ffe_ctl.search_start = max(ffe_ctl.search_start,
|
||||
first_logical_byte(fs_info, 0));
|
||||
ffe_ctl.search_start = max(ffe_ctl.search_start, hint_byte);
|
||||
if (ffe_ctl.search_start == hint_byte) {
|
||||
ffe_ctl.search_start = max(ffe_ctl.search_start, ffe_ctl.hint_byte);
|
||||
if (ffe_ctl.search_start == ffe_ctl.hint_byte) {
|
||||
block_group = btrfs_lookup_block_group(fs_info,
|
||||
ffe_ctl.search_start);
|
||||
/*
|
||||
@ -3924,6 +4020,8 @@ search:
|
||||
down_read(&space_info->groups_sem);
|
||||
list_for_each_entry(block_group,
|
||||
&space_info->block_groups[ffe_ctl.index], list) {
|
||||
struct btrfs_block_group *bg_ret;
|
||||
|
||||
/* If the block group is read-only, we can skip it entirely. */
|
||||
if (unlikely(block_group->ro))
|
||||
continue;
|
||||
@ -3984,39 +4082,20 @@ have_block_group:
|
||||
if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
|
||||
goto loop;
|
||||
|
||||
/*
|
||||
* Ok we want to try and use the cluster allocator, so
|
||||
* lets look there
|
||||
*/
|
||||
if (last_ptr && use_cluster) {
|
||||
struct btrfs_block_group *cluster_bg = NULL;
|
||||
|
||||
ret = find_free_extent_clustered(block_group, last_ptr,
|
||||
&ffe_ctl, &cluster_bg);
|
||||
|
||||
if (ret == 0) {
|
||||
if (cluster_bg && cluster_bg != block_group) {
|
||||
btrfs_release_block_group(block_group,
|
||||
delalloc);
|
||||
block_group = cluster_bg;
|
||||
}
|
||||
goto checks;
|
||||
} else if (ret == -EAGAIN) {
|
||||
goto have_block_group;
|
||||
} else if (ret > 0) {
|
||||
goto loop;
|
||||
bg_ret = NULL;
|
||||
ret = do_allocation(block_group, &ffe_ctl, &bg_ret);
|
||||
if (ret == 0) {
|
||||
if (bg_ret && bg_ret != block_group) {
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
block_group = bg_ret;
|
||||
}
|
||||
/* ret == -ENOENT case falls through */
|
||||
} else if (ret == -EAGAIN) {
|
||||
goto have_block_group;
|
||||
} else if (ret > 0) {
|
||||
goto loop;
|
||||
}
|
||||
|
||||
ret = find_free_extent_unclustered(block_group, last_ptr,
|
||||
&ffe_ctl);
|
||||
if (ret == -EAGAIN)
|
||||
goto have_block_group;
|
||||
else if (ret > 0)
|
||||
goto loop;
|
||||
/* ret == 0 case falls through */
|
||||
checks:
|
||||
/* Checks */
|
||||
ffe_ctl.search_start = round_up(ffe_ctl.found_offset,
|
||||
fs_info->stripesize);
|
||||
|
||||
@ -4050,17 +4129,12 @@ checks:
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
break;
|
||||
loop:
|
||||
ffe_ctl.retry_clustered = false;
|
||||
ffe_ctl.retry_unclustered = false;
|
||||
BUG_ON(btrfs_bg_flags_to_raid_index(block_group->flags) !=
|
||||
ffe_ctl.index);
|
||||
btrfs_release_block_group(block_group, delalloc);
|
||||
release_block_group(block_group, &ffe_ctl, delalloc);
|
||||
cond_resched();
|
||||
}
|
||||
up_read(&space_info->groups_sem);
|
||||
|
||||
ret = find_free_extent_update_loop(fs_info, last_ptr, ins, &ffe_ctl,
|
||||
full_search, use_cluster);
|
||||
ret = find_free_extent_update_loop(fs_info, ins, &ffe_ctl, full_search);
|
||||
if (ret > 0)
|
||||
goto search;
|
||||
|
||||
@ -4189,18 +4263,20 @@ int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
|
||||
int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
int ret = 0;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, start);
|
||||
cache = btrfs_lookup_block_group(trans->fs_info, start);
|
||||
if (!cache) {
|
||||
btrfs_err(fs_info, "unable to find block group for %llu", start);
|
||||
btrfs_err(trans->fs_info, "unable to find block group for %llu",
|
||||
start);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
ret = pin_down_extent(cache, start, len, 1);
|
||||
ret = pin_down_extent(trans, cache, start, len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
return ret;
|
||||
}
|
||||
@ -4431,7 +4507,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
|
||||
ret = alloc_reserved_file_extent(trans, 0, root_objectid, 0, owner,
|
||||
offset, ins, 1);
|
||||
if (ret)
|
||||
btrfs_pin_extent(fs_info, ins->objectid, ins->offset, 1);
|
||||
btrfs_pin_extent(trans, ins->objectid, ins->offset, 1);
|
||||
btrfs_put_block_group(block_group);
|
||||
return ret;
|
||||
}
|
||||
@ -4750,8 +4826,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
ret = btrfs_dec_ref(trans, root, eb, 0);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
ret = btrfs_set_disk_extent_flags(trans, eb->start,
|
||||
eb->len, flag,
|
||||
ret = btrfs_set_disk_extent_flags(trans, eb, flag,
|
||||
btrfs_header_level(eb), 0);
|
||||
BUG_ON(ret); /* -ENOMEM */
|
||||
wc->flags[level] |= flag;
|
||||
@ -5209,9 +5284,7 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
|
||||
*
|
||||
* If called with for_reloc == 0, may exit early with -EAGAIN
|
||||
*/
|
||||
int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
struct btrfs_block_rsv *block_rsv, int update_ref,
|
||||
int for_reloc)
|
||||
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_path *path;
|
||||
@ -5250,9 +5323,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
if (err)
|
||||
goto out_end_trans;
|
||||
|
||||
if (block_rsv)
|
||||
trans->block_rsv = block_rsv;
|
||||
|
||||
/*
|
||||
* This will help us catch people modifying the fs tree while we're
|
||||
* dropping it. It is unsafe to mess with the fs tree while it's being
|
||||
@ -5380,8 +5450,6 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
err = PTR_ERR(trans);
|
||||
goto out_free;
|
||||
}
|
||||
if (block_rsv)
|
||||
trans->block_rsv = block_rsv;
|
||||
}
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
@ -5413,13 +5481,10 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
}
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
|
||||
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
|
||||
btrfs_add_dropped_root(trans, root);
|
||||
} else {
|
||||
free_extent_buffer(root->node);
|
||||
free_extent_buffer(root->commit_root);
|
||||
btrfs_put_fs_root(root);
|
||||
}
|
||||
else
|
||||
btrfs_put_root(root);
|
||||
root_dropped = true;
|
||||
out_end_trans:
|
||||
btrfs_end_transaction_throttle(trans);
|
||||
@ -5749,47 +5814,3 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
|
||||
return bg_ret;
|
||||
return dev_ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* btrfs_{start,end}_write_no_snapshotting() are similar to
|
||||
* mnt_{want,drop}_write(), they are used to prevent some tasks from writing
|
||||
* data into the page cache through nocow before the subvolume is snapshotted,
|
||||
* but flush the data into disk after the snapshot creation, or to prevent
|
||||
* operations while snapshotting is ongoing and that cause the snapshot to be
|
||||
* inconsistent (writes followed by expanding truncates for example).
|
||||
*/
|
||||
/*
 * Counterpart of btrfs_start_write_no_snapshotting(): decrement the per-CPU
 * writer counter in root->subv_writers, then call cond_wake_up() on the
 * writers' wait queue.
 */
void btrfs_end_write_no_snapshotting(struct btrfs_root *root)
|
||||
{
|
||||
percpu_counter_dec(&root->subv_writers->counter);
|
||||
cond_wake_up(&root->subv_writers->wait);
|
||||
}
|
||||
|
||||
int btrfs_start_write_no_snapshotting(struct btrfs_root *root)
|
||||
{
|
||||
if (atomic_read(&root->will_be_snapshotted))
|
||||
return 0;
|
||||
|
||||
percpu_counter_inc(&root->subv_writers->counter);
|
||||
/*
|
||||
* Make sure counter is updated before we check for snapshot creation.
|
||||
*/
|
||||
smp_mb();
|
||||
if (atomic_read(&root->will_be_snapshotted)) {
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void btrfs_wait_for_snapshot_creation(struct btrfs_root *root)
|
||||
{
|
||||
while (true) {
|
||||
int ret;
|
||||
|
||||
ret = btrfs_start_write_no_snapshotting(root);
|
||||
if (ret)
|
||||
break;
|
||||
wait_var_event(&root->will_be_snapshotted,
|
||||
!atomic_read(&root->will_be_snapshotted));
|
||||
}
|
||||
}
|
||||
|
@ -35,42 +35,54 @@ static inline bool extent_state_in_tree(const struct extent_state *state)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static LIST_HEAD(buffers);
|
||||
static LIST_HEAD(states);
|
||||
|
||||
static DEFINE_SPINLOCK(leak_lock);
|
||||
|
||||
static inline
|
||||
void btrfs_leak_debug_add(struct list_head *new, struct list_head *head)
|
||||
static inline void btrfs_leak_debug_add(spinlock_t *lock,
|
||||
struct list_head *new,
|
||||
struct list_head *head)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&leak_lock, flags);
|
||||
spin_lock_irqsave(lock, flags);
|
||||
list_add(new, head);
|
||||
spin_unlock_irqrestore(&leak_lock, flags);
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
|
||||
static inline
|
||||
void btrfs_leak_debug_del(struct list_head *entry)
|
||||
static inline void btrfs_leak_debug_del(spinlock_t *lock,
|
||||
struct list_head *entry)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&leak_lock, flags);
|
||||
spin_lock_irqsave(lock, flags);
|
||||
list_del(entry);
|
||||
spin_unlock_irqrestore(&leak_lock, flags);
|
||||
spin_unlock_irqrestore(lock, flags);
|
||||
}
|
||||
|
||||
static inline void btrfs_extent_buffer_leak_debug_check(void)
|
||||
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
unsigned long flags;
|
||||
|
||||
while (!list_empty(&buffers)) {
|
||||
eb = list_entry(buffers.next, struct extent_buffer, leak_list);
|
||||
pr_err("BTRFS: buffer leak start %llu len %lu refs %d bflags %lu\n",
|
||||
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags);
|
||||
/*
|
||||
* If we didn't get into open_ctree our allocated_ebs will not be
|
||||
* initialized, so just skip this.
|
||||
*/
|
||||
if (!fs_info->allocated_ebs.next)
|
||||
return;
|
||||
|
||||
spin_lock_irqsave(&fs_info->eb_leak_lock, flags);
|
||||
while (!list_empty(&fs_info->allocated_ebs)) {
|
||||
eb = list_first_entry(&fs_info->allocated_ebs,
|
||||
struct extent_buffer, leak_list);
|
||||
pr_err(
|
||||
"BTRFS: buffer leak start %llu len %lu refs %d bflags %lu owner %llu\n",
|
||||
eb->start, eb->len, atomic_read(&eb->refs), eb->bflags,
|
||||
btrfs_header_owner(eb));
|
||||
list_del(&eb->leak_list);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
}
|
||||
spin_unlock_irqrestore(&fs_info->eb_leak_lock, flags);
|
||||
}
|
||||
|
||||
static inline void btrfs_extent_state_leak_debug_check(void)
|
||||
@ -107,9 +119,8 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
|
||||
}
|
||||
}
|
||||
#else
|
||||
#define btrfs_leak_debug_add(new, head) do {} while (0)
|
||||
#define btrfs_leak_debug_del(entry) do {} while (0)
|
||||
#define btrfs_extent_buffer_leak_debug_check() do {} while (0)
|
||||
#define btrfs_leak_debug_add(lock, new, head) do {} while (0)
|
||||
#define btrfs_leak_debug_del(lock, entry) do {} while (0)
|
||||
#define btrfs_extent_state_leak_debug_check() do {} while (0)
|
||||
#define btrfs_debug_check_extent_io_range(c, s, e) do {} while (0)
|
||||
#endif
|
||||
@ -122,7 +133,6 @@ struct tree_entry {
|
||||
|
||||
struct extent_page_data {
|
||||
struct bio *bio;
|
||||
struct extent_io_tree *tree;
|
||||
/* tells writepage not to lock the state bits for this range
|
||||
* it still does the unlocking
|
||||
*/
|
||||
@ -246,8 +256,6 @@ void __cold extent_state_cache_exit(void)
|
||||
|
||||
void __cold extent_io_exit(void)
|
||||
{
|
||||
btrfs_extent_buffer_leak_debug_check();
|
||||
|
||||
/*
|
||||
* Make sure all delayed rcu free are flushed before we
|
||||
* destroy caches.
|
||||
@ -257,6 +265,15 @@ void __cold extent_io_exit(void)
|
||||
bioset_exit(&btrfs_bioset);
|
||||
}
|
||||
|
||||
/*
|
||||
* For the file_extent_tree, we want to hold the inode lock when we lookup and
|
||||
* update the disk_i_size, but lockdep will complain because for our io_tree we hold
|
||||
* the tree lock and get the inode lock when setting delalloc. These two things
|
||||
* are unrelated, so make a class for the file_extent_tree so we don't get the
|
||||
* two locking patterns mixed up.
|
||||
*/
|
||||
static struct lock_class_key file_extent_tree_class;
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner,
|
||||
void *private_data)
|
||||
@ -268,6 +285,8 @@ void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
spin_lock_init(&tree->lock);
|
||||
tree->private_data = private_data;
|
||||
tree->owner = owner;
|
||||
if (owner == IO_TREE_INODE_FILE_EXTENT)
|
||||
lockdep_set_class(&tree->lock, &file_extent_tree_class);
|
||||
}
|
||||
|
||||
void extent_io_tree_release(struct extent_io_tree *tree)
|
||||
@ -314,7 +333,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
|
||||
state->state = 0;
|
||||
state->failrec = NULL;
|
||||
RB_CLEAR_NODE(&state->rb_node);
|
||||
btrfs_leak_debug_add(&state->leak_list, &states);
|
||||
btrfs_leak_debug_add(&leak_lock, &state->leak_list, &states);
|
||||
refcount_set(&state->refs, 1);
|
||||
init_waitqueue_head(&state->wq);
|
||||
trace_alloc_extent_state(state, mask, _RET_IP_);
|
||||
@ -327,7 +346,7 @@ void free_extent_state(struct extent_state *state)
|
||||
return;
|
||||
if (refcount_dec_and_test(&state->refs)) {
|
||||
WARN_ON(extent_state_in_tree(state));
|
||||
btrfs_leak_debug_del(&state->leak_list);
|
||||
btrfs_leak_debug_del(&leak_lock, &state->leak_list);
|
||||
trace_free_extent_state(state, _RET_IP_);
|
||||
kmem_cache_free(extent_state_cache, state);
|
||||
}
|
||||
@ -1053,6 +1072,16 @@ hit_next:
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this extent already has all the bits we want set, then
|
||||
* skip it, not necessary to split it or do anything with it.
|
||||
*/
|
||||
if ((state->state & bits) == bits) {
|
||||
start = state->end + 1;
|
||||
cache_state(state, cached_state);
|
||||
goto search_again;
|
||||
}
|
||||
|
||||
prealloc = alloc_extent_state_atomic(prealloc);
|
||||
BUG_ON(!prealloc);
|
||||
err = split_state(tree, state, prealloc, start);
|
||||
@ -1567,6 +1596,43 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_contiguous_extent_bit: find a contiguous area of bits
|
||||
* @tree - io tree to check
|
||||
* @start - offset to start the search from
|
||||
* @start_ret - the first offset we found with the bits set
|
||||
* @end_ret - the final contiguous range of the bits that were set
|
||||
* @bits - bits to look for
|
||||
*
|
||||
* set_extent_bit and clear_extent_bit can temporarily split contiguous ranges
|
||||
* to set bits appropriately, and then merge them again. During this time it
|
||||
* will drop the tree->lock, so use this helper if you want to find the actual
|
||||
* contiguous area for given bits. We will search to the first bit we find, and
|
||||
* then walk down the tree until we find a non-contiguous area. The area
|
||||
* returned will be the full contiguous area with the bits set.
|
||||
*/
|
||||
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, unsigned bits)
|
||||
{
|
||||
struct extent_state *state;
|
||||
int ret = 1;
|
||||
|
||||
spin_lock(&tree->lock);
|
||||
state = find_first_extent_bit_state(tree, start, bits);
|
||||
if (state) {
|
||||
*start_ret = state->start;
|
||||
*end_ret = state->end;
|
||||
while ((state = next_state(state)) != NULL) {
|
||||
if (state->start > (*end_ret + 1))
|
||||
break;
|
||||
*end_ret = state->end;
|
||||
}
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&tree->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_first_clear_extent_bit - find the first range that has @bits not set.
|
||||
* This range could start before @start.
|
||||
@ -2926,7 +2992,6 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
|
||||
|
||||
/*
|
||||
* @opf: bio REQ_OP_* and REQ_* flags as one value
|
||||
* @tree: tree so we can call our merge_bio hook
|
||||
* @wbc: optional writeback control for io accounting
|
||||
* @page: page to add to the bio
|
||||
* @pg_offset: offset of the new bio or to check whether we are adding
|
||||
@ -2939,7 +3004,7 @@ struct bio *btrfs_bio_clone_partial(struct bio *orig, int offset, int size)
|
||||
* @prev_bio_flags: flags of previous bio to see if we can merge the current one
|
||||
* @bio_flags: flags of the current bio to see if we can merge them
|
||||
*/
|
||||
static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
static int submit_extent_page(unsigned int opf,
|
||||
struct writeback_control *wbc,
|
||||
struct page *page, u64 offset,
|
||||
size_t size, unsigned long pg_offset,
|
||||
@ -2954,6 +3019,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree,
|
||||
struct bio *bio;
|
||||
size_t page_size = min_t(size_t, size, PAGE_SIZE);
|
||||
sector_t sector = offset >> 9;
|
||||
struct extent_io_tree *tree = &BTRFS_I(page->mapping->host)->io_tree;
|
||||
|
||||
ASSERT(bio_ret);
|
||||
|
||||
@ -3062,8 +3128,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
|
||||
* XXX JDM: This needs looking at to ensure proper page locking
|
||||
* return 0 on success, otherwise return error
|
||||
*/
|
||||
static int __do_readpage(struct extent_io_tree *tree,
|
||||
struct page *page,
|
||||
static int __do_readpage(struct page *page,
|
||||
get_extent_t *get_extent,
|
||||
struct extent_map **em_cached,
|
||||
struct bio **bio, int mirror_num,
|
||||
@ -3086,6 +3151,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
size_t disk_io_size;
|
||||
size_t blocksize = inode->i_sb->s_blocksize;
|
||||
unsigned long this_bio_flag = 0;
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
set_page_extent_mapped(page);
|
||||
|
||||
@ -3242,7 +3308,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL,
|
||||
ret = submit_extent_page(REQ_OP_READ | read_flags, NULL,
|
||||
page, offset, disk_io_size,
|
||||
pg_offset, bio,
|
||||
end_bio_extent_readpage, mirror_num,
|
||||
@ -3269,8 +3335,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void contiguous_readpages(struct extent_io_tree *tree,
|
||||
struct page *pages[], int nr_pages,
|
||||
static inline void contiguous_readpages(struct page *pages[], int nr_pages,
|
||||
u64 start, u64 end,
|
||||
struct extent_map **em_cached,
|
||||
struct bio **bio,
|
||||
@ -3280,17 +3345,16 @@ static inline void contiguous_readpages(struct extent_io_tree *tree,
|
||||
struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host);
|
||||
int index;
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
|
||||
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
|
||||
|
||||
for (index = 0; index < nr_pages; index++) {
|
||||
__do_readpage(tree, pages[index], btrfs_get_extent, em_cached,
|
||||
__do_readpage(pages[index], btrfs_get_extent, em_cached,
|
||||
bio, 0, bio_flags, REQ_RAHEAD, prev_em_start);
|
||||
put_page(pages[index]);
|
||||
}
|
||||
}
|
||||
|
||||
static int __extent_read_full_page(struct extent_io_tree *tree,
|
||||
struct page *page,
|
||||
static int __extent_read_full_page(struct page *page,
|
||||
get_extent_t *get_extent,
|
||||
struct bio **bio, int mirror_num,
|
||||
unsigned long *bio_flags,
|
||||
@ -3301,21 +3365,21 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
||||
u64 end = start + PAGE_SIZE - 1;
|
||||
int ret;
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(tree, inode, start, end, NULL);
|
||||
btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
|
||||
|
||||
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
|
||||
ret = __do_readpage(page, get_extent, NULL, bio, mirror_num,
|
||||
bio_flags, read_flags, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent, int mirror_num)
|
||||
int extent_read_full_page(struct page *page, get_extent_t *get_extent,
|
||||
int mirror_num)
|
||||
{
|
||||
struct bio *bio = NULL;
|
||||
unsigned long bio_flags = 0;
|
||||
int ret;
|
||||
|
||||
ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
|
||||
ret = __extent_read_full_page(page, get_extent, &bio, mirror_num,
|
||||
&bio_flags, 0);
|
||||
if (bio)
|
||||
ret = submit_one_bio(bio, mirror_num, bio_flags);
|
||||
@ -3423,7 +3487,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
unsigned long nr_written,
|
||||
int *nr_ret)
|
||||
{
|
||||
struct extent_io_tree *tree = epd->tree;
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
u64 start = page_offset(page);
|
||||
u64 page_end = start + PAGE_SIZE - 1;
|
||||
u64 end;
|
||||
@ -3509,7 +3573,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
page->index, cur, end);
|
||||
}
|
||||
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
|
||||
page, offset, iosize, pg_offset,
|
||||
&epd->bio,
|
||||
end_bio_extent_writepage,
|
||||
@ -3830,8 +3894,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
struct writeback_control *wbc,
|
||||
struct extent_page_data *epd)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
struct extent_io_tree *tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
|
||||
u64 offset = eb->start;
|
||||
u32 nritems;
|
||||
int i, num_pages;
|
||||
@ -3864,7 +3926,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
|
||||
clear_page_dirty_for_io(p);
|
||||
set_page_writeback(p);
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc,
|
||||
ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
|
||||
p, offset, PAGE_SIZE, 0,
|
||||
&epd->bio,
|
||||
end_bio_extent_buffer_writepage,
|
||||
@ -3897,14 +3959,13 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
int btree_write_cache_pages(struct address_space *mapping,
|
||||
struct writeback_control *wbc)
|
||||
{
|
||||
struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
|
||||
struct extent_buffer *eb, *prev_eb = NULL;
|
||||
struct extent_page_data epd = {
|
||||
.bio = NULL,
|
||||
.tree = tree,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
};
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
|
||||
int ret = 0;
|
||||
int done = 0;
|
||||
int nr_to_write_done = 0;
|
||||
@ -4018,7 +4079,39 @@ retry:
|
||||
end_write_bio(&epd, ret);
|
||||
return ret;
|
||||
}
|
||||
ret = flush_write_bio(&epd);
|
||||
/*
|
||||
* If something went wrong, don't allow any metadata write bio to be
|
||||
* submitted.
|
||||
*
|
||||
* This would prevent use-after-free if we had dirty pages not
|
||||
* cleaned up, which can still happen by fuzzed images.
|
||||
*
|
||||
* - Bad extent tree
|
||||
* Allowing existing tree block to be allocated for other trees.
|
||||
*
|
||||
* - Log tree operations
|
||||
* Existing tree blocks get allocated to log tree, bumps its
|
||||
* generation, then get cleaned in tree re-balance.
|
||||
* Such tree block will not be written back, since it's clean,
|
||||
* thus no WRITTEN flag set.
|
||||
* And after log writes back, this tree block is not traced by
|
||||
* any dirty extent_io_tree.
|
||||
*
|
||||
* - Offending tree block gets re-dirtied from its original owner
|
||||
* Since it has bumped generation, no WRITTEN flag, it can be
|
||||
* reused without COWing. This tree block will not be traced
|
||||
* by btrfs_transaction::dirty_pages.
|
||||
*
|
||||
* Now such dirty tree block will not be cleaned by any dirty
|
||||
* extent io tree. Thus we don't want to submit such wild eb
|
||||
* if the fs already has error.
|
||||
*/
|
||||
if (!test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
|
||||
ret = flush_write_bio(&epd);
|
||||
} else {
|
||||
ret = -EUCLEAN;
|
||||
end_write_bio(&epd, ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4190,7 +4283,6 @@ int extent_write_full_page(struct page *page, struct writeback_control *wbc)
|
||||
int ret;
|
||||
struct extent_page_data epd = {
|
||||
.bio = NULL,
|
||||
.tree = &BTRFS_I(page->mapping->host)->io_tree,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
};
|
||||
@ -4212,14 +4304,12 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
||||
{
|
||||
int ret = 0;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
struct page *page;
|
||||
unsigned long nr_pages = (end - start + PAGE_SIZE) >>
|
||||
PAGE_SHIFT;
|
||||
|
||||
struct extent_page_data epd = {
|
||||
.bio = NULL,
|
||||
.tree = tree,
|
||||
.extent_locked = 1,
|
||||
.sync_io = mode == WB_SYNC_ALL,
|
||||
};
|
||||
@ -4263,7 +4353,6 @@ int extent_writepages(struct address_space *mapping,
|
||||
int ret = 0;
|
||||
struct extent_page_data epd = {
|
||||
.bio = NULL,
|
||||
.tree = &BTRFS_I(mapping->host)->io_tree,
|
||||
.extent_locked = 0,
|
||||
.sync_io = wbc->sync_mode == WB_SYNC_ALL,
|
||||
};
|
||||
@ -4285,7 +4374,6 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
|
||||
unsigned long bio_flags = 0;
|
||||
struct page *pagepool[16];
|
||||
struct extent_map *em_cached = NULL;
|
||||
struct extent_io_tree *tree = &BTRFS_I(mapping->host)->io_tree;
|
||||
int nr = 0;
|
||||
u64 prev_em_start = (u64)-1;
|
||||
|
||||
@ -4312,7 +4400,7 @@ int extent_readpages(struct address_space *mapping, struct list_head *pages,
|
||||
|
||||
ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end);
|
||||
|
||||
contiguous_readpages(tree, pagepool, nr, contig_start,
|
||||
contiguous_readpages(pagepool, nr, contig_start,
|
||||
contig_end, &em_cached, &bio, &bio_flags,
|
||||
&prev_em_start);
|
||||
}
|
||||
@ -4796,7 +4884,6 @@ out_free_ulist:
|
||||
|
||||
static void __free_extent_buffer(struct extent_buffer *eb)
|
||||
{
|
||||
btrfs_leak_debug_del(&eb->leak_list);
|
||||
kmem_cache_free(extent_buffer_cache, eb);
|
||||
}
|
||||
|
||||
@ -4862,6 +4949,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb)
|
||||
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
|
||||
{
|
||||
btrfs_release_extent_buffer_pages(eb);
|
||||
btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
|
||||
__free_extent_buffer(eb);
|
||||
}
|
||||
|
||||
@ -4883,7 +4971,8 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
|
||||
init_waitqueue_head(&eb->write_lock_wq);
|
||||
init_waitqueue_head(&eb->read_lock_wq);
|
||||
|
||||
btrfs_leak_debug_add(&eb->leak_list, &buffers);
|
||||
btrfs_leak_debug_add(&fs_info->eb_leak_lock, &eb->leak_list,
|
||||
&fs_info->allocated_ebs);
|
||||
|
||||
spin_lock_init(&eb->refs_lock);
|
||||
atomic_set(&eb->refs, 1);
|
||||
@ -5230,6 +5319,7 @@ static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
|
||||
}
|
||||
|
||||
static int release_extent_buffer(struct extent_buffer *eb)
|
||||
__releases(&eb->refs_lock)
|
||||
{
|
||||
lockdep_assert_held(&eb->refs_lock);
|
||||
|
||||
@ -5248,6 +5338,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
|
||||
spin_unlock(&eb->refs_lock);
|
||||
}
|
||||
|
||||
btrfs_leak_debug_del(&eb->fs_info->eb_leak_lock, &eb->leak_list);
|
||||
/* Should be safe to release our pages at this point */
|
||||
btrfs_release_extent_buffer_pages(eb);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
@ -5405,7 +5496,6 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
|
||||
unsigned long num_reads = 0;
|
||||
struct bio *bio = NULL;
|
||||
unsigned long bio_flags = 0;
|
||||
struct extent_io_tree *tree = &BTRFS_I(eb->fs_info->btree_inode)->io_tree;
|
||||
|
||||
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
|
||||
return 0;
|
||||
@ -5453,7 +5543,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
|
||||
}
|
||||
|
||||
ClearPageError(page);
|
||||
err = __extent_read_full_page(tree, page,
|
||||
err = __extent_read_full_page(page,
|
||||
btree_get_extent, &bio,
|
||||
mirror_num, &bio_flags,
|
||||
REQ_META);
|
||||
|
@ -189,8 +189,8 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
|
||||
int try_release_extent_mapping(struct page *page, gfp_t mask);
|
||||
int try_release_extent_buffer(struct page *page);
|
||||
|
||||
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent, int mirror_num);
|
||||
int extent_read_full_page(struct page *page, get_extent_t *get_extent,
|
||||
int mirror_num);
|
||||
int extent_write_full_page(struct page *page, struct writeback_control *wbc);
|
||||
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
|
||||
int mode);
|
||||
@ -325,4 +325,11 @@ bool find_lock_delalloc_range(struct inode *inode,
|
||||
#endif
|
||||
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
|
||||
u64 start);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
|
||||
#else
|
||||
#define btrfs_extent_buffer_leak_debug_check(fs_info) do {} while (0)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -23,6 +23,97 @@
|
||||
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
|
||||
PAGE_SIZE))
|
||||
|
||||
/**
|
||||
* @inode - the inode we want to update the disk_i_size for
|
||||
* @new_i_size - the i_size we want to set to, 0 if we use i_size
|
||||
*
|
||||
* With NO_HOLES set this simply sets the disk_i_size to whatever i_size_read()
|
||||
* returns as it is perfectly fine with a file that has holes without hole file
|
||||
* extent items.
|
||||
*
|
||||
* However without NO_HOLES we need to only return the area that is contiguous
|
||||
* from the 0 offset of the file. Otherwise we could end up adjusting i_size up
|
||||
* to an extent that has a gap in between.
|
||||
*
|
||||
* Finally new_i_size should only be set in the case of truncate where we're not
|
||||
* ready to use i_size_read() as the limiter yet.
|
||||
*/
|
||||
void btrfs_inode_safe_disk_i_size_write(struct inode *inode, u64 new_i_size)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
u64 start, end, i_size;
|
||||
int ret;
|
||||
|
||||
i_size = new_i_size ?: i_size_read(inode);
|
||||
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
|
||||
BTRFS_I(inode)->disk_i_size = i_size;
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
ret = find_contiguous_extent_bit(&BTRFS_I(inode)->file_extent_tree, 0,
|
||||
&start, &end, EXTENT_DIRTY);
|
||||
if (!ret && start == 0)
|
||||
i_size = min(i_size, end + 1);
|
||||
else
|
||||
i_size = 0;
|
||||
BTRFS_I(inode)->disk_i_size = i_size;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* @inode - the inode we're modifying
|
||||
* @start - the start file offset of the file extent we've inserted
|
||||
* @len - the logical length of the file extent item
|
||||
*
|
||||
* Call when we are inserting a new file extent where there was none before.
|
||||
* Does not need to call this in the case where we're replacing an existing file
|
||||
* extent, however if not sure it's fine to call this multiple times.
|
||||
*
|
||||
* The start and len must match the file extent item, so they must be sectorsize
|
||||
* aligned.
|
||||
*/
|
||||
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
|
||||
|
||||
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
|
||||
return 0;
|
||||
return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
|
||||
EXTENT_DIRTY);
|
||||
}
|
||||
|
||||
/**
|
||||
* @inode - the inode we're modifying
|
||||
* @start - the start file offset of the file extent we've dropped
|
||||
* @len - the logical length of the file extent item
|
||||
*
|
||||
* Called when we drop a file extent, for example when we truncate. Doesn't
|
||||
* need to be called for cases where we're replacing a file extent, like when
|
||||
* we've COWed a file extent.
|
||||
*
|
||||
* The start and len must match the file extent item, so they must be sectorsize
|
||||
* aligned.
|
||||
*/
|
||||
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
|
||||
len == (u64)-1);
|
||||
|
||||
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
|
||||
return 0;
|
||||
return clear_extent_bit(&inode->file_extent_tree, start,
|
||||
start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
|
||||
}
|
||||
|
||||
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
|
||||
u16 csum_size)
|
||||
{
|
||||
@ -949,18 +1040,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
extent_start = key.offset;
|
||||
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
extent_end = extent_start +
|
||||
btrfs_file_extent_num_bytes(leaf, fi);
|
||||
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
size_t size;
|
||||
size = btrfs_file_extent_ram_bytes(leaf, fi);
|
||||
extent_end = ALIGN(extent_start + size,
|
||||
fs_info->sectorsize);
|
||||
}
|
||||
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
@ -1007,3 +1087,30 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
root->root_key.objectid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the end offset (non inclusive) of the file extent item the given path
|
||||
* points to. If it points to an inline extent, the returned offset is rounded
|
||||
* up to the sector size.
|
||||
*/
|
||||
u64 btrfs_file_extent_end(const struct btrfs_path *path)
|
||||
{
|
||||
const struct extent_buffer *leaf = path->nodes[0];
|
||||
const int slot = path->slots[0];
|
||||
struct btrfs_file_extent_item *fi;
|
||||
struct btrfs_key key;
|
||||
u64 end;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
|
||||
if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) {
|
||||
end = btrfs_file_extent_ram_bytes(leaf, fi);
|
||||
end = ALIGN(key.offset + end, leaf->fs_info->sectorsize);
|
||||
} else {
|
||||
end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
|
||||
}
|
||||
|
||||
return end;
|
||||
}
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include "qgroup.h"
|
||||
#include "compression.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "reflink.h"
|
||||
|
||||
static struct kmem_cache *btrfs_inode_defrag_cachep;
|
||||
/*
|
||||
@ -277,7 +278,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key key;
|
||||
struct btrfs_ioctl_defrag_range_args range;
|
||||
int num_defrag;
|
||||
int index;
|
||||
int ret;
|
||||
|
||||
/* get the inode */
|
||||
@ -285,9 +285,7 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
|
||||
inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
inode_root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(inode_root)) {
|
||||
ret = PTR_ERR(inode_root);
|
||||
goto cleanup;
|
||||
@ -297,11 +295,11 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
|
||||
key.type = BTRFS_INODE_ITEM_KEY;
|
||||
key.offset = 0;
|
||||
inode = btrfs_iget(fs_info->sb, &key, inode_root);
|
||||
btrfs_put_root(inode_root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
goto cleanup;
|
||||
}
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
|
||||
/* do a chunk of defrag */
|
||||
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
|
||||
@ -337,7 +335,6 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
|
||||
iput(inode);
|
||||
return 0;
|
||||
cleanup:
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
|
||||
return ret;
|
||||
}
|
||||
@ -1552,15 +1549,14 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_start_write_no_snapshotting(root);
|
||||
if (!ret)
|
||||
if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
|
||||
return -EAGAIN;
|
||||
|
||||
lockstart = round_down(pos, fs_info->sectorsize);
|
||||
lockend = round_up(pos + *write_bytes,
|
||||
fs_info->sectorsize) - 1;
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(&inode->io_tree, inode, lockstart,
|
||||
btrfs_lock_and_flush_ordered_range(inode, lockstart,
|
||||
lockend, NULL);
|
||||
|
||||
num_bytes = lockend - lockstart + 1;
|
||||
@ -1568,7 +1564,7 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
||||
NULL, NULL, NULL);
|
||||
if (ret <= 0) {
|
||||
ret = 0;
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
} else {
|
||||
*write_bytes = min_t(size_t, *write_bytes ,
|
||||
num_bytes - pos + lockstart);
|
||||
@ -1674,7 +1670,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
|
||||
data_reserved, pos,
|
||||
write_bytes);
|
||||
else
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -1778,7 +1774,7 @@ again:
|
||||
|
||||
release_bytes = 0;
|
||||
if (only_release_metadata)
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
|
||||
if (only_release_metadata && copied > 0) {
|
||||
lockstart = round_down(pos,
|
||||
@ -1807,7 +1803,7 @@ again:
|
||||
|
||||
if (release_bytes) {
|
||||
if (only_release_metadata) {
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
release_bytes, true);
|
||||
} else {
|
||||
@ -2070,6 +2066,16 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
|
||||
btrfs_init_log_ctx(&ctx, inode);
|
||||
|
||||
/*
|
||||
* Set the range to full if the NO_HOLES feature is not enabled.
|
||||
* This is to avoid missing file extent items representing holes after
|
||||
* replaying the log.
|
||||
*/
|
||||
if (!btrfs_fs_incompat(fs_info, NO_HOLES)) {
|
||||
start = 0;
|
||||
end = LLONG_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* We write the dirty pages in the range and wait until they complete
|
||||
* out of the ->i_mutex. If so, we can flush the dirty pages by
|
||||
@ -2091,19 +2097,6 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
|
||||
atomic_inc(&root->log_batch);
|
||||
|
||||
/*
|
||||
* If the inode needs a full sync, make sure we use a full range to
|
||||
* avoid log tree corruption, due to hole detection racing with ordered
|
||||
* extent completion for adjacent ranges, and assertion failures during
|
||||
* hole detection. Do this while holding the inode lock, to avoid races
|
||||
* with other tasks.
|
||||
*/
|
||||
if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags)) {
|
||||
start = 0;
|
||||
end = LLONG_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Before we acquired the inode's lock, someone may have dirtied more
|
||||
* pages in the target range. We need to make sure that writeback for
|
||||
@ -2124,6 +2117,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
*/
|
||||
ret = start_ordered_ops(inode, start, end);
|
||||
if (ret) {
|
||||
up_write(&BTRFS_I(inode)->dio_sem);
|
||||
inode_unlock(inode);
|
||||
goto out;
|
||||
}
|
||||
@ -2486,6 +2480,11 @@ static int btrfs_insert_clone_extent(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
|
||||
clone_info->file_offset, clone_len);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* If it's a hole, nothing more needs to be done. */
|
||||
if (clone_info->disk_offset == 0)
|
||||
return 0;
|
||||
@ -2596,6 +2595,24 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
break;
|
||||
}
|
||||
} else if (!clone_info && cur_offset < drop_end) {
|
||||
/*
|
||||
* We are past the i_size here, but since we didn't
|
||||
* insert holes we need to clear the mapped area so we
|
||||
* know to not set disk_i_size in this area until a new
|
||||
* file extent is inserted here.
|
||||
*/
|
||||
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
|
||||
cur_offset, drop_end - cur_offset);
|
||||
if (ret) {
|
||||
/*
|
||||
* We couldn't clear our area, so we could
|
||||
* presumably adjust up and corrupt the fs, so
|
||||
* we need to abort.
|
||||
*/
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (clone_info && drop_end > clone_info->file_offset) {
|
||||
@ -2686,6 +2703,15 @@ int btrfs_punch_hole_range(struct inode *inode, struct btrfs_path *path,
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_trans;
|
||||
}
|
||||
} else if (!clone_info && cur_offset < drop_end) {
|
||||
/* See the comment in the loop above for the reasoning here. */
|
||||
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
|
||||
cur_offset, drop_end - cur_offset);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_trans;
|
||||
}
|
||||
|
||||
}
|
||||
if (clone_info) {
|
||||
ret = btrfs_insert_clone_extent(trans, inode, path, clone_info,
|
||||
@ -2935,7 +2961,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
|
||||
|
||||
inode->i_ctime = current_time(inode);
|
||||
i_size_write(inode, end);
|
||||
btrfs_ordered_update_i_size(inode, end, NULL);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
ret2 = btrfs_end_transaction(trans);
|
||||
|
||||
|
@ -371,10 +371,10 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
|
||||
}
|
||||
}
|
||||
|
||||
static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, struct inode *inode,
|
||||
int uptodate)
|
||||
static int io_ctl_prepare_pages(struct btrfs_io_ctl *io_ctl, bool uptodate)
|
||||
{
|
||||
struct page *page;
|
||||
struct inode *inode = io_ctl->inode;
|
||||
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
|
||||
int i;
|
||||
|
||||
@ -732,7 +732,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
|
||||
|
||||
readahead_cache(inode);
|
||||
|
||||
ret = io_ctl_prepare_pages(&io_ctl, inode, 1);
|
||||
ret = io_ctl_prepare_pages(&io_ctl, true);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -1067,6 +1067,7 @@ fail:
|
||||
}
|
||||
|
||||
static noinline_for_stack int write_pinned_extent_entries(
|
||||
struct btrfs_trans_handle *trans,
|
||||
struct btrfs_block_group *block_group,
|
||||
struct btrfs_io_ctl *io_ctl,
|
||||
int *entries)
|
||||
@ -1085,7 +1086,7 @@ static noinline_for_stack int write_pinned_extent_entries(
|
||||
* We shouldn't have switched the pinned extents yet so this is the
|
||||
* right one
|
||||
*/
|
||||
unpin = block_group->fs_info->pinned_extents;
|
||||
unpin = &trans->transaction->pinned_extents;
|
||||
|
||||
start = block_group->start;
|
||||
|
||||
@ -1190,7 +1191,7 @@ out:
|
||||
invalidate_inode_pages2(inode->i_mapping);
|
||||
BTRFS_I(inode)->generation = 0;
|
||||
if (block_group) {
|
||||
#ifdef DEBUG
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
btrfs_err(root->fs_info,
|
||||
"failed to write free space cache for block group %llu",
|
||||
block_group->start);
|
||||
@ -1291,7 +1292,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
}
|
||||
|
||||
/* Lock all pages first so we can lock the extent safely. */
|
||||
ret = io_ctl_prepare_pages(io_ctl, inode, 0);
|
||||
ret = io_ctl_prepare_pages(io_ctl, false);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
@ -1317,7 +1318,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
* If this changes while we are working we'll get added back to
|
||||
* the dirty list and redo it. No locking needed
|
||||
*/
|
||||
ret = write_pinned_extent_entries(block_group, io_ctl, &entries);
|
||||
ret = write_pinned_extent_entries(trans, block_group, io_ctl, &entries);
|
||||
if (ret)
|
||||
goto out_nospc_locked;
|
||||
|
||||
@ -1366,18 +1367,6 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
|
||||
return 0;
|
||||
|
||||
out:
|
||||
io_ctl->inode = NULL;
|
||||
io_ctl_free(io_ctl);
|
||||
if (ret) {
|
||||
invalidate_inode_pages2(inode->i_mapping);
|
||||
BTRFS_I(inode)->generation = 0;
|
||||
}
|
||||
btrfs_update_inode(trans, root, inode);
|
||||
if (must_iput)
|
||||
iput(inode);
|
||||
return ret;
|
||||
|
||||
out_nospc_locked:
|
||||
cleanup_bitmap_list(&bitmap_list);
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
@ -1390,7 +1379,17 @@ out_unlock:
|
||||
if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
|
||||
up_write(&block_group->data_rwsem);
|
||||
|
||||
goto out;
|
||||
out:
|
||||
io_ctl->inode = NULL;
|
||||
io_ctl_free(io_ctl);
|
||||
if (ret) {
|
||||
invalidate_inode_pages2(inode->i_mapping);
|
||||
BTRFS_I(inode)->generation = 0;
|
||||
}
|
||||
btrfs_update_inode(trans, root, inode);
|
||||
if (must_iput)
|
||||
iput(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
|
||||
@ -1416,7 +1415,7 @@ int btrfs_write_out_cache(struct btrfs_trans_handle *trans,
|
||||
ret = __btrfs_write_out_cache(fs_info->tree_root, inode, ctl,
|
||||
block_group, &block_group->io_ctl, trans);
|
||||
if (ret) {
|
||||
#ifdef DEBUG
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
btrfs_err(fs_info,
|
||||
"failed to write free space cache for block group %llu",
|
||||
block_group->start);
|
||||
@ -4036,7 +4035,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
|
||||
if (release_metadata)
|
||||
btrfs_delalloc_release_metadata(BTRFS_I(inode),
|
||||
inode->i_size, true);
|
||||
#ifdef DEBUG
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
btrfs_err(fs_info,
|
||||
"failed to write free ino cache for root %llu",
|
||||
root->root_key.objectid);
|
||||
|
@ -1251,9 +1251,7 @@ int btrfs_clear_free_space_tree(struct btrfs_fs_info *fs_info)
|
||||
btrfs_free_tree_block(trans, free_space_root, free_space_root->node,
|
||||
0, 1);
|
||||
|
||||
free_extent_buffer(free_space_root->node);
|
||||
free_extent_buffer(free_space_root->commit_root);
|
||||
kfree(free_space_root);
|
||||
btrfs_put_root(free_space_root);
|
||||
|
||||
return btrfs_commit_transaction(trans);
|
||||
|
||||
|
@ -515,7 +515,7 @@ out_release:
|
||||
trace_btrfs_space_reservation(fs_info, "ino_cache", trans->transid,
|
||||
trans->bytes_reserved, 0);
|
||||
btrfs_block_rsv_release(fs_info, trans->block_rsv,
|
||||
trans->bytes_reserved);
|
||||
trans->bytes_reserved, NULL);
|
||||
out:
|
||||
trans->block_rsv = rsv;
|
||||
trans->bytes_reserved = num_bytes;
|
||||
|
154
fs/btrfs/inode.c
154
fs/btrfs/inode.c
@ -28,6 +28,7 @@
|
||||
#include <linux/magic.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/migrate.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <asm/unaligned.h>
|
||||
#include "misc.h"
|
||||
@ -241,6 +242,15 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
btrfs_release_path(path);
|
||||
|
||||
/*
|
||||
* We align size to sectorsize for inline extents just for simplicity
|
||||
* sake.
|
||||
*/
|
||||
size = ALIGN(size, root->fs_info->sectorsize);
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start, size);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
/*
|
||||
* we're an inline extent, so nobody can
|
||||
* extend the file past i_size without locking
|
||||
@ -2446,6 +2456,11 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
ins.offset = disk_num_bytes;
|
||||
ins.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), file_pos,
|
||||
ram_bytes);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Release the reserved range from inode dirty range map, as it is
|
||||
* already moved into delayed_ref_head
|
||||
@ -2536,7 +2551,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
*/
|
||||
btrfs_qgroup_free_data(inode, NULL, start,
|
||||
ordered_extent->num_bytes);
|
||||
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
if (freespace_inode)
|
||||
trans = btrfs_join_transaction_spacecache(root);
|
||||
else
|
||||
@ -2607,7 +2622,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_ordered_update_i_size(inode, 0, ordered_extent);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
ret = btrfs_update_inode_fallback(trans, root, inode);
|
||||
if (ret) { /* -ENOMEM or corruption */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@ -3187,6 +3202,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
|
||||
i_uid_write(inode, btrfs_inode_uid(leaf, inode_item));
|
||||
i_gid_write(inode, btrfs_inode_gid(leaf, inode_item));
|
||||
btrfs_i_size_write(BTRFS_I(inode), btrfs_inode_size(leaf, inode_item));
|
||||
btrfs_inode_set_file_extent_range(BTRFS_I(inode), 0,
|
||||
round_up(i_size_read(inode), fs_info->sectorsize));
|
||||
|
||||
inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->atime);
|
||||
inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->atime);
|
||||
@ -4158,6 +4175,8 @@ search_again:
|
||||
}
|
||||
|
||||
while (1) {
|
||||
u64 clear_start = 0, clear_len = 0;
|
||||
|
||||
fi = NULL;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
|
||||
@ -4208,6 +4227,8 @@ search_again:
|
||||
|
||||
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 num_dec;
|
||||
|
||||
clear_start = found_key.offset;
|
||||
extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
|
||||
if (!del_item) {
|
||||
u64 orig_num_bytes =
|
||||
@ -4215,6 +4236,7 @@ search_again:
|
||||
extent_num_bytes = ALIGN(new_size -
|
||||
found_key.offset,
|
||||
fs_info->sectorsize);
|
||||
clear_start = ALIGN(new_size, fs_info->sectorsize);
|
||||
btrfs_set_file_extent_num_bytes(leaf, fi,
|
||||
extent_num_bytes);
|
||||
num_dec = (orig_num_bytes -
|
||||
@ -4240,6 +4262,7 @@ search_again:
|
||||
inode_sub_bytes(inode, num_dec);
|
||||
}
|
||||
}
|
||||
clear_len = num_dec;
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
/*
|
||||
* we can't truncate inline items that have had
|
||||
@ -4261,12 +4284,33 @@ search_again:
|
||||
*/
|
||||
ret = NEED_TRUNCATE_BLOCK;
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Inline extents are special, we just treat
|
||||
* them as a full sector worth in the file
|
||||
* extent tree just for simplicity sake.
|
||||
*/
|
||||
clear_len = fs_info->sectorsize;
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))
|
||||
inode_sub_bytes(inode, item_end + 1 - new_size);
|
||||
}
|
||||
delete:
|
||||
/*
|
||||
* We use btrfs_truncate_inode_items() to clean up log trees for
|
||||
* multiple fsyncs, and in this case we don't want to clear the
|
||||
* file extent range because it's just the log.
|
||||
*/
|
||||
if (root == BTRFS_I(inode)->root) {
|
||||
ret = btrfs_inode_clear_file_extent_range(BTRFS_I(inode),
|
||||
clear_start, clear_len);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (del_item)
|
||||
last_size = found_key.offset;
|
||||
else
|
||||
@ -4368,7 +4412,7 @@ out:
|
||||
ASSERT(last_size >= new_size);
|
||||
if (!ret && last_size > new_size)
|
||||
last_size = new_size;
|
||||
btrfs_ordered_update_i_size(inode, last_size, NULL);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, last_size);
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lock_start,
|
||||
(u64)-1, &cached_state);
|
||||
}
|
||||
@ -4576,7 +4620,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
|
||||
if (size <= hole_start)
|
||||
return 0;
|
||||
|
||||
btrfs_lock_and_flush_ordered_range(io_tree, BTRFS_I(inode), hole_start,
|
||||
btrfs_lock_and_flush_ordered_range(BTRFS_I(inode), hole_start,
|
||||
block_end - 1, &cached_state);
|
||||
cur_offset = hole_start;
|
||||
while (1) {
|
||||
@ -4589,14 +4633,21 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
|
||||
}
|
||||
last_byte = min(extent_map_end(em), block_end);
|
||||
last_byte = ALIGN(last_byte, fs_info->sectorsize);
|
||||
hole_size = last_byte - cur_offset;
|
||||
|
||||
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
|
||||
struct extent_map *hole_em;
|
||||
hole_size = last_byte - cur_offset;
|
||||
|
||||
err = maybe_insert_hole(root, inode, cur_offset,
|
||||
hole_size);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
|
||||
cur_offset, hole_size);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
btrfs_drop_extent_cache(BTRFS_I(inode), cur_offset,
|
||||
cur_offset + hole_size - 1, 0);
|
||||
hole_em = alloc_extent_map();
|
||||
@ -4628,6 +4679,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
|
||||
hole_size - 1, 0);
|
||||
}
|
||||
free_extent_map(hole_em);
|
||||
} else {
|
||||
err = btrfs_inode_set_file_extent_range(BTRFS_I(inode),
|
||||
cur_offset, hole_size);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
next:
|
||||
free_extent_map(em);
|
||||
@ -4671,24 +4727,24 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
|
||||
* truncation, it must capture all writes that happened before
|
||||
* this truncation.
|
||||
*/
|
||||
btrfs_wait_for_snapshot_creation(root);
|
||||
btrfs_drew_write_lock(&root->snapshot_lock);
|
||||
ret = btrfs_cont_expand(inode, oldsize, newsize);
|
||||
if (ret) {
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
|
||||
i_size_write(inode, newsize);
|
||||
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
pagecache_isize_extended(inode, oldsize, newsize);
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
btrfs_end_write_no_snapshotting(root);
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
btrfs_end_transaction(trans);
|
||||
} else {
|
||||
|
||||
@ -5098,7 +5154,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
new_root = btrfs_read_fs_root_no_name(fs_info, location);
|
||||
new_root = btrfs_get_fs_root(fs_info, location, true);
|
||||
if (IS_ERR(new_root)) {
|
||||
err = PTR_ERR(new_root);
|
||||
goto out;
|
||||
@ -5179,7 +5235,8 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
|
||||
inode->i_ino = args->location->objectid;
|
||||
memcpy(&BTRFS_I(inode)->location, args->location,
|
||||
sizeof(*args->location));
|
||||
BTRFS_I(inode)->root = args->root;
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(args->root);
|
||||
BUG_ON(args->root && !BTRFS_I(inode)->root);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -5260,7 +5317,7 @@ static struct inode *new_simple_dir(struct super_block *s,
|
||||
if (!inode)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
BTRFS_I(inode)->root = root;
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(root);
|
||||
memcpy(&BTRFS_I(inode)->location, key, sizeof(*key));
|
||||
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
|
||||
|
||||
@ -5307,7 +5364,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
|
||||
struct btrfs_root *sub_root = root;
|
||||
struct btrfs_key location;
|
||||
u8 di_type = 0;
|
||||
int index;
|
||||
int ret = 0;
|
||||
|
||||
if (dentry->d_name.len > BTRFS_NAME_LEN)
|
||||
@ -5334,7 +5390,6 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
|
||||
return inode;
|
||||
}
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
ret = fixup_tree_root_location(fs_info, dir, dentry,
|
||||
&location, &sub_root);
|
||||
if (ret < 0) {
|
||||
@ -5345,7 +5400,8 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
|
||||
} else {
|
||||
inode = btrfs_iget(dir->i_sb, &location, sub_root);
|
||||
}
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
if (root != sub_root)
|
||||
btrfs_put_root(sub_root);
|
||||
|
||||
if (!IS_ERR(inode) && root != sub_root) {
|
||||
down_read(&fs_info->cleanup_work_sem);
|
||||
@ -5826,7 +5882,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
BTRFS_I(inode)->index_cnt = 2;
|
||||
BTRFS_I(inode)->dir_index = *index;
|
||||
BTRFS_I(inode)->root = root;
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(root);
|
||||
BTRFS_I(inode)->generation = trans->transid;
|
||||
inode->i_generation = BTRFS_I(inode)->generation;
|
||||
|
||||
@ -6463,6 +6519,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
|
||||
extent_type = btrfs_file_extent_type(leaf, item);
|
||||
extent_start = found_key.offset;
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
if (extent_type == BTRFS_FILE_EXTENT_REG ||
|
||||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
/* Only regular file could have regular/prealloc extent */
|
||||
@ -6473,18 +6530,9 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
btrfs_ino(inode));
|
||||
goto out;
|
||||
}
|
||||
extent_end = extent_start +
|
||||
btrfs_file_extent_num_bytes(leaf, item);
|
||||
|
||||
trace_btrfs_get_extent_show_fi_regular(inode, leaf, item,
|
||||
extent_start);
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
size_t size;
|
||||
|
||||
size = btrfs_file_extent_ram_bytes(leaf, item);
|
||||
extent_end = ALIGN(extent_start + size,
|
||||
fs_info->sectorsize);
|
||||
|
||||
trace_btrfs_get_extent_show_fi_inline(inode, leaf, item,
|
||||
path->slots[0],
|
||||
extent_start);
|
||||
@ -8211,9 +8259,7 @@ static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
|
||||
int btrfs_readpage(struct file *file, struct page *page)
|
||||
{
|
||||
struct extent_io_tree *tree;
|
||||
tree = &BTRFS_I(page->mapping->host)->io_tree;
|
||||
return extent_read_full_page(tree, page, btrfs_get_extent, 0);
|
||||
return extent_read_full_page(page, btrfs_get_extent, 0);
|
||||
}
|
||||
|
||||
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
|
||||
@ -8272,6 +8318,39 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
|
||||
return __btrfs_releasepage(page, gfp_flags);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MIGRATION
|
||||
static int btrfs_migratepage(struct address_space *mapping,
|
||||
struct page *newpage, struct page *page,
|
||||
enum migrate_mode mode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = migrate_page_move_mapping(mapping, newpage, page, 0);
|
||||
if (ret != MIGRATEPAGE_SUCCESS)
|
||||
return ret;
|
||||
|
||||
if (page_has_private(page)) {
|
||||
ClearPagePrivate(page);
|
||||
get_page(newpage);
|
||||
set_page_private(newpage, page_private(page));
|
||||
set_page_private(page, 0);
|
||||
put_page(page);
|
||||
SetPagePrivate(newpage);
|
||||
}
|
||||
|
||||
if (PagePrivate2(page)) {
|
||||
ClearPagePrivate2(page);
|
||||
SetPagePrivate2(newpage);
|
||||
}
|
||||
|
||||
if (mode != MIGRATE_SYNC_NO_COPY)
|
||||
migrate_page_copy(newpage, page);
|
||||
else
|
||||
migrate_page_states(newpage, page);
|
||||
return MIGRATEPAGE_SUCCESS;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void btrfs_invalidatepage(struct page *page, unsigned int offset,
|
||||
unsigned int length)
|
||||
{
|
||||
@ -8647,7 +8726,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
|
||||
break;
|
||||
}
|
||||
|
||||
btrfs_block_rsv_release(fs_info, rsv, -1);
|
||||
btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
|
||||
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
|
||||
rsv, min_size, false);
|
||||
BUG_ON(ret); /* shouldn't happen */
|
||||
@ -8672,7 +8751,7 @@ static int btrfs_truncate(struct inode *inode, bool skip_writeback)
|
||||
ret = PTR_ERR(trans);
|
||||
goto out;
|
||||
}
|
||||
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
}
|
||||
|
||||
if (trans) {
|
||||
@ -8776,6 +8855,8 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
|
||||
extent_io_tree_init(fs_info, &ei->io_tree, IO_TREE_INODE_IO, inode);
|
||||
extent_io_tree_init(fs_info, &ei->io_failure_tree,
|
||||
IO_TREE_INODE_IO_FAILURE, inode);
|
||||
extent_io_tree_init(fs_info, &ei->file_extent_tree,
|
||||
IO_TREE_INODE_FILE_EXTENT, inode);
|
||||
ei->io_tree.track_uptodate = true;
|
||||
ei->io_failure_tree.track_uptodate = true;
|
||||
atomic_set(&ei->sync_writers, 0);
|
||||
@ -8842,6 +8923,8 @@ void btrfs_destroy_inode(struct inode *inode)
|
||||
btrfs_qgroup_check_reserved_leak(inode);
|
||||
inode_tree_del(inode);
|
||||
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
|
||||
btrfs_inode_clear_file_extent_range(BTRFS_I(inode), 0, (u64)-1);
|
||||
btrfs_put_root(BTRFS_I(inode)->root);
|
||||
}
|
||||
|
||||
int btrfs_drop_inode(struct inode *inode)
|
||||
@ -9669,14 +9752,14 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int nr)
|
||||
while (!list_empty(&splice) && nr) {
|
||||
root = list_first_entry(&splice, struct btrfs_root,
|
||||
delalloc_root);
|
||||
root = btrfs_grab_fs_root(root);
|
||||
root = btrfs_grab_root(root);
|
||||
BUG_ON(!root);
|
||||
list_move_tail(&root->delalloc_root,
|
||||
&fs_info->delalloc_roots);
|
||||
spin_unlock(&fs_info->delalloc_root_lock);
|
||||
|
||||
ret = start_delalloc_inodes(root, nr, false);
|
||||
btrfs_put_fs_root(root);
|
||||
btrfs_put_root(root);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@ -9938,7 +10021,7 @@ next:
|
||||
else
|
||||
i_size = cur_offset;
|
||||
i_size_write(inode, i_size);
|
||||
btrfs_ordered_update_i_size(inode, i_size, NULL);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
}
|
||||
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
@ -10474,6 +10557,9 @@ static const struct address_space_operations btrfs_aops = {
|
||||
.direct_IO = btrfs_direct_IO,
|
||||
.invalidatepage = btrfs_invalidatepage,
|
||||
.releasepage = btrfs_releasepage,
|
||||
#ifdef CONFIG_MIGRATION
|
||||
.migratepage = btrfs_migratepage,
|
||||
#endif
|
||||
.set_page_dirty = btrfs_set_page_dirty,
|
||||
.error_remove_page = generic_error_remove_page,
|
||||
.swap_activate = btrfs_swap_activate,
|
||||
|
1050
fs/btrfs/ioctl.c
1050
fs/btrfs/ioctl.c
File diff suppressed because it is too large
Load Diff
@ -523,3 +523,138 @@ void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
|
||||
path->locks[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop around taking references on and locking the root node of the tree until
|
||||
* we end up with a lock on the root node.
|
||||
*
|
||||
* Return: root extent buffer with write lock held
|
||||
*/
|
||||
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
while (1) {
|
||||
eb = btrfs_root_node(root);
|
||||
btrfs_tree_lock(eb);
|
||||
if (eb == root->node)
|
||||
break;
|
||||
btrfs_tree_unlock(eb);
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return eb;
|
||||
}
|
||||
|
||||
/*
|
||||
* Loop around taking references on and locking the root node of the tree until
|
||||
* we end up with a lock on the root node.
|
||||
*
|
||||
* Return: root extent buffer with read lock held
|
||||
*/
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
|
||||
{
|
||||
struct extent_buffer *eb;
|
||||
|
||||
while (1) {
|
||||
eb = btrfs_root_node(root);
|
||||
btrfs_tree_read_lock(eb);
|
||||
if (eb == root->node)
|
||||
break;
|
||||
btrfs_tree_read_unlock(eb);
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
return eb;
|
||||
}
|
||||
|
||||
/*
|
||||
* DREW locks
|
||||
* ==========
|
||||
*
|
||||
* DREW stands for double-reader-writer-exclusion lock. It's used in situations
|
||||
* where you want to provide A-B exclusion but not AA or BB.
|
||||
*
|
||||
* The current implementation gives more priority to readers. If a reader and a
|
||||
* writer both race to acquire their respective sides of the lock the writer
|
||||
* would yield its lock as soon as it detects a concurrent reader. Additionally
|
||||
* if there are pending readers no new writers would be allowed to come in and
|
||||
* acquire the lock.
|
||||
*/
|
||||
|
||||
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
atomic_set(&lock->readers, 0);
|
||||
init_waitqueue_head(&lock->pending_readers);
|
||||
init_waitqueue_head(&lock->pending_writers);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
percpu_counter_destroy(&lock->writers);
|
||||
}
|
||||
|
||||
/* Return true if acquisition is successful, false otherwise */
|
||||
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
if (atomic_read(&lock->readers))
|
||||
return false;
|
||||
|
||||
percpu_counter_inc(&lock->writers);
|
||||
|
||||
/* Ensure writers count is updated before we check for pending readers */
|
||||
smp_mb();
|
||||
if (atomic_read(&lock->readers)) {
|
||||
btrfs_drew_write_unlock(lock);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
while (true) {
|
||||
if (btrfs_drew_try_write_lock(lock))
|
||||
return;
|
||||
wait_event(lock->pending_writers, !atomic_read(&lock->readers));
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
percpu_counter_dec(&lock->writers);
|
||||
cond_wake_up(&lock->pending_readers);
|
||||
}
|
||||
|
||||
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
atomic_inc(&lock->readers);
|
||||
|
||||
/*
|
||||
* Ensure the pending reader count is perceived BEFORE this reader
|
||||
* goes to sleep in case of active writers. This guarantees new writers
|
||||
* won't be allowed and that the current reader will be woken up when
|
||||
* the last active writer finishes its job.
|
||||
*/
|
||||
smp_mb__after_atomic();
|
||||
|
||||
wait_event(lock->pending_readers,
|
||||
percpu_counter_sum(&lock->writers) == 0);
|
||||
}
|
||||
|
||||
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
|
||||
{
|
||||
/*
|
||||
* atomic_dec_and_test implies a full barrier, so woken up writers
|
||||
* are guaranteed to see the decrement
|
||||
*/
|
||||
if (atomic_dec_and_test(&lock->readers))
|
||||
wake_up(&lock->pending_writers);
|
||||
}
|
||||
|
@ -6,6 +6,9 @@
|
||||
#ifndef BTRFS_LOCKING_H
|
||||
#define BTRFS_LOCKING_H
|
||||
|
||||
#include <linux/atomic.h>
|
||||
#include <linux/wait.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
#include "extent_io.h"
|
||||
|
||||
#define BTRFS_WRITE_LOCK 1
|
||||
@ -13,6 +16,8 @@
|
||||
#define BTRFS_WRITE_LOCK_BLOCKING 3
|
||||
#define BTRFS_READ_LOCK_BLOCKING 4
|
||||
|
||||
struct btrfs_path;
|
||||
|
||||
void btrfs_tree_lock(struct extent_buffer *eb);
|
||||
void btrfs_tree_unlock(struct extent_buffer *eb);
|
||||
|
||||
@ -48,4 +53,19 @@ static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
|
||||
BUG();
|
||||
}
|
||||
|
||||
struct btrfs_drew_lock {
|
||||
atomic_t readers;
|
||||
struct percpu_counter writers;
|
||||
wait_queue_head_t pending_writers;
|
||||
wait_queue_head_t pending_readers;
|
||||
};
|
||||
|
||||
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock);
|
||||
void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock);
|
||||
void btrfs_drew_write_lock(struct btrfs_drew_lock *lock);
|
||||
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock);
|
||||
void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock);
|
||||
void btrfs_drew_read_lock(struct btrfs_drew_lock *lock);
|
||||
void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock);
|
||||
|
||||
#endif
|
||||
|
@ -580,7 +580,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
while (!list_empty(&splice) && nr) {
|
||||
root = list_first_entry(&splice, struct btrfs_root,
|
||||
ordered_root);
|
||||
root = btrfs_grab_fs_root(root);
|
||||
root = btrfs_grab_root(root);
|
||||
BUG_ON(!root);
|
||||
list_move_tail(&root->ordered_root,
|
||||
&fs_info->ordered_roots);
|
||||
@ -588,7 +588,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
|
||||
done = btrfs_wait_ordered_extents(root, nr,
|
||||
range_start, range_len);
|
||||
btrfs_put_fs_root(root);
|
||||
btrfs_put_root(root);
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
if (nr != U64_MAX) {
|
||||
@ -785,134 +785,6 @@ out:
|
||||
return entry;
|
||||
}
|
||||
|
||||
/*
|
||||
* After an extent is done, call this to conditionally update the on disk
|
||||
* i_size. i_size is updated to cover any fully written part of the file.
|
||||
*/
|
||||
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
|
||||
struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
|
||||
u64 disk_i_size;
|
||||
u64 new_i_size;
|
||||
u64 i_size = i_size_read(inode);
|
||||
struct rb_node *node;
|
||||
struct rb_node *prev = NULL;
|
||||
struct btrfs_ordered_extent *test;
|
||||
int ret = 1;
|
||||
u64 orig_offset = offset;
|
||||
|
||||
spin_lock_irq(&tree->lock);
|
||||
if (ordered) {
|
||||
offset = entry_end(ordered);
|
||||
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
|
||||
offset = min(offset,
|
||||
ordered->file_offset +
|
||||
ordered->truncated_len);
|
||||
} else {
|
||||
offset = ALIGN(offset, btrfs_inode_sectorsize(inode));
|
||||
}
|
||||
disk_i_size = BTRFS_I(inode)->disk_i_size;
|
||||
|
||||
/*
|
||||
* truncate file.
|
||||
* If ordered is not NULL, then this is called from endio and
|
||||
* disk_i_size will be updated by either truncate itself or any
|
||||
* in-flight IOs which are inside the disk_i_size.
|
||||
*
|
||||
* Because btrfs_setsize() may set i_size with disk_i_size if truncate
|
||||
* fails somehow, we need to make sure we have a precise disk_i_size by
|
||||
* updating it as usual.
|
||||
*
|
||||
*/
|
||||
if (!ordered && disk_i_size > i_size) {
|
||||
BTRFS_I(inode)->disk_i_size = orig_offset;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* if the disk i_size is already at the inode->i_size, or
|
||||
* this ordered extent is inside the disk i_size, we're done
|
||||
*/
|
||||
if (disk_i_size == i_size)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* We still need to update disk_i_size if outstanding_isize is greater
|
||||
* than disk_i_size.
|
||||
*/
|
||||
if (offset <= disk_i_size &&
|
||||
(!ordered || ordered->outstanding_isize <= disk_i_size))
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* walk backward from this ordered extent to disk_i_size.
|
||||
* if we find an ordered extent then we can't update disk i_size
|
||||
* yet
|
||||
*/
|
||||
if (ordered) {
|
||||
node = rb_prev(&ordered->rb_node);
|
||||
} else {
|
||||
prev = tree_search(tree, offset);
|
||||
/*
|
||||
* we insert file extents without involving ordered struct,
|
||||
* so there should be no ordered struct cover this offset
|
||||
*/
|
||||
if (prev) {
|
||||
test = rb_entry(prev, struct btrfs_ordered_extent,
|
||||
rb_node);
|
||||
BUG_ON(offset_in_entry(test, offset));
|
||||
}
|
||||
node = prev;
|
||||
}
|
||||
for (; node; node = rb_prev(node)) {
|
||||
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
|
||||
|
||||
/* We treat this entry as if it doesn't exist */
|
||||
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
|
||||
continue;
|
||||
|
||||
if (entry_end(test) <= disk_i_size)
|
||||
break;
|
||||
if (test->file_offset >= i_size)
|
||||
break;
|
||||
|
||||
/*
|
||||
* We don't update disk_i_size now, so record this undealt
|
||||
* i_size. Or we will not know the real i_size.
|
||||
*/
|
||||
if (test->outstanding_isize < offset)
|
||||
test->outstanding_isize = offset;
|
||||
if (ordered &&
|
||||
ordered->outstanding_isize > test->outstanding_isize)
|
||||
test->outstanding_isize = ordered->outstanding_isize;
|
||||
goto out;
|
||||
}
|
||||
new_i_size = min_t(u64, offset, i_size);
|
||||
|
||||
/*
|
||||
* Some ordered extents may have completed before the current one, and
|
||||
* we hold the real i_size in ->outstanding_isize.
|
||||
*/
|
||||
if (ordered && ordered->outstanding_isize > new_i_size)
|
||||
new_i_size = min_t(u64, ordered->outstanding_isize, i_size);
|
||||
BTRFS_I(inode)->disk_i_size = new_i_size;
|
||||
ret = 0;
|
||||
out:
|
||||
/*
|
||||
* We need to do this because we can't remove ordered extents until
|
||||
* after the i_disk_size has been updated and then the inode has been
|
||||
* updated to reflect the change, so we need to tell anybody who finds
|
||||
* this ordered extent that we've already done all the real work, we
|
||||
* just haven't completed all the other work.
|
||||
*/
|
||||
if (ordered)
|
||||
set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
|
||||
spin_unlock_irq(&tree->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* search the ordered extents for one corresponding to 'offset' and
|
||||
* try to find a checksum. This is used because we allow pages to
|
||||
@ -963,7 +835,6 @@ out:
|
||||
* btrfs_flush_ordered_range - Lock the passed range and ensures all pending
|
||||
* ordered extents in it are run to completion.
|
||||
*
|
||||
* @tree: IO tree used for locking out other users of the range
|
||||
* @inode: Inode whose ordered tree is to be searched
|
||||
* @start: Beginning of range to flush
|
||||
* @end: Last byte of range to lock
|
||||
@ -973,8 +844,7 @@ out:
|
||||
* This function always returns with the given range locked, ensuring after it's
|
||||
* called no ordered extent can be pending.
|
||||
*/
|
||||
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
struct btrfs_inode *inode, u64 start,
|
||||
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
@ -986,7 +856,7 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
cachedp = cached_state;
|
||||
|
||||
while (1) {
|
||||
lock_extent_bits(tree, start, end, cachedp);
|
||||
lock_extent_bits(&inode->io_tree, start, end, cachedp);
|
||||
ordered = btrfs_lookup_ordered_range(inode, start,
|
||||
end - start + 1);
|
||||
if (!ordered) {
|
||||
@ -999,7 +869,7 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
refcount_dec(&cache->refs);
|
||||
break;
|
||||
}
|
||||
unlock_extent_cached(tree, start, end, cachedp);
|
||||
unlock_extent_cached(&inode->io_tree, start, end, cachedp);
|
||||
btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
}
|
||||
|
@ -52,11 +52,6 @@ enum {
|
||||
BTRFS_ORDERED_DIRECT,
|
||||
/* We had an io error when writing this out */
|
||||
BTRFS_ORDERED_IOERR,
|
||||
/*
|
||||
* indicates whether this ordered extent has done its due diligence in
|
||||
* updating the isize
|
||||
*/
|
||||
BTRFS_ORDERED_UPDATED_ISIZE,
|
||||
/* Set when we have to truncate an extent */
|
||||
BTRFS_ORDERED_TRUNCATED,
|
||||
/* Regular IO for COW */
|
||||
@ -182,16 +177,13 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
|
||||
struct btrfs_inode *inode,
|
||||
u64 file_offset,
|
||||
u64 len);
|
||||
int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
|
||||
struct btrfs_ordered_extent *ordered);
|
||||
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||
u8 *sum, int len);
|
||||
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
|
||||
struct btrfs_inode *inode, u64 start,
|
||||
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state);
|
||||
int __init ordered_data_init(void);
|
||||
|
@ -383,7 +383,7 @@ static int inherit_props(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (need_reserve) {
|
||||
btrfs_block_rsv_release(fs_info, trans->block_rsv,
|
||||
num_bytes);
|
||||
num_bytes, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -1030,6 +1030,7 @@ out_add_root:
|
||||
ret = qgroup_rescan_init(fs_info, 0, 1);
|
||||
if (!ret) {
|
||||
qgroup_rescan_zero_tracking(fs_info);
|
||||
fs_info->qgroup_rescan_running = true;
|
||||
btrfs_queue_work(fs_info->qgroup_rescan_workers,
|
||||
&fs_info->qgroup_rescan_work);
|
||||
}
|
||||
@ -1037,11 +1038,8 @@ out_add_root:
|
||||
out_free_path:
|
||||
btrfs_free_path(path);
|
||||
out_free_root:
|
||||
if (ret) {
|
||||
free_extent_buffer(quota_root->node);
|
||||
free_extent_buffer(quota_root->commit_root);
|
||||
kfree(quota_root);
|
||||
}
|
||||
if (ret)
|
||||
btrfs_put_root(quota_root);
|
||||
out:
|
||||
if (ret) {
|
||||
ulist_free(fs_info->qgroup_ulist);
|
||||
@ -1104,9 +1102,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
|
||||
btrfs_tree_unlock(quota_root->node);
|
||||
btrfs_free_tree_block(trans, quota_root, quota_root->node, 0, 1);
|
||||
|
||||
free_extent_buffer(quota_root->node);
|
||||
free_extent_buffer(quota_root->commit_root);
|
||||
kfree(quota_root);
|
||||
btrfs_put_root(quota_root);
|
||||
|
||||
end_trans:
|
||||
ret = btrfs_end_transaction(trans);
|
||||
@ -3237,7 +3233,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
|
||||
}
|
||||
|
||||
mutex_lock(&fs_info->qgroup_rescan_lock);
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
|
||||
if (init_flags) {
|
||||
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
||||
@ -3252,7 +3247,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
return ret;
|
||||
}
|
||||
@ -3263,9 +3257,6 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
|
||||
sizeof(fs_info->qgroup_rescan_progress));
|
||||
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
|
||||
init_completion(&fs_info->qgroup_rescan_completion);
|
||||
fs_info->qgroup_rescan_running = true;
|
||||
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
|
||||
btrfs_init_work(&fs_info->qgroup_rescan_work,
|
||||
@ -3326,8 +3317,11 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
|
||||
|
||||
qgroup_rescan_zero_tracking(fs_info);
|
||||
|
||||
mutex_lock(&fs_info->qgroup_rescan_lock);
|
||||
fs_info->qgroup_rescan_running = true;
|
||||
btrfs_queue_work(fs_info->qgroup_rescan_workers,
|
||||
&fs_info->qgroup_rescan_work);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -3339,9 +3333,7 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_rescan_lock);
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
running = fs_info->qgroup_rescan_running;
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
|
||||
if (!running)
|
||||
@ -3363,9 +3355,13 @@ int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
|
||||
void
|
||||
btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)
|
||||
if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
|
||||
mutex_lock(&fs_info->qgroup_rescan_lock);
|
||||
fs_info->qgroup_rescan_running = true;
|
||||
btrfs_queue_work(fs_info->qgroup_rescan_workers,
|
||||
&fs_info->qgroup_rescan_work);
|
||||
mutex_unlock(&fs_info->qgroup_rescan_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -206,7 +206,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
|
||||
struct btrfs_stripe_hash *h;
|
||||
int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
|
||||
int i;
|
||||
int table_size;
|
||||
|
||||
if (info->stripe_hash_table)
|
||||
return 0;
|
||||
@ -218,8 +217,7 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
|
||||
* Try harder to allocate and fallback to vmalloc to lower the chance
|
||||
* of a failing mount.
|
||||
*/
|
||||
table_size = sizeof(*table) + sizeof(*h) * num_entries;
|
||||
table = kvzalloc(table_size, GFP_KERNEL);
|
||||
table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
|
||||
if (!table)
|
||||
return -ENOMEM;
|
||||
|
||||
@ -1196,22 +1194,19 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
|
||||
int nr_data = rbio->nr_data;
|
||||
int stripe;
|
||||
int pagenr;
|
||||
int p_stripe = -1;
|
||||
int q_stripe = -1;
|
||||
bool has_qstripe;
|
||||
struct bio_list bio_list;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio_list_init(&bio_list);
|
||||
|
||||
if (rbio->real_stripes - rbio->nr_data == 1) {
|
||||
p_stripe = rbio->real_stripes - 1;
|
||||
} else if (rbio->real_stripes - rbio->nr_data == 2) {
|
||||
p_stripe = rbio->real_stripes - 2;
|
||||
q_stripe = rbio->real_stripes - 1;
|
||||
} else {
|
||||
if (rbio->real_stripes - rbio->nr_data == 1)
|
||||
has_qstripe = false;
|
||||
else if (rbio->real_stripes - rbio->nr_data == 2)
|
||||
has_qstripe = true;
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
|
||||
/* at this point we either have a full stripe,
|
||||
* or we've read the full stripe from the drive.
|
||||
@ -1255,7 +1250,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
|
||||
SetPageUptodate(p);
|
||||
pointers[stripe++] = kmap(p);
|
||||
|
||||
if (q_stripe != -1) {
|
||||
if (has_qstripe) {
|
||||
|
||||
/*
|
||||
* raid6, add the qstripe and call the
|
||||
@ -2353,8 +2348,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
int nr_data = rbio->nr_data;
|
||||
int stripe;
|
||||
int pagenr;
|
||||
int p_stripe = -1;
|
||||
int q_stripe = -1;
|
||||
bool has_qstripe;
|
||||
struct page *p_page = NULL;
|
||||
struct page *q_page = NULL;
|
||||
struct bio_list bio_list;
|
||||
@ -2364,14 +2358,12 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
|
||||
bio_list_init(&bio_list);
|
||||
|
||||
if (rbio->real_stripes - rbio->nr_data == 1) {
|
||||
p_stripe = rbio->real_stripes - 1;
|
||||
} else if (rbio->real_stripes - rbio->nr_data == 2) {
|
||||
p_stripe = rbio->real_stripes - 2;
|
||||
q_stripe = rbio->real_stripes - 1;
|
||||
} else {
|
||||
if (rbio->real_stripes - rbio->nr_data == 1)
|
||||
has_qstripe = false;
|
||||
else if (rbio->real_stripes - rbio->nr_data == 2)
|
||||
has_qstripe = true;
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
|
||||
if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) {
|
||||
is_replace = 1;
|
||||
@ -2393,7 +2385,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
goto cleanup;
|
||||
SetPageUptodate(p_page);
|
||||
|
||||
if (q_stripe != -1) {
|
||||
if (has_qstripe) {
|
||||
q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
|
||||
if (!q_page) {
|
||||
__free_page(p_page);
|
||||
@ -2416,8 +2408,7 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
|
||||
/* then add the parity stripe */
|
||||
pointers[stripe++] = kmap(p_page);
|
||||
|
||||
if (q_stripe != -1) {
|
||||
|
||||
if (has_qstripe) {
|
||||
/*
|
||||
* raid6, add the qstripe and call the
|
||||
* library function to fill in our p/q
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
struct rcu_string {
|
||||
struct rcu_head rcu;
|
||||
char str[0];
|
||||
char str[];
|
||||
};
|
||||
|
||||
static inline struct rcu_string *rcu_string_strdup(const char *src, gfp_t mask)
|
||||
|
@ -803,6 +803,15 @@ int btrfs_ref_tree_mod(struct btrfs_fs_info *fs_info,
|
||||
kfree(ref);
|
||||
kfree(ra);
|
||||
goto out_unlock;
|
||||
} else if (be->num_refs == 0) {
|
||||
btrfs_err(fs_info,
|
||||
"trying to do action %d for a bytenr that has 0 total references",
|
||||
action);
|
||||
dump_block_entry(fs_info, be);
|
||||
dump_ref_action(fs_info, ra);
|
||||
kfree(ref);
|
||||
kfree(ra);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!parent) {
|
||||
|
804
fs/btrfs/reflink.c
Normal file
804
fs/btrfs/reflink.c
Normal file
@ -0,0 +1,804 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/iversion.h>
|
||||
#include "compression.h"
|
||||
#include "ctree.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "reflink.h"
|
||||
#include "transaction.h"
|
||||
|
||||
#define BTRFS_MAX_DEDUPE_LEN SZ_16M
|
||||
|
||||
static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode,
|
||||
u64 endoff,
|
||||
const u64 destoff,
|
||||
const u64 olen,
|
||||
int no_time_update)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
int ret;
|
||||
|
||||
inode_inc_iversion(inode);
|
||||
if (!no_time_update)
|
||||
inode->i_mtime = inode->i_ctime = current_time(inode);
|
||||
/*
|
||||
* We round up to the block size at eof when determining which
|
||||
* extents to clone above, but shouldn't round up the file size.
|
||||
*/
|
||||
if (endoff > destoff + olen)
|
||||
endoff = destoff + olen;
|
||||
if (endoff > inode->i_size) {
|
||||
i_size_write(inode, endoff);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
}
|
||||
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
goto out;
|
||||
}
|
||||
ret = btrfs_end_transaction(trans);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int copy_inline_to_page(struct inode *inode,
|
||||
const u64 file_offset,
|
||||
char *inline_data,
|
||||
const u64 size,
|
||||
const u64 datal,
|
||||
const u8 comp_type)
|
||||
{
|
||||
const u64 block_size = btrfs_inode_sectorsize(inode);
|
||||
const u64 range_end = file_offset + block_size - 1;
|
||||
const size_t inline_size = size - btrfs_file_extent_calc_inline_size(0);
|
||||
char *data_start = inline_data + btrfs_file_extent_calc_inline_size(0);
|
||||
struct extent_changeset *data_reserved = NULL;
|
||||
struct page *page = NULL;
|
||||
int ret;
|
||||
|
||||
ASSERT(IS_ALIGNED(file_offset, block_size));
|
||||
|
||||
/*
|
||||
* We have flushed and locked the ranges of the source and destination
|
||||
* inodes, we also have locked the inodes, so we are safe to do a
|
||||
* reservation here. Also we must not do the reservation while holding
|
||||
* a transaction open, otherwise we would deadlock.
|
||||
*/
|
||||
ret = btrfs_delalloc_reserve_space(inode, &data_reserved, file_offset,
|
||||
block_size);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
page = find_or_create_page(inode->i_mapping, file_offset >> PAGE_SHIFT,
|
||||
btrfs_alloc_write_mask(inode->i_mapping));
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
set_page_extent_mapped(page);
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, file_offset, range_end,
|
||||
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
|
||||
0, 0, NULL);
|
||||
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if (comp_type == BTRFS_COMPRESS_NONE) {
|
||||
char *map;
|
||||
|
||||
map = kmap(page);
|
||||
memcpy(map, data_start, datal);
|
||||
flush_dcache_page(page);
|
||||
kunmap(page);
|
||||
} else {
|
||||
ret = btrfs_decompress(comp_type, data_start, page, 0,
|
||||
inline_size, datal);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
flush_dcache_page(page);
|
||||
}
|
||||
|
||||
/*
|
||||
* If our inline data is smaller then the block/page size, then the
|
||||
* remaining of the block/page is equivalent to zeroes. We had something
|
||||
* like the following done:
|
||||
*
|
||||
* $ xfs_io -f -c "pwrite -S 0xab 0 500" file
|
||||
* $ sync # (or fsync)
|
||||
* $ xfs_io -c "falloc 0 4K" file
|
||||
* $ xfs_io -c "pwrite -S 0xcd 4K 4K"
|
||||
*
|
||||
* So what's in the range [500, 4095] corresponds to zeroes.
|
||||
*/
|
||||
if (datal < block_size) {
|
||||
char *map;
|
||||
|
||||
map = kmap(page);
|
||||
memset(map + datal, 0, block_size - datal);
|
||||
flush_dcache_page(page);
|
||||
kunmap(page);
|
||||
}
|
||||
|
||||
SetPageUptodate(page);
|
||||
ClearPageChecked(page);
|
||||
set_page_dirty(page);
|
||||
out_unlock:
|
||||
if (page) {
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
}
|
||||
if (ret)
|
||||
btrfs_delalloc_release_space(inode, data_reserved, file_offset,
|
||||
block_size, true);
|
||||
btrfs_delalloc_release_extents(BTRFS_I(inode), block_size);
|
||||
out:
|
||||
extent_changeset_free(data_reserved);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Deal with cloning of inline extents. We try to copy the inline extent from
|
||||
* the source inode to destination inode when possible. When not possible we
|
||||
* copy the inline extent's data into the respective page of the inode.
|
||||
*/
|
||||
static int clone_copy_inline_extent(struct inode *dst,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *new_key,
|
||||
const u64 drop_start,
|
||||
const u64 datal,
|
||||
const u64 size,
|
||||
const u8 comp_type,
|
||||
char *inline_data,
|
||||
struct btrfs_trans_handle **trans_out)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
|
||||
struct btrfs_root *root = BTRFS_I(dst)->root;
|
||||
const u64 aligned_end = ALIGN(new_key->offset + datal,
|
||||
fs_info->sectorsize);
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
|
||||
if (new_key->offset > 0) {
|
||||
ret = copy_inline_to_page(dst, new_key->offset, inline_data,
|
||||
size, datal, comp_type);
|
||||
goto out;
|
||||
}
|
||||
|
||||
key.objectid = btrfs_ino(BTRFS_I(dst));
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = 0;
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret > 0) {
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
goto copy_inline_extent;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
||||
if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY) {
|
||||
/*
|
||||
* There's an implicit hole at file offset 0, copy the
|
||||
* inline extent's data to the page.
|
||||
*/
|
||||
ASSERT(key.offset > 0);
|
||||
ret = copy_inline_to_page(dst, new_key->offset,
|
||||
inline_data, size, datal,
|
||||
comp_type);
|
||||
goto out;
|
||||
}
|
||||
} else if (i_size_read(dst) <= datal) {
|
||||
struct btrfs_file_extent_item *ei;
|
||||
|
||||
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
/*
|
||||
* If it's an inline extent replace it with the source inline
|
||||
* extent, otherwise copy the source inline extent data into
|
||||
* the respective page at the destination inode.
|
||||
*/
|
||||
if (btrfs_file_extent_type(path->nodes[0], ei) ==
|
||||
BTRFS_FILE_EXTENT_INLINE)
|
||||
goto copy_inline_extent;
|
||||
|
||||
ret = copy_inline_to_page(dst, new_key->offset, inline_data,
|
||||
size, datal, comp_type);
|
||||
goto out;
|
||||
}
|
||||
|
||||
copy_inline_extent:
|
||||
ret = 0;
|
||||
/*
|
||||
* We have no extent items, or we have an extent at offset 0 which may
|
||||
* or may not be inlined. All these cases are dealt the same way.
|
||||
*/
|
||||
if (i_size_read(dst) > datal) {
|
||||
/*
|
||||
* At the destination offset 0 we have either a hole, a regular
|
||||
* extent or an inline extent larger then the one we want to
|
||||
* clone. Deal with all these cases by copying the inline extent
|
||||
* data into the respective page at the destination inode.
|
||||
*/
|
||||
ret = copy_inline_to_page(dst, new_key->offset, inline_data,
|
||||
size, datal, comp_type);
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_release_path(path);
|
||||
/*
|
||||
* If we end up here it means were copy the inline extent into a leaf
|
||||
* of the destination inode. We know we will drop or adjust at most one
|
||||
* extent item in the destination root.
|
||||
*
|
||||
* 1 unit - adjusting old extent (we may have to split it)
|
||||
* 1 unit - add new extent
|
||||
* 1 unit - inode update
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 3);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
write_extent_buffer(path->nodes[0], inline_data,
|
||||
btrfs_item_ptr_offset(path->nodes[0],
|
||||
path->slots[0]),
|
||||
size);
|
||||
inode_add_bytes(dst, datal);
|
||||
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
|
||||
out:
|
||||
if (!ret && !trans) {
|
||||
/*
|
||||
* No transaction here means we copied the inline extent into a
|
||||
* page of the destination inode.
|
||||
*
|
||||
* 1 unit to update inode item
|
||||
*/
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
}
|
||||
}
|
||||
if (ret && trans) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
}
|
||||
if (!ret)
|
||||
*trans_out = trans;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_clone() - clone a range from inode file to another
|
||||
*
|
||||
* @src: Inode to clone from
|
||||
* @inode: Inode to clone to
|
||||
* @off: Offset within source to start clone from
|
||||
* @olen: Original length, passed by user, of range to clone
|
||||
* @olen_aligned: Block-aligned value of olen
|
||||
* @destoff: Offset within @inode to start clone
|
||||
* @no_time_update: Whether to update mtime/ctime on the target inode
|
||||
*/
|
||||
static int btrfs_clone(struct inode *src, struct inode *inode,
|
||||
const u64 off, const u64 olen, const u64 olen_aligned,
|
||||
const u64 destoff, int no_time_update)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_path *path = NULL;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_trans_handle *trans;
|
||||
char *buf = NULL;
|
||||
struct btrfs_key key;
|
||||
u32 nritems;
|
||||
int slot;
|
||||
int ret;
|
||||
const u64 len = olen_aligned;
|
||||
u64 last_dest_end = destoff;
|
||||
|
||||
ret = -ENOMEM;
|
||||
buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return ret;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
kvfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
/* Clone data */
|
||||
key.objectid = btrfs_ino(BTRFS_I(src));
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = off;
|
||||
|
||||
while (1) {
|
||||
u64 next_key_min_offset = key.offset + 1;
|
||||
struct btrfs_file_extent_item *extent;
|
||||
int type;
|
||||
u32 size;
|
||||
struct btrfs_key new_key;
|
||||
u64 disko = 0, diskl = 0;
|
||||
u64 datao = 0, datal = 0;
|
||||
u8 comp;
|
||||
u64 drop_start;
|
||||
|
||||
/* Note the key will change type as we walk through the tree */
|
||||
path->leave_spinning = 1;
|
||||
ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
|
||||
0, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/*
|
||||
* First search, if no extent item that starts at offset off was
|
||||
* found but the previous item is an extent item, it's possible
|
||||
* it might overlap our target range, therefore process it.
|
||||
*/
|
||||
if (key.offset == off && ret > 0 && path->slots[0] > 0) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key,
|
||||
path->slots[0] - 1);
|
||||
if (key.type == BTRFS_EXTENT_DATA_KEY)
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
process_slot:
|
||||
if (path->slots[0] >= nritems) {
|
||||
ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0)
|
||||
break;
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
if (key.type > BTRFS_EXTENT_DATA_KEY ||
|
||||
key.objectid != btrfs_ino(BTRFS_I(src)))
|
||||
break;
|
||||
|
||||
ASSERT(key.type == BTRFS_EXTENT_DATA_KEY);
|
||||
|
||||
extent = btrfs_item_ptr(leaf, slot,
|
||||
struct btrfs_file_extent_item);
|
||||
comp = btrfs_file_extent_compression(leaf, extent);
|
||||
type = btrfs_file_extent_type(leaf, extent);
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
disko = btrfs_file_extent_disk_bytenr(leaf, extent);
|
||||
diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
|
||||
datao = btrfs_file_extent_offset(leaf, extent);
|
||||
datal = btrfs_file_extent_num_bytes(leaf, extent);
|
||||
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
/* Take upper bound, may be compressed */
|
||||
datal = btrfs_file_extent_ram_bytes(leaf, extent);
|
||||
}
|
||||
|
||||
/*
|
||||
* The first search might have left us at an extent item that
|
||||
* ends before our target range's start, can happen if we have
|
||||
* holes and NO_HOLES feature enabled.
|
||||
*/
|
||||
if (key.offset + datal <= off) {
|
||||
path->slots[0]++;
|
||||
goto process_slot;
|
||||
} else if (key.offset >= off + len) {
|
||||
break;
|
||||
}
|
||||
next_key_min_offset = key.offset + datal;
|
||||
size = btrfs_item_size_nr(leaf, slot);
|
||||
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot),
|
||||
size);
|
||||
|
||||
btrfs_release_path(path);
|
||||
path->leave_spinning = 0;
|
||||
|
||||
memcpy(&new_key, &key, sizeof(new_key));
|
||||
new_key.objectid = btrfs_ino(BTRFS_I(inode));
|
||||
if (off <= key.offset)
|
||||
new_key.offset = key.offset + destoff - off;
|
||||
else
|
||||
new_key.offset = destoff;
|
||||
|
||||
/*
|
||||
* Deal with a hole that doesn't have an extent item that
|
||||
* represents it (NO_HOLES feature enabled).
|
||||
* This hole is either in the middle of the cloning range or at
|
||||
* the beginning (fully overlaps it or partially overlaps it).
|
||||
*/
|
||||
if (new_key.offset != last_dest_end)
|
||||
drop_start = last_dest_end;
|
||||
else
|
||||
drop_start = new_key.offset;
|
||||
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
struct btrfs_clone_extent_info clone_info;
|
||||
|
||||
/*
|
||||
* a | --- range to clone ---| b
|
||||
* | ------------- extent ------------- |
|
||||
*/
|
||||
|
||||
/* Subtract range b */
|
||||
if (key.offset + datal > off + len)
|
||||
datal = off + len - key.offset;
|
||||
|
||||
/* Subtract range a */
|
||||
if (off > key.offset) {
|
||||
datao += off - key.offset;
|
||||
datal -= off - key.offset;
|
||||
}
|
||||
|
||||
clone_info.disk_offset = disko;
|
||||
clone_info.disk_len = diskl;
|
||||
clone_info.data_offset = datao;
|
||||
clone_info.data_len = datal;
|
||||
clone_info.file_offset = new_key.offset;
|
||||
clone_info.extent_buf = buf;
|
||||
clone_info.item_size = size;
|
||||
ret = btrfs_punch_hole_range(inode, path, drop_start,
|
||||
new_key.offset + datal - 1, &clone_info,
|
||||
&trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
/*
|
||||
* Inline extents always have to start at file offset 0
|
||||
* and can never be bigger then the sector size. We can
|
||||
* never clone only parts of an inline extent, since all
|
||||
* reflink operations must start at a sector size aligned
|
||||
* offset, and the length must be aligned too or end at
|
||||
* the i_size (which implies the whole inlined data).
|
||||
*/
|
||||
ASSERT(key.offset == 0);
|
||||
ASSERT(datal <= fs_info->sectorsize);
|
||||
if (key.offset != 0 || datal > fs_info->sectorsize)
|
||||
return -EUCLEAN;
|
||||
|
||||
ret = clone_copy_inline_extent(inode, path, &new_key,
|
||||
drop_start, datal, size,
|
||||
comp, buf, &trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_release_path(path);
|
||||
|
||||
last_dest_end = ALIGN(new_key.offset + datal,
|
||||
fs_info->sectorsize);
|
||||
ret = clone_finish_inode_update(trans, inode, last_dest_end,
|
||||
destoff, olen, no_time_update);
|
||||
if (ret)
|
||||
goto out;
|
||||
if (new_key.offset + datal >= destoff + len)
|
||||
break;
|
||||
|
||||
btrfs_release_path(path);
|
||||
key.offset = next_key_min_offset;
|
||||
|
||||
if (fatal_signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
if (last_dest_end < destoff + len) {
|
||||
/*
|
||||
* We have an implicit hole that fully or partially overlaps our
|
||||
* cloning range at its end. This means that we either have the
|
||||
* NO_HOLES feature enabled or the implicit hole happened due to
|
||||
* mixing buffered and direct IO writes against this file.
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
path->leave_spinning = 0;
|
||||
|
||||
ret = btrfs_punch_hole_range(inode, path, last_dest_end,
|
||||
destoff + len - 1, NULL, &trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = clone_finish_inode_update(trans, inode, destoff + len,
|
||||
destoff, olen, no_time_update);
|
||||
}
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
kvfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Unlock the byte range [loff, loff + len - 1] in the io tree of each of the
 * two inodes; counterpart of btrfs_double_extent_lock().
 */
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
				       struct inode *inode2, u64 loff2, u64 len)
{
	const u64 end1 = loff1 + len - 1;
	const u64 end2 = loff2 + len - 1;

	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, end1);
	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, end2);
}
|
||||
|
||||
/*
 * Lock the byte range [loff, loff + len - 1] in the io tree of each of the
 * two inodes. The pairs are put into a fixed order first (by inode pointer,
 * then by offset when both are the same inode), so the order in which the
 * two ranges are locked does not depend on the order of the arguments.
 */
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
				     struct inode *inode2, u64 loff2, u64 len)
{
	/*
	 * When both pointers are the same inode, swapping them is a no-op, so
	 * a single "swap the pairs" branch covers both ordering cases.
	 */
	if (inode1 < inode2 || (inode1 == inode2 && loff2 < loff1)) {
		swap(inode1, inode2);
		swap(loff1, loff2);
	}

	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}
|
||||
|
||||
/*
 * Deduplicate one range: clone @len bytes at @loff of @src onto @dst_loff of
 * @dst, with both extent ranges locked and without touching the timestamps
 * of the destination.
 *
 * Returns the result of btrfs_clone().
 */
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
				   struct inode *dst, u64 dst_loff)
{
	const u64 blocksize = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
	const u64 aligned_len = ALIGN(len, blocksize);
	int ret;

	/*
	 * The destination range is locked to serialize with concurrent
	 * readpages() and the source range to serialize with relocation.
	 */
	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
	ret = btrfs_clone(src, dst, loff, len, aligned_len, dst_loff, 1);
	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);

	return ret;
}
|
||||
|
||||
/*
 * Deduplicate @olen bytes from @src at @loff onto @dst at @dst_loff.
 *
 * The range is processed in chunks of at most BTRFS_MAX_DEDUPE_LEN bytes.
 * While the operation runs, the destination root's dedupe_in_progress
 * counter is raised; the operation is refused if a send is using that root.
 *
 * Returns 0 on success (including a zero length request), -EAGAIN if send
 * operations are in progress on the destination root, or the error of the
 * first chunk that failed.
 */
static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
			     struct inode *dst, u64 dst_loff)
{
	/*
	 * Start from success: with olen == 0 neither the chunk loop nor the
	 * tail call runs, and ret must not be returned uninitialized.
	 */
	int ret = 0;
	u64 i, tail_len, chunk_count;
	struct btrfs_root *root_dst = BTRFS_I(dst)->root;

	spin_lock(&root_dst->root_item_lock);
	if (root_dst->send_in_progress) {
		btrfs_warn_rl(root_dst->fs_info,
"cannot deduplicate to root %llu while send operations are using it (%d in progress)",
			      root_dst->root_key.objectid,
			      root_dst->send_in_progress);
		spin_unlock(&root_dst->root_item_lock);
		return -EAGAIN;
	}
	root_dst->dedupe_in_progress++;
	spin_unlock(&root_dst->root_item_lock);

	tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);

	/* Full-sized chunks first ... */
	for (i = 0; i < chunk_count; i++) {
		ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
					      dst, dst_loff);
		if (ret)
			goto out;

		loff += BTRFS_MAX_DEDUPE_LEN;
		dst_loff += BTRFS_MAX_DEDUPE_LEN;
	}

	/* ... then whatever is left over. */
	if (tail_len > 0)
		ret = btrfs_extent_same_range(src, loff, tail_len, dst, dst_loff);
out:
	spin_lock(&root_dst->root_item_lock);
	root_dst->dedupe_in_progress--;
	spin_unlock(&root_dst->root_item_lock);

	return ret;
}
|
||||
|
||||
/*
 * Clone @olen bytes starting at @off of @file_src into @file at @destoff.
 *
 * Expands the destination first when @destoff lies beyond its current size,
 * clones with both extent ranges locked, waits for ordered extents in the
 * destination range and finally drops the affected page cache pages.
 *
 * Returns 0 on success or a negative errno.
 */
static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
					u64 off, u64 olen, u64 destoff)
{
	struct inode *inode = file_inode(file);
	struct inode *src = file_inode(file_src);
	const u64 blocksize = btrfs_sb(inode->i_sb)->sb->s_blocksize;
	u64 clone_len = olen;
	int wb_ret;
	int ret;

	/*
	 * VFS's generic_remap_file_range_prep() protects us from cloning the
	 * eof block into the middle of a file, which would result in corruption
	 * if the file size is not blocksize aligned. So we don't need to check
	 * for that case here.
	 */
	if (off + clone_len == src->i_size)
		clone_len = ALIGN(src->i_size, blocksize) - off;

	if (destoff > inode->i_size) {
		const u64 wb_start = ALIGN_DOWN(inode->i_size, blocksize);

		ret = btrfs_cont_expand(inode, inode->i_size, destoff);
		if (ret)
			return ret;
		/*
		 * We may have truncated the last block if the inode's size is
		 * not sector size aligned, so we need to wait for writeback to
		 * complete before proceeding further, otherwise we can race
		 * with cloning and attempt to increment a reference to an
		 * extent that no longer exists (writeback completed right after
		 * we found the previous extent covering eof and before we
		 * attempted to increment its reference count).
		 */
		ret = btrfs_wait_ordered_range(inode, wb_start,
					       destoff - wb_start);
		if (ret)
			return ret;
	}

	/*
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	btrfs_double_extent_lock(src, off, inode, destoff, clone_len);
	ret = btrfs_clone(src, inode, off, olen, clone_len, destoff, 0);
	btrfs_double_extent_unlock(src, off, inode, destoff, clone_len);

	/*
	 * We may have copied an inline extent into a page of the destination
	 * range, so wait for writeback to complete before truncating pages
	 * from the page cache. This is a rare case.
	 */
	wb_ret = btrfs_wait_ordered_range(inode, destoff, clone_len);
	/* An error from the clone itself takes precedence. */
	if (!ret)
		ret = wb_ret;

	/*
	 * Truncate page cache pages so that future reads will see the cloned
	 * data immediately and not the previous data.
	 */
	truncate_inode_pages_range(&inode->i_data,
				round_down(destoff, PAGE_SIZE),
				round_up(destoff + clone_len, PAGE_SIZE) - 1);

	return ret;
}
|
||||
|
||||
/*
 * Btrfs specific checks and flushing that must happen before a remap
 * (clone or dedupe) operation, followed by the generic VFS preparation.
 *
 * Returns a negative errno on failure, otherwise the result of
 * generic_remap_file_range_prep().
 */
static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
				       struct file *file_out, loff_t pos_out,
				       loff_t *len, unsigned int remap_flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	const bool is_dedupe = remap_flags & REMAP_FILE_DEDUP;
	u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize;
	u64 wb_len;
	int ret;

	/* These restrictions only apply to clone, not to dedupe. */
	if (!is_dedupe) {
		if (btrfs_root_readonly(BTRFS_I(inode_out)->root))
			return -EROFS;

		if (file_in->f_path.mnt != file_out->f_path.mnt ||
		    inode_in->i_sb != inode_out->i_sb)
			return -EXDEV;
	}

	/* Don't make the dst file partly checksummed */
	if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) !=
	    (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM))
		return -EINVAL;

	/*
	 * Now that the inodes are locked, we need to start writeback ourselves
	 * and can not rely on the writeback from the VFS's generic helper
	 * generic_remap_file_range_prep() because:
	 *
	 * 1) For compression we must call filemap_fdatawrite_range() range
	 *    twice (btrfs_fdatawrite_range() does it for us), and the generic
	 *    helper only calls it once;
	 *
	 * 2) filemap_fdatawrite_range(), called by the generic helper only
	 *    waits for the writeback to complete, i.e. for IO to be done, and
	 *    not for the ordered extents to complete. We need to wait for them
	 *    to complete so that new file extent items are in the fs tree.
	 */
	if (!is_dedupe && *len == 0)
		wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs);
	else
		wb_len = ALIGN(*len, bs);

	/*
	 * Since we don't lock ranges, wait for ongoing lockless dio writes (as
	 * any in progress could create its ordered extents after we wait for
	 * existing ordered extents below).
	 */
	inode_dio_wait(inode_in);
	if (inode_out != inode_in)
		inode_dio_wait(inode_out);

	/*
	 * Workaround to make sure NOCOW buffered write reach disk as NOCOW.
	 *
	 * Btrfs' back references do not have a block level granularity, they
	 * work at the whole extent level.
	 * NOCOW buffered write without data space reserved may not be able
	 * to fall back to CoW due to lack of data space, thus could cause
	 * data loss.
	 *
	 * Here we take a shortcut by flushing the whole inode, so that all
	 * nocow write should reach disk as nocow before we increase the
	 * reference of the extent. We could do better by only flushing NOCOW
	 * data, but that needs extra accounting.
	 *
	 * Also we don't need to check ASYNC_EXTENT, as async extent will be
	 * CoWed anyway, not affecting nocow part.
	 */
	ret = filemap_flush(inode_in->i_mapping);
	if (ret < 0)
		return ret;

	/* Wait for ordered extents on the source, then on the destination. */
	ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
				       wb_len);
	if (ret < 0)
		return ret;

	ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
				       wb_len);
	if (ret < 0)
		return ret;

	return generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out,
					    len, remap_flags);
}
|
||||
|
||||
/*
 * Entry point for clone and dedupe requests: remap @len bytes at @off of
 * @src_file onto @destoff of @dst_file.
 *
 * Only REMAP_FILE_DEDUP and REMAP_FILE_ADVISORY are accepted in
 * @remap_flags. Both inodes stay locked for the duration of the operation.
 *
 * Returns the (possibly adjusted) length on success, or a negative errno.
 */
loff_t btrfs_remap_file_range(struct file *src_file, loff_t off,
		struct file *dst_file, loff_t destoff, loff_t len,
		unsigned int remap_flags)
{
	struct inode *src_inode = file_inode(src_file);
	struct inode *dst_inode = file_inode(dst_file);
	const bool same_inode = (dst_inode == src_inode);
	int ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (!same_inode)
		lock_two_nondirectories(src_inode, dst_inode);
	else
		inode_lock(src_inode);

	ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff,
					  &len, remap_flags);
	/* Nothing to do on failure or when the range ended up empty. */
	if (ret < 0 || len == 0)
		goto out_unlock;

	if (remap_flags & REMAP_FILE_DEDUP)
		ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff);
	else
		ret = btrfs_clone_files(dst_file, src_file, off, len, destoff);

out_unlock:
	if (!same_inode)
		unlock_two_nondirectories(src_inode, dst_inode);
	else
		inode_unlock(src_inode);

	if (ret < 0)
		return ret;
	return len;
}
|
12
fs/btrfs/reflink.h
Normal file
12
fs/btrfs/reflink.h
Normal file
@ -0,0 +1,12 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_REFLINK_H
|
||||
#define BTRFS_REFLINK_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
|
||||
loff_t btrfs_remap_file_range(struct file *file_in, loff_t pos_in,
|
||||
struct file *file_out, loff_t pos_out,
|
||||
loff_t len, unsigned int remap_flags);
|
||||
|
||||
#endif /* BTRFS_REFLINK_H */
|
File diff suppressed because it is too large
Load Diff
@ -22,7 +22,6 @@
|
||||
static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
|
||||
struct btrfs_root_item *item)
|
||||
{
|
||||
uuid_le uuid;
|
||||
u32 len;
|
||||
int need_reset = 0;
|
||||
|
||||
@ -44,8 +43,7 @@ static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
|
||||
sizeof(*item) - offsetof(struct btrfs_root_item,
|
||||
generation_v2));
|
||||
|
||||
uuid_le_gen(&uuid);
|
||||
memcpy(item->uuid, uuid.b, BTRFS_UUID_SIZE);
|
||||
generate_random_guid(item->uuid);
|
||||
}
|
||||
}
|
||||
|
||||
@ -255,25 +253,7 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
|
||||
root_key.objectid = key.offset;
|
||||
key.offset++;
|
||||
|
||||
/*
|
||||
* The root might have been inserted already, as before we look
|
||||
* for orphan roots, log replay might have happened, which
|
||||
* triggers a transaction commit and qgroup accounting, which
|
||||
* in turn reads and inserts fs roots while doing backref
|
||||
* walking.
|
||||
*/
|
||||
root = btrfs_lookup_fs_root(fs_info, root_key.objectid);
|
||||
if (root) {
|
||||
WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
|
||||
&root->state));
|
||||
if (btrfs_root_refs(&root->root_item) == 0) {
|
||||
set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
|
||||
btrfs_add_dead_root(root);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
root = btrfs_read_fs_root(tree_root, &root_key);
|
||||
root = btrfs_get_fs_root(fs_info, &root_key, false);
|
||||
err = PTR_ERR_OR_ZERO(root);
|
||||
if (err && err != -ENOENT) {
|
||||
break;
|
||||
@ -300,25 +280,12 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info)
|
||||
continue;
|
||||
}
|
||||
|
||||
err = btrfs_init_fs_root(root);
|
||||
if (err) {
|
||||
btrfs_free_fs_root(root);
|
||||
break;
|
||||
}
|
||||
|
||||
set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
|
||||
|
||||
err = btrfs_insert_fs_root(fs_info, root);
|
||||
if (err) {
|
||||
BUG_ON(err == -EEXIST);
|
||||
btrfs_free_fs_root(root);
|
||||
break;
|
||||
}
|
||||
|
||||
WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state));
|
||||
if (btrfs_root_refs(&root->root_item) == 0) {
|
||||
set_bit(BTRFS_ROOT_DEAD_TREE, &root->state);
|
||||
btrfs_add_dead_root(root);
|
||||
}
|
||||
btrfs_put_root(root);
|
||||
}
|
||||
|
||||
btrfs_free_path(path);
|
||||
@ -553,5 +520,5 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
|
||||
void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_rsv *rsv)
|
||||
{
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1);
|
||||
btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
|
||||
}
|
||||
|
@ -149,7 +149,7 @@ struct scrub_parity {
|
||||
*/
|
||||
unsigned long *ebitmap;
|
||||
|
||||
unsigned long bitmap[0];
|
||||
unsigned long bitmap[];
|
||||
};
|
||||
|
||||
struct scrub_ctx {
|
||||
@ -653,7 +653,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
root_key.objectid = root;
|
||||
root_key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
root_key.offset = (u64)-1;
|
||||
local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
|
||||
local_root = btrfs_get_fs_root(fs_info, &root_key, true);
|
||||
if (IS_ERR(local_root)) {
|
||||
ret = PTR_ERR(local_root);
|
||||
goto err;
|
||||
@ -668,6 +668,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
|
||||
ret = btrfs_search_slot(NULL, local_root, &key, swarn->path, 0, 0);
|
||||
if (ret) {
|
||||
btrfs_put_root(local_root);
|
||||
btrfs_release_path(swarn->path);
|
||||
goto err;
|
||||
}
|
||||
@ -688,6 +689,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
ipath = init_ipath(4096, local_root, swarn->path);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (IS_ERR(ipath)) {
|
||||
btrfs_put_root(local_root);
|
||||
ret = PTR_ERR(ipath);
|
||||
ipath = NULL;
|
||||
goto err;
|
||||
@ -711,6 +713,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root,
|
||||
min(isize - offset, (u64)PAGE_SIZE), nlink,
|
||||
(char *)(unsigned long)ipath->fspath->val[i]);
|
||||
|
||||
btrfs_put_root(local_root);
|
||||
free_ipath(ipath);
|
||||
return 0;
|
||||
|
||||
|
@ -5586,10 +5586,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
|
||||
{
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_root *root = sctx->send_root;
|
||||
struct btrfs_file_extent_item *fi;
|
||||
struct btrfs_key key;
|
||||
u64 extent_end;
|
||||
u8 type;
|
||||
int ret;
|
||||
|
||||
path = alloc_path_for_send();
|
||||
@ -5609,18 +5606,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset)
|
||||
if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
goto out;
|
||||
|
||||
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
type = btrfs_file_extent_type(path->nodes[0], fi);
|
||||
if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
|
||||
extent_end = ALIGN(key.offset + size,
|
||||
sctx->send_root->fs_info->sectorsize);
|
||||
} else {
|
||||
extent_end = key.offset +
|
||||
btrfs_file_extent_num_bytes(path->nodes[0], fi);
|
||||
}
|
||||
sctx->cur_inode_last_extent = extent_end;
|
||||
sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
@ -5674,16 +5660,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx,
|
||||
break;
|
||||
|
||||
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(leaf, fi) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 size = btrfs_file_extent_ram_bytes(leaf, fi);
|
||||
|
||||
extent_end = ALIGN(key.offset + size,
|
||||
root->fs_info->sectorsize);
|
||||
} else {
|
||||
extent_end = key.offset +
|
||||
btrfs_file_extent_num_bytes(leaf, fi);
|
||||
}
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
if (extent_end <= start)
|
||||
goto next;
|
||||
if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) {
|
||||
@ -5704,9 +5681,6 @@ out:
|
||||
static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_file_extent_item *fi;
|
||||
u64 extent_end;
|
||||
u8 type;
|
||||
int ret = 0;
|
||||
|
||||
if (sctx->cur_ino != key->objectid || !need_send_hole(sctx))
|
||||
@ -5718,18 +5692,6 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
return ret;
|
||||
}
|
||||
|
||||
fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
type = btrfs_file_extent_type(path->nodes[0], fi);
|
||||
if (type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi);
|
||||
extent_end = ALIGN(key->offset + size,
|
||||
sctx->send_root->fs_info->sectorsize);
|
||||
} else {
|
||||
extent_end = key->offset +
|
||||
btrfs_file_extent_num_bytes(path->nodes[0], fi);
|
||||
}
|
||||
|
||||
if (path->slots[0] == 0 &&
|
||||
sctx->cur_inode_last_extent < key->offset) {
|
||||
/*
|
||||
@ -5755,7 +5717,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
else
|
||||
ret = 0;
|
||||
}
|
||||
sctx->cur_inode_last_extent = extent_end;
|
||||
sctx->cur_inode_last_extent = btrfs_file_extent_end(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -7066,7 +7028,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
int clone_sources_to_rollback = 0;
|
||||
unsigned alloc_size;
|
||||
int sort_clone_roots = 0;
|
||||
int index;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
@ -7193,11 +7154,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
|
||||
clone_root = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
clone_root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(clone_root)) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = PTR_ERR(clone_root);
|
||||
goto out;
|
||||
}
|
||||
@ -7205,20 +7163,19 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
if (!btrfs_root_readonly(clone_root) ||
|
||||
btrfs_root_dead(clone_root)) {
|
||||
spin_unlock(&clone_root->root_item_lock);
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
btrfs_put_root(clone_root);
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
if (clone_root->dedupe_in_progress) {
|
||||
dedupe_in_progress_warn(clone_root);
|
||||
spin_unlock(&clone_root->root_item_lock);
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
btrfs_put_root(clone_root);
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
clone_root->send_in_progress++;
|
||||
spin_unlock(&clone_root->root_item_lock);
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
|
||||
sctx->clone_roots[i].root = clone_root;
|
||||
clone_sources_to_rollback = i + 1;
|
||||
@ -7232,11 +7189,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
|
||||
index = srcu_read_lock(&fs_info->subvol_srcu);
|
||||
|
||||
sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
sctx->parent_root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(sctx->parent_root)) {
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = PTR_ERR(sctx->parent_root);
|
||||
goto out;
|
||||
}
|
||||
@ -7246,20 +7200,16 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
if (!btrfs_root_readonly(sctx->parent_root) ||
|
||||
btrfs_root_dead(sctx->parent_root)) {
|
||||
spin_unlock(&sctx->parent_root->root_item_lock);
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
if (sctx->parent_root->dedupe_in_progress) {
|
||||
dedupe_in_progress_warn(sctx->parent_root);
|
||||
spin_unlock(&sctx->parent_root->root_item_lock);
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
spin_unlock(&sctx->parent_root->root_item_lock);
|
||||
|
||||
srcu_read_unlock(&fs_info->subvol_srcu, index);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -7267,7 +7217,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
|
||||
* is behind the current send position. This is checked while searching
|
||||
* for possible clone sources.
|
||||
*/
|
||||
sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root;
|
||||
sctx->clone_roots[sctx->clone_roots_cnt++].root =
|
||||
btrfs_grab_root(sctx->send_root);
|
||||
|
||||
/* We do a bsearch later */
|
||||
sort(sctx->clone_roots, sctx->clone_roots_cnt,
|
||||
@ -7352,18 +7303,24 @@ out:
|
||||
}
|
||||
|
||||
if (sort_clone_roots) {
|
||||
for (i = 0; i < sctx->clone_roots_cnt; i++)
|
||||
for (i = 0; i < sctx->clone_roots_cnt; i++) {
|
||||
btrfs_root_dec_send_in_progress(
|
||||
sctx->clone_roots[i].root);
|
||||
btrfs_put_root(sctx->clone_roots[i].root);
|
||||
}
|
||||
} else {
|
||||
for (i = 0; sctx && i < clone_sources_to_rollback; i++)
|
||||
for (i = 0; sctx && i < clone_sources_to_rollback; i++) {
|
||||
btrfs_root_dec_send_in_progress(
|
||||
sctx->clone_roots[i].root);
|
||||
btrfs_put_root(sctx->clone_roots[i].root);
|
||||
}
|
||||
|
||||
btrfs_root_dec_send_in_progress(send_root);
|
||||
}
|
||||
if (sctx && !IS_ERR_OR_NULL(sctx->parent_root))
|
||||
if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) {
|
||||
btrfs_root_dec_send_in_progress(sctx->parent_root);
|
||||
btrfs_put_root(sctx->parent_root);
|
||||
}
|
||||
|
||||
kvfree(clone_sources_tmp);
|
||||
|
||||
|
@ -10,6 +10,153 @@
|
||||
#include "transaction.h"
|
||||
#include "block-group.h"
|
||||
|
||||
/*
|
||||
* HOW DOES SPACE RESERVATION WORK
|
||||
*
|
||||
* If you want to know about delalloc specifically, there is a separate comment
|
||||
* for that with the delalloc code. This comment is about how the whole system
|
||||
* works generally.
|
||||
*
|
||||
* BASIC CONCEPTS
|
||||
*
|
||||
* 1) space_info. This is the ultimate arbiter of how much space we can use.
|
||||
* There's a description of the bytes_ fields with the struct declaration,
|
||||
* refer to that for specifics on each field. Suffice it to say that for
|
||||
* reservations we care about total_bytes - SUM(space_info->bytes_) when
|
||||
* determining if there is space to make an allocation. There is a space_info
|
||||
* for METADATA, SYSTEM, and DATA areas.
|
||||
*
|
||||
* 2) block_rsv's. These are basically buckets for every different type of
|
||||
* metadata reservation we have. You can see the comment in the block_rsv
|
||||
* code on the rules for each type, but generally block_rsv->reserved is how
|
||||
* much space is accounted for in space_info->bytes_may_use.
|
||||
*
|
||||
* 3) btrfs_calc*_size. These are the worst case calculations we use based
|
||||
* on the number of items we will want to modify. We have one for changing
|
||||
* items, and one for inserting new items. Generally we use these helpers to
|
||||
* determine the size of the block reserves, and then use the actual bytes
|
||||
* values to adjust the space_info counters.
|
||||
*
|
||||
* MAKING RESERVATIONS, THE NORMAL CASE
|
||||
*
|
||||
* We call into either btrfs_reserve_data_bytes() or
|
||||
* btrfs_reserve_metadata_bytes(), depending on which we're looking for, with
|
||||
* num_bytes we want to reserve.
|
||||
*
|
||||
* ->reserve
|
||||
* space_info->bytes_may_use += num_bytes
|
||||
*
|
||||
* ->extent allocation
|
||||
* Call btrfs_add_reserved_bytes() which does
|
||||
* space_info->bytes_may_use -= num_bytes
|
||||
* space_info->bytes_reserved += extent_bytes
|
||||
*
|
||||
* ->insert reference
|
||||
* Call btrfs_update_block_group() which does
|
||||
* space_info->bytes_reserved -= extent_bytes
|
||||
* space_info->bytes_used += extent_bytes
|
||||
*
|
||||
* MAKING RESERVATIONS, FLUSHING NORMALLY (non-priority)
|
||||
*
|
||||
* Assume we are unable to simply make the reservation because we do not have
|
||||
* enough space
|
||||
*
|
||||
* -> __reserve_bytes
|
||||
* create a reserve_ticket with ->bytes set to our reservation, add it to
|
||||
* the tail of space_info->tickets, kick async flush thread
|
||||
*
|
||||
* ->handle_reserve_ticket
|
||||
* wait on ticket->wait for ->bytes to be reduced to 0, or ->error to be set
|
||||
* on the ticket.
|
||||
*
|
||||
* -> btrfs_async_reclaim_metadata_space/btrfs_async_reclaim_data_space
|
||||
* Flushes various things attempting to free up space.
|
||||
*
|
||||
* -> btrfs_try_granting_tickets()
|
||||
* This is called by anything that either subtracts space from
|
||||
* space_info->bytes_may_use, ->bytes_pinned, etc, or adds to the
|
||||
* space_info->total_bytes. This loops through the ->priority_tickets and
|
||||
* then the ->tickets list checking to see if the reservation can be
|
||||
* completed. If it can the space is added to space_info->bytes_may_use and
|
||||
* the ticket is woken up.
|
||||
*
|
||||
* -> ticket wakeup
|
||||
* Check if ->bytes == 0, if it does we got our reservation and we can carry
|
||||
* on, if not return the appropriate error (ENOSPC, but can be EINTR if we
|
||||
* were interrupted.)
|
||||
*
|
||||
* MAKING RESERVATIONS, FLUSHING HIGH PRIORITY
|
||||
*
|
||||
* Same as the above, except we add ourselves to the
|
||||
* space_info->priority_tickets, and we do not use ticket->wait, we simply
|
||||
* call flush_space() ourselves for the states that are safe for us to call
|
||||
* without deadlocking and hope for the best.
|
||||
*
|
||||
* THE FLUSHING STATES
|
||||
*
|
||||
* Generally speaking we will have two cases for each state, a "nice" state
|
||||
* and an "ALL THE THINGS" state. In btrfs we delay a lot of work in order to
|
||||
* reduce the locking overhead on the various trees, and even to keep from
|
||||
* doing any work at all in the case of delayed refs. Each of these delayed
|
||||
* things however hold reservations, and so letting them run allows us to
|
||||
* reclaim space so we can make new reservations.
|
||||
*
|
||||
* FLUSH_DELAYED_ITEMS
|
||||
* Every inode has a delayed item to update the inode. Take a simple write
|
||||
* for example, we would update the inode item at write time to update the
|
||||
* mtime, and then again at finish_ordered_io() time in order to update the
|
||||
* isize or bytes. We keep these delayed items to coalesce these operations
|
||||
* into a single operation done on demand. These are an easy way to reclaim
|
||||
* metadata space.
|
||||
*
|
||||
* FLUSH_DELALLOC
|
||||
* Look at the delalloc comment to get an idea of how much space is reserved
|
||||
* for delayed allocation. We can reclaim some of this space simply by
|
||||
* running delalloc, but usually we need to wait for ordered extents to
|
||||
* reclaim the bulk of this space.
|
||||
*
|
||||
* FLUSH_DELAYED_REFS
|
||||
* We have a block reserve for the outstanding delayed refs space, and every
|
||||
* delayed ref operation holds a reservation. Running these is a quick way
|
||||
* to reclaim space, but we want to hold this until the end because COW can
|
||||
* churn a lot and we can avoid making some extent tree modifications if we
|
||||
* are able to delay for as long as possible.
|
||||
*
|
||||
* ALLOC_CHUNK
|
||||
* We will skip this the first time through space reservation, because of
|
||||
* overcommit and we don't want to have a lot of useless metadata space when
|
||||
* our worst case reservations will likely never come true.
|
||||
*
|
||||
* RUN_DELAYED_IPUTS
|
||||
* If we're freeing inodes we're likely freeing checksums, file extent
|
||||
* items, and extent tree items. Loads of space could be freed up by these
|
||||
* operations, however they won't be usable until the transaction commits.
|
||||
*
|
||||
* COMMIT_TRANS
|
||||
* may_commit_transaction() is the ultimate arbiter on whether we commit the
|
||||
* transaction or not. In order to avoid constantly churning we do all the
|
||||
* above flushing first and then commit the transaction as the last resort.
|
||||
* However we need to take into account things like pinned space that would
|
||||
* be freed, plus any delayed work we may not have gotten rid of in the case
|
||||
* of metadata.
|
||||
*
|
||||
* OVERCOMMIT
|
||||
*
|
||||
* Because we hold so many reservations for metadata we will allow you to
|
||||
* reserve more space than is currently free in the currently allocated
|
||||
* metadata space. This only happens with metadata, data does not allow
|
||||
* overcommitting.
|
||||
*
|
||||
* You can see the current logic for when we allow overcommit in
|
||||
* btrfs_can_overcommit(), but it only applies to unallocated space. If there
|
||||
* is no unallocated space to be had, all reservations are kept within the
|
||||
* free space in the allocated metadata chunks.
|
||||
*
|
||||
* Because of overcommitting, you generally want to use the
|
||||
* btrfs_can_overcommit() logic for metadata allocations, as it does the right
|
||||
* thing with or without extra unallocated space.
|
||||
*/
|
||||
|
||||
u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
|
||||
bool may_use_included)
|
||||
{
|
||||
@ -159,25 +306,19 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
|
||||
return (global->size << 1);
|
||||
}
|
||||
|
||||
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, u64 bytes,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
u64 profile;
|
||||
u64 avail;
|
||||
u64 used;
|
||||
int factor;
|
||||
|
||||
/* Don't overcommit when in mixed mode. */
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
return 0;
|
||||
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
profile = btrfs_system_alloc_profile(fs_info);
|
||||
else
|
||||
profile = btrfs_metadata_alloc_profile(fs_info);
|
||||
|
||||
used = btrfs_space_info_used(space_info, true);
|
||||
avail = atomic64_read(&fs_info->free_chunk_space);
|
||||
|
||||
/*
|
||||
@ -198,6 +339,22 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
|
||||
avail >>= 3;
|
||||
else
|
||||
avail >>= 1;
|
||||
return avail;
|
||||
}
|
||||
|
||||
int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, u64 bytes,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
u64 avail;
|
||||
u64 used;
|
||||
|
||||
/* Don't overcommit when in mixed mode */
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
return 0;
|
||||
|
||||
used = btrfs_space_info_used(space_info, true);
|
||||
avail = calc_available_free_space(fs_info, space_info, flush);
|
||||
|
||||
if (used + bytes < space_info->total_bytes + avail)
|
||||
return 1;
|
||||
@ -232,6 +389,8 @@ again:
|
||||
space_info,
|
||||
ticket->bytes);
|
||||
list_del_init(&ticket->list);
|
||||
ASSERT(space_info->reclaim_size >= ticket->bytes);
|
||||
space_info->reclaim_size -= ticket->bytes;
|
||||
ticket->bytes = 0;
|
||||
space_info->tickets_id++;
|
||||
wake_up(&ticket->wait);
|
||||
@ -627,15 +786,26 @@ static inline u64
|
||||
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info)
|
||||
{
|
||||
struct reserve_ticket *ticket;
|
||||
u64 used;
|
||||
u64 avail;
|
||||
u64 expected;
|
||||
u64 to_reclaim = 0;
|
||||
u64 to_reclaim = space_info->reclaim_size;
|
||||
|
||||
lockdep_assert_held(&space_info->lock);
|
||||
|
||||
avail = calc_available_free_space(fs_info, space_info,
|
||||
BTRFS_RESERVE_FLUSH_ALL);
|
||||
used = btrfs_space_info_used(space_info, true);
|
||||
|
||||
/*
|
||||
* We may be flushing because suddenly we have less space than we had
|
||||
* before, and now we're well over-committed based on our current free
|
||||
* space. If that's the case add in our overage so we make sure to put
|
||||
* appropriate pressure on the flushing state machine.
|
||||
*/
|
||||
if (space_info->total_bytes + avail < used)
|
||||
to_reclaim += used - (space_info->total_bytes + avail);
|
||||
|
||||
list_for_each_entry(ticket, &space_info->tickets, list)
|
||||
to_reclaim += ticket->bytes;
|
||||
list_for_each_entry(ticket, &space_info->priority_tickets, list)
|
||||
to_reclaim += ticket->bytes;
|
||||
if (to_reclaim)
|
||||
return to_reclaim;
|
||||
|
||||
@ -1020,8 +1190,10 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
* the list and we will do our own flushing further down.
|
||||
*/
|
||||
if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
|
||||
ASSERT(space_info->reclaim_size >= 0);
|
||||
ticket.bytes = orig_bytes;
|
||||
ticket.error = 0;
|
||||
space_info->reclaim_size += ticket.bytes;
|
||||
init_waitqueue_head(&ticket.wait);
|
||||
if (flush == BTRFS_RESERVE_FLUSH_ALL) {
|
||||
list_add_tail(&ticket.list, &space_info->tickets);
|
||||
|
@ -54,6 +54,13 @@ struct btrfs_space_info {
|
||||
struct list_head ro_bgs;
|
||||
struct list_head priority_tickets;
|
||||
struct list_head tickets;
|
||||
|
||||
/*
|
||||
* Size of space that needs to be reclaimed in order to satisfy pending
|
||||
* tickets
|
||||
*/
|
||||
u64 reclaim_size;
|
||||
|
||||
/*
|
||||
* tickets_id just indicates the next ticket will be handled, so note
|
||||
* it's not stored per ticket.
|
||||
|
@ -244,7 +244,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
trans->aborted = errno;
|
||||
WRITE_ONCE(trans->aborted, errno);
|
||||
/* Nothing used. The other threads that have joined this
|
||||
* transaction may be able to continue. */
|
||||
if (!trans->dirty && list_empty(&trans->new_bgs)) {
|
||||
@ -873,7 +873,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
break;
|
||||
#endif
|
||||
case Opt_err:
|
||||
btrfs_info(info, "unrecognized mount option '%s'", p);
|
||||
btrfs_err(info, "unrecognized mount option '%s'", p);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
default:
|
||||
@ -1024,11 +1024,11 @@ out:
|
||||
return error;
|
||||
}
|
||||
|
||||
static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
u64 subvol_objectid)
|
||||
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
u64 subvol_objectid)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
struct btrfs_root *fs_root;
|
||||
struct btrfs_root *fs_root = NULL;
|
||||
struct btrfs_root_ref *root_ref;
|
||||
struct btrfs_inode_ref *inode_ref;
|
||||
struct btrfs_key key;
|
||||
@ -1096,9 +1096,10 @@ static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
key.objectid = subvol_objectid;
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
fs_root = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
fs_root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(fs_root)) {
|
||||
ret = PTR_ERR(fs_root);
|
||||
fs_root = NULL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
@ -1143,6 +1144,8 @@ static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
ptr[0] = '/';
|
||||
btrfs_release_path(path);
|
||||
}
|
||||
btrfs_put_root(fs_root);
|
||||
fs_root = NULL;
|
||||
}
|
||||
|
||||
btrfs_free_path(path);
|
||||
@ -1155,6 +1158,7 @@ static char *get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
return name;
|
||||
|
||||
err:
|
||||
btrfs_put_root(fs_root);
|
||||
btrfs_free_path(path);
|
||||
kfree(name);
|
||||
return ERR_PTR(ret);
|
||||
@ -1438,8 +1442,8 @@ static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid,
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
subvol_name = get_subvol_name_from_objectid(btrfs_sb(mnt->mnt_sb),
|
||||
subvol_objectid);
|
||||
subvol_name = btrfs_get_subvol_name_from_objectid(
|
||||
btrfs_sb(mnt->mnt_sb), subvol_objectid);
|
||||
if (IS_ERR(subvol_name)) {
|
||||
root = ERR_CAST(subvol_name);
|
||||
subvol_name = NULL;
|
||||
@ -1518,14 +1522,17 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
|
||||
/*
|
||||
* Setup a dummy root and fs_info for test/set super. This is because
|
||||
* we don't actually fill this stuff out until open_ctree, but we need
|
||||
* it for searching for existing supers, so this lets us do that and
|
||||
* then open_ctree will properly initialize everything later.
|
||||
* then open_ctree will properly initialize the file system specific
|
||||
* settings later. btrfs_init_fs_info initializes the static elements
|
||||
* of the fs_info (locks and such) to make cleanup easier if we find a
|
||||
* superblock with our given fs_devices later on at sget() time.
|
||||
*/
|
||||
fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL);
|
||||
if (!fs_info) {
|
||||
error = -ENOMEM;
|
||||
goto error_sec_opts;
|
||||
}
|
||||
btrfs_init_fs_info(fs_info);
|
||||
|
||||
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
|
||||
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
|
||||
@ -1571,7 +1578,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
|
||||
|
||||
if (s->s_root) {
|
||||
btrfs_close_devices(fs_devices);
|
||||
free_fs_info(fs_info);
|
||||
btrfs_free_fs_info(fs_info);
|
||||
if ((flags ^ s->s_flags) & SB_RDONLY)
|
||||
error = -EBUSY;
|
||||
} else {
|
||||
@ -1594,7 +1601,7 @@ static struct dentry *btrfs_mount_root(struct file_system_type *fs_type,
|
||||
error_close_devices:
|
||||
btrfs_close_devices(fs_devices);
|
||||
error_fs_info:
|
||||
free_fs_info(fs_info);
|
||||
btrfs_free_fs_info(fs_info);
|
||||
error_sec_opts:
|
||||
security_free_mnt_opts(&new_sec_opts);
|
||||
return ERR_PTR(error);
|
||||
@ -2170,7 +2177,7 @@ static void btrfs_kill_super(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
kill_anon_super(sb);
|
||||
free_fs_info(fs_info);
|
||||
btrfs_free_fs_info(fs_info);
|
||||
}
|
||||
|
||||
static struct file_system_type btrfs_fs_type = {
|
||||
@ -2203,7 +2210,7 @@ static int btrfs_control_open(struct inode *inode, struct file *file)
|
||||
}
|
||||
|
||||
/*
|
||||
* used by btrfsctl to scan devices when no FS is mounted
|
||||
* Used by /dev/btrfs-control for devices ioctls.
|
||||
*/
|
||||
static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
unsigned long arg)
|
||||
|
@ -155,7 +155,7 @@ static ssize_t btrfs_feature_attr_show(struct kobject *kobj,
|
||||
} else
|
||||
val = can_modify_feature(fa);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
|
||||
static ssize_t btrfs_feature_attr_store(struct kobject *kobj,
|
||||
@ -295,7 +295,7 @@ static const struct attribute_group btrfs_feature_attr_group = {
|
||||
static ssize_t rmdir_subvol_show(struct kobject *kobj,
|
||||
struct kobj_attribute *ka, char *buf)
|
||||
{
|
||||
return snprintf(buf, PAGE_SIZE, "0\n");
|
||||
return scnprintf(buf, PAGE_SIZE, "0\n");
|
||||
}
|
||||
BTRFS_ATTR(static_feature, rmdir_subvol, rmdir_subvol_show);
|
||||
|
||||
@ -310,12 +310,12 @@ static ssize_t supported_checksums_show(struct kobject *kobj,
|
||||
* This "trick" only works as long as 'enum btrfs_csum_type' has
|
||||
* no holes in it
|
||||
*/
|
||||
ret += snprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "%s%s",
|
||||
(i == 0 ? "" : " "), btrfs_super_csum_name(i));
|
||||
|
||||
}
|
||||
|
||||
ret += snprintf(buf + ret, PAGE_SIZE - ret, "\n");
|
||||
ret += scnprintf(buf + ret, PAGE_SIZE - ret, "\n");
|
||||
return ret;
|
||||
}
|
||||
BTRFS_ATTR(static_feature, supported_checksums, supported_checksums_show);
|
||||
@ -350,7 +350,7 @@ static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discardable_bytes));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
|
||||
@ -361,7 +361,7 @@ static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n",
|
||||
atomic_read(&fs_info->discard_ctl.discardable_extents));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
|
||||
@ -372,7 +372,7 @@ static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
fs_info->discard_ctl.discard_bitmap_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
|
||||
@ -383,7 +383,7 @@ static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
|
||||
@ -394,7 +394,7 @@ static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
fs_info->discard_ctl.discard_extent_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
|
||||
@ -405,7 +405,7 @@ static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.iops_limit));
|
||||
}
|
||||
|
||||
@ -435,7 +435,7 @@ static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.kbps_limit));
|
||||
}
|
||||
|
||||
@ -465,7 +465,7 @@ static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
READ_ONCE(fs_info->discard_ctl.max_discard_size));
|
||||
}
|
||||
|
||||
@ -530,7 +530,7 @@ static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
|
||||
val = *value_ptr;
|
||||
if (lock)
|
||||
spin_unlock(lock);
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
}
|
||||
|
||||
static ssize_t global_rsv_size_show(struct kobject *kobj,
|
||||
@ -576,7 +576,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj,
|
||||
val += block_group->used;
|
||||
}
|
||||
up_read(&sinfo->groups_sem);
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
|
||||
}
|
||||
|
||||
static struct attribute *raid_attrs[] = {
|
||||
@ -613,7 +613,7 @@ static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_space_info *sinfo = to_space_info(kobj);
|
||||
s64 val = percpu_counter_sum(&sinfo->total_bytes_pinned);
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%lld\n", val);
|
||||
}
|
||||
|
||||
SPACE_INFO_ATTR(flags);
|
||||
@ -670,7 +670,7 @@ static ssize_t btrfs_label_show(struct kobject *kobj,
|
||||
ssize_t ret;
|
||||
|
||||
spin_lock(&fs_info->super_lock);
|
||||
ret = snprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
|
||||
ret = scnprintf(buf, PAGE_SIZE, label[0] ? "%s\n" : "%s", label);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return ret;
|
||||
@ -718,7 +718,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
|
||||
@ -728,8 +728,8 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
fs_info->super_copy->sectorsize);
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n",
|
||||
fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
|
||||
@ -739,8 +739,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
fs_info->super_copy->sectorsize);
|
||||
return scnprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->sectorsize);
|
||||
}
|
||||
|
||||
BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
|
||||
@ -752,7 +751,7 @@ static ssize_t quota_override_show(struct kobject *kobj,
|
||||
int quota_override;
|
||||
|
||||
quota_override = test_bit(BTRFS_FS_QUOTA_OVERRIDE, &fs_info->flags);
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", quota_override);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", quota_override);
|
||||
}
|
||||
|
||||
static ssize_t quota_override_store(struct kobject *kobj,
|
||||
@ -790,7 +789,7 @@ static ssize_t btrfs_metadata_uuid_show(struct kobject *kobj,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%pU\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%pU\n",
|
||||
fs_info->fs_devices->metadata_uuid);
|
||||
}
|
||||
|
||||
@ -802,7 +801,7 @@ static ssize_t btrfs_checksum_show(struct kobject *kobj,
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
u16 csum_type = btrfs_super_csum_type(fs_info->super_copy);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%s (%s)\n",
|
||||
return scnprintf(buf, PAGE_SIZE, "%s (%s)\n",
|
||||
btrfs_super_csum_name(csum_type),
|
||||
crypto_shash_driver_name(fs_info->csum_shash));
|
||||
}
|
||||
@ -960,7 +959,7 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
|
||||
addrm_unknown_feature_attrs(fs_info, false);
|
||||
sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
|
||||
sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
|
||||
btrfs_sysfs_rm_device_link(fs_info->fs_devices, NULL);
|
||||
btrfs_sysfs_remove_devices_dir(fs_info->fs_devices, NULL);
|
||||
}
|
||||
|
||||
static const char * const btrfs_feature_set_names[FEAT_MAX] = {
|
||||
@ -992,7 +991,7 @@ char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags)
|
||||
continue;
|
||||
|
||||
name = btrfs_feature_attrs[set][i].kobj_attr.attr.name;
|
||||
len += snprintf(str + len, bufsize - len, "%s%s",
|
||||
len += scnprintf(str + len, bufsize - len, "%s%s",
|
||||
len ? "," : "", name);
|
||||
}
|
||||
|
||||
@ -1149,7 +1148,7 @@ int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
|
||||
|
||||
/* when one_device is NULL, it removes all device links */
|
||||
|
||||
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device)
|
||||
{
|
||||
struct hd_struct *disk;
|
||||
@ -1201,11 +1200,11 @@ static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);
|
||||
|
||||
static ssize_t btrfs_sysfs_missing_show(struct kobject *kobj,
|
||||
static ssize_t btrfs_devinfo_missing_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
{
|
||||
int val;
|
||||
@ -1214,9 +1213,9 @@ static ssize_t btrfs_sysfs_missing_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, missing, btrfs_sysfs_missing_show);
|
||||
BTRFS_ATTR(devid, missing, btrfs_devinfo_missing_show);
|
||||
|
||||
static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
@ -1228,7 +1227,7 @@ static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
|
||||
|
||||
@ -1241,7 +1240,7 @@ static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
return scnprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
|
||||
|
||||
@ -1269,7 +1268,7 @@ static struct kobj_type devid_ktype = {
|
||||
.release = btrfs_release_devid_kobj,
|
||||
};
|
||||
|
||||
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device)
|
||||
{
|
||||
int error = 0;
|
||||
@ -1371,7 +1370,7 @@ int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
|
||||
if (!fs_devs->devices_kobj) {
|
||||
btrfs_err(fs_devs->fs_info,
|
||||
"failed to init sysfs device interface");
|
||||
kobject_put(&fs_devs->fsid_kobj);
|
||||
btrfs_sysfs_remove_fsid(fs_devs);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -1395,13 +1394,13 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
|
||||
|
||||
btrfs_set_fs_info_ptr(fs_info);
|
||||
|
||||
error = btrfs_sysfs_add_device_link(fs_devs, NULL);
|
||||
error = btrfs_sysfs_add_devices_dir(fs_devs, NULL);
|
||||
if (error)
|
||||
return error;
|
||||
|
||||
error = sysfs_create_files(fsid_kobj, btrfs_attrs);
|
||||
if (error) {
|
||||
btrfs_sysfs_rm_device_link(fs_devs, NULL);
|
||||
btrfs_sysfs_remove_devices_dir(fs_devs, NULL);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -14,9 +14,9 @@ enum btrfs_feature_set {
|
||||
|
||||
char *btrfs_printable_features(enum btrfs_feature_set set, u64 flags);
|
||||
const char * const btrfs_feature_set_name(enum btrfs_feature_set set);
|
||||
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_add_devices_dir(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
int btrfs_sysfs_remove_devices_dir(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
|
||||
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
|
||||
|
@ -120,6 +120,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
||||
kfree(fs_info);
|
||||
return NULL;
|
||||
}
|
||||
INIT_LIST_HEAD(&fs_info->fs_devices->devices);
|
||||
|
||||
fs_info->super_copy = kzalloc(sizeof(struct btrfs_super_block),
|
||||
GFP_KERNEL);
|
||||
if (!fs_info->super_copy) {
|
||||
@ -128,39 +130,10 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
btrfs_init_fs_info(fs_info);
|
||||
|
||||
fs_info->nodesize = nodesize;
|
||||
fs_info->sectorsize = sectorsize;
|
||||
|
||||
if (init_srcu_struct(&fs_info->subvol_srcu)) {
|
||||
kfree(fs_info->fs_devices);
|
||||
kfree(fs_info->super_copy);
|
||||
kfree(fs_info);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
spin_lock_init(&fs_info->buffer_lock);
|
||||
spin_lock_init(&fs_info->qgroup_lock);
|
||||
spin_lock_init(&fs_info->super_lock);
|
||||
spin_lock_init(&fs_info->fs_roots_radix_lock);
|
||||
mutex_init(&fs_info->qgroup_ioctl_lock);
|
||||
mutex_init(&fs_info->qgroup_rescan_lock);
|
||||
rwlock_init(&fs_info->tree_mod_log_lock);
|
||||
fs_info->running_transaction = NULL;
|
||||
fs_info->qgroup_tree = RB_ROOT;
|
||||
fs_info->qgroup_ulist = NULL;
|
||||
atomic64_set(&fs_info->tree_mod_seq, 0);
|
||||
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
|
||||
INIT_LIST_HEAD(&fs_info->dead_roots);
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
INIT_LIST_HEAD(&fs_info->fs_devices->devices);
|
||||
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
|
||||
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
|
||||
extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
|
||||
extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
|
||||
extent_map_tree_init(&fs_info->mapping_tree);
|
||||
fs_info->pinned_extents = &fs_info->freed_extents[0];
|
||||
set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
|
||||
|
||||
test_mnt->mnt_sb->s_fs_info = fs_info;
|
||||
@ -210,8 +183,9 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
btrfs_free_qgroup_config(fs_info);
|
||||
btrfs_free_fs_roots(fs_info);
|
||||
cleanup_srcu_struct(&fs_info->subvol_srcu);
|
||||
kfree(fs_info->super_copy);
|
||||
btrfs_check_leaked_roots(fs_info);
|
||||
btrfs_extent_buffer_leak_debug_check(fs_info);
|
||||
kfree(fs_info->fs_devices);
|
||||
kfree(fs_info);
|
||||
}
|
||||
@ -223,11 +197,7 @@ void btrfs_free_dummy_root(struct btrfs_root *root)
|
||||
/* Will be freed by btrfs_free_fs_roots */
|
||||
if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
|
||||
return;
|
||||
if (root->node) {
|
||||
/* One for allocate_extent_buffer */
|
||||
free_extent_buffer(root->node);
|
||||
}
|
||||
kfree(root);
|
||||
btrfs_put_root(root);
|
||||
}
|
||||
|
||||
struct btrfs_block_group *
|
||||
|
@ -507,6 +507,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
|
||||
test_err("couldn't insert fs root %d", ret);
|
||||
goto out;
|
||||
}
|
||||
btrfs_put_root(tmp_root);
|
||||
|
||||
tmp_root = btrfs_alloc_dummy_root(fs_info);
|
||||
if (IS_ERR(tmp_root)) {
|
||||
@ -521,6 +522,7 @@ int btrfs_test_qgroups(u32 sectorsize, u32 nodesize)
|
||||
test_err("couldn't insert fs root %d", ret);
|
||||
goto out;
|
||||
}
|
||||
btrfs_put_root(tmp_root);
|
||||
|
||||
test_msg("running qgroup tests");
|
||||
ret = test_no_shared_qgroup(root, sectorsize, nodesize);
|
||||
|
@ -221,7 +221,7 @@ void btrfs_trans_release_chunk_metadata(struct btrfs_trans_handle *trans)
|
||||
WARN_ON_ONCE(!list_empty(&trans->new_bgs));
|
||||
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->chunk_block_rsv,
|
||||
trans->chunk_bytes_reserved);
|
||||
trans->chunk_bytes_reserved, NULL);
|
||||
trans->chunk_bytes_reserved = 0;
|
||||
}
|
||||
|
||||
@ -243,7 +243,7 @@ loop:
|
||||
|
||||
cur_trans = fs_info->running_transaction;
|
||||
if (cur_trans) {
|
||||
if (cur_trans->aborted) {
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
return cur_trans->aborted;
|
||||
}
|
||||
@ -336,6 +336,8 @@ loop:
|
||||
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
||||
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
|
||||
IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
|
||||
extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
|
||||
IO_TREE_FS_PINNED_EXTENTS, NULL);
|
||||
fs_info->generation++;
|
||||
cur_trans->transid = fs_info->generation;
|
||||
fs_info->running_transaction = cur_trans;
|
||||
@ -459,7 +461,7 @@ static inline int is_transaction_blocked(struct btrfs_transaction *trans)
|
||||
{
|
||||
return (trans->state >= TRANS_STATE_COMMIT_START &&
|
||||
trans->state < TRANS_STATE_UNBLOCKED &&
|
||||
!trans->aborted);
|
||||
!TRANS_ABORTED(trans));
|
||||
}
|
||||
|
||||
/* wait for commit against the current transaction to become unblocked
|
||||
@ -478,7 +480,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
|
||||
|
||||
wait_event(fs_info->transaction_wait,
|
||||
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
|
||||
cur_trans->aborted);
|
||||
TRANS_ABORTED(cur_trans));
|
||||
btrfs_put_transaction(cur_trans);
|
||||
} else {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
@ -673,7 +675,7 @@ join_fail:
|
||||
alloc_fail:
|
||||
if (num_bytes)
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
|
||||
num_bytes);
|
||||
num_bytes, NULL);
|
||||
reserve_fail:
|
||||
btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
|
||||
return ERR_PTR(ret);
|
||||
@ -896,7 +898,7 @@ static void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans)
|
||||
trace_btrfs_space_reservation(fs_info, "transaction",
|
||||
trans->transid, trans->bytes_reserved, 0);
|
||||
btrfs_block_rsv_release(fs_info, trans->block_rsv,
|
||||
trans->bytes_reserved);
|
||||
trans->bytes_reserved, NULL);
|
||||
trans->bytes_reserved = 0;
|
||||
}
|
||||
|
||||
@ -937,7 +939,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
if (throttle)
|
||||
btrfs_run_delayed_iputs(info);
|
||||
|
||||
if (trans->aborted ||
|
||||
if (TRANS_ABORTED(trans) ||
|
||||
test_bit(BTRFS_FS_STATE_ERROR, &info->fs_state)) {
|
||||
wake_up_process(info->transaction_kthread);
|
||||
err = -EIO;
|
||||
@ -1262,8 +1264,10 @@ void btrfs_add_dead_root(struct btrfs_root *root)
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (list_empty(&root->root_list))
|
||||
if (list_empty(&root->root_list)) {
|
||||
btrfs_grab_root(root);
|
||||
list_add_tail(&root->root_list, &fs_info->dead_roots);
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
}
|
||||
|
||||
@ -1477,7 +1481,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
u64 index = 0;
|
||||
u64 objectid;
|
||||
u64 root_flags;
|
||||
uuid_le new_uuid;
|
||||
|
||||
ASSERT(pending->path);
|
||||
path = pending->path;
|
||||
@ -1570,8 +1573,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
|
||||
btrfs_set_root_generation_v2(new_root_item,
|
||||
trans->transid);
|
||||
uuid_le_gen(&new_uuid);
|
||||
memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
|
||||
generate_random_guid(new_root_item->uuid);
|
||||
memcpy(new_root_item->parent_uuid, root->root_item.uuid,
|
||||
BTRFS_UUID_SIZE);
|
||||
if (!(root_flags & BTRFS_ROOT_SUBVOL_RDONLY)) {
|
||||
@ -1633,7 +1635,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
key.offset = (u64)-1;
|
||||
pending->snap = btrfs_read_fs_root_no_name(fs_info, &key);
|
||||
pending->snap = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(pending->snap)) {
|
||||
ret = PTR_ERR(pending->snap);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@ -1682,7 +1684,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto fail;
|
||||
}
|
||||
ret = btrfs_uuid_tree_add(trans, new_uuid.b, BTRFS_UUID_KEY_SUBVOL,
|
||||
ret = btrfs_uuid_tree_add(trans, new_root_item->uuid,
|
||||
BTRFS_UUID_KEY_SUBVOL,
|
||||
objectid);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
@ -1794,7 +1797,8 @@ static void wait_current_trans_commit_start(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_transaction *trans)
|
||||
{
|
||||
wait_event(fs_info->transaction_blocked_wait,
|
||||
trans->state >= TRANS_STATE_COMMIT_START || trans->aborted);
|
||||
trans->state >= TRANS_STATE_COMMIT_START ||
|
||||
TRANS_ABORTED(trans));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1806,7 +1810,8 @@ static void wait_current_trans_commit_start_and_unblock(
|
||||
struct btrfs_transaction *trans)
|
||||
{
|
||||
wait_event(fs_info->transaction_wait,
|
||||
trans->state >= TRANS_STATE_UNBLOCKED || trans->aborted);
|
||||
trans->state >= TRANS_STATE_UNBLOCKED ||
|
||||
TRANS_ABORTED(trans));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2026,7 +2031,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
trans->dirty = true;
|
||||
|
||||
/* Stop the commit early if ->aborted is set */
|
||||
if (unlikely(READ_ONCE(cur_trans->aborted))) {
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
@ -2100,7 +2105,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
wait_for_commit(cur_trans);
|
||||
|
||||
if (unlikely(cur_trans->aborted))
|
||||
if (TRANS_ABORTED(cur_trans))
|
||||
ret = cur_trans->aborted;
|
||||
|
||||
btrfs_put_transaction(cur_trans);
|
||||
@ -2119,7 +2124,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
wait_for_commit(prev_trans);
|
||||
ret = prev_trans->aborted;
|
||||
ret = READ_ONCE(prev_trans->aborted);
|
||||
|
||||
btrfs_put_transaction(prev_trans);
|
||||
if (ret)
|
||||
@ -2173,8 +2178,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
wait_event(cur_trans->writer_wait,
|
||||
atomic_read(&cur_trans->num_writers) == 1);
|
||||
|
||||
/* ->aborted might be set after the previous check, so check it */
|
||||
if (unlikely(READ_ONCE(cur_trans->aborted))) {
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
goto scrub_continue;
|
||||
}
|
||||
@ -2191,10 +2195,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* core function of the snapshot creation.
|
||||
*/
|
||||
ret = create_pending_snapshots(trans);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_reloc;
|
||||
|
||||
/*
|
||||
* We insert the dir indexes of the snapshots and update the inode
|
||||
@ -2207,16 +2209,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* the nodes and leaves.
|
||||
*/
|
||||
ret = btrfs_run_delayed_items(trans);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_reloc;
|
||||
|
||||
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_reloc;
|
||||
|
||||
/*
|
||||
* make sure none of the code above managed to slip in a
|
||||
@ -2242,11 +2240,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
mutex_lock(&fs_info->tree_log_mutex);
|
||||
|
||||
ret = commit_fs_roots(trans);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_tree_log;
|
||||
|
||||
/*
|
||||
* Since the transaction is done, we can apply the pending changes
|
||||
@ -2264,39 +2259,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* new delayed refs. Must handle them or qgroup can be wrong.
|
||||
*/
|
||||
ret = btrfs_run_delayed_refs(trans, (unsigned long)-1);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_tree_log;
|
||||
|
||||
/*
|
||||
* Since fs roots are all committed, we can get a quite accurate
|
||||
* new_roots. So let's do quota accounting.
|
||||
*/
|
||||
ret = btrfs_qgroup_account_extents(trans);
|
||||
if (ret < 0) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto unlock_tree_log;
|
||||
|
||||
ret = commit_cowonly_roots(trans);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
if (ret)
|
||||
goto unlock_tree_log;
|
||||
|
||||
/*
|
||||
* The tasks which save the space cache and inode cache may also
|
||||
* update ->aborted, check it.
|
||||
*/
|
||||
if (unlikely(READ_ONCE(cur_trans->aborted))) {
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
goto scrub_continue;
|
||||
goto unlock_tree_log;
|
||||
}
|
||||
|
||||
btrfs_prepare_extent_commit(fs_info);
|
||||
@ -2343,6 +2327,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
if (ret) {
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"Error while writing out transaction");
|
||||
/*
|
||||
* reloc_mutex has been unlocked, tree_log_mutex is still held
|
||||
* but we can't jump to unlock_tree_log causing double unlock
|
||||
*/
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
goto scrub_continue;
|
||||
}
|
||||
@ -2391,6 +2379,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
return ret;
|
||||
|
||||
unlock_tree_log:
|
||||
mutex_unlock(&fs_info->tree_log_mutex);
|
||||
unlock_reloc:
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
scrub_continue:
|
||||
btrfs_scrub_continue(fs_info);
|
||||
cleanup_transaction:
|
||||
@ -2434,13 +2426,18 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
|
||||
btrfs_debug(fs_info, "cleaner removing %llu", root->root_key.objectid);
|
||||
|
||||
btrfs_kill_all_delayed_nodes(root);
|
||||
if (root->ino_cache_inode) {
|
||||
iput(root->ino_cache_inode);
|
||||
root->ino_cache_inode = NULL;
|
||||
}
|
||||
|
||||
if (btrfs_header_backref_rev(root->node) <
|
||||
BTRFS_MIXED_BACKREF_REV)
|
||||
ret = btrfs_drop_snapshot(root, NULL, 0, 0);
|
||||
ret = btrfs_drop_snapshot(root, 0, 0);
|
||||
else
|
||||
ret = btrfs_drop_snapshot(root, NULL, 1, 0);
|
||||
ret = btrfs_drop_snapshot(root, 1, 0);
|
||||
|
||||
btrfs_put_root(root);
|
||||
return (ret < 0) ? 0 : 1;
|
||||
}
|
||||
|
||||
|
@ -71,6 +71,7 @@ struct btrfs_transaction {
|
||||
*/
|
||||
struct list_head io_bgs;
|
||||
struct list_head dropped_roots;
|
||||
struct extent_io_tree pinned_extents;
|
||||
|
||||
/*
|
||||
* we need to make sure block group deletion doesn't race with
|
||||
@ -115,6 +116,10 @@ struct btrfs_trans_handle {
|
||||
struct btrfs_block_rsv *orig_rsv;
|
||||
refcount_t use_count;
|
||||
unsigned int type;
|
||||
/*
|
||||
* Error code of transaction abort, set outside of locks and must use
|
||||
* the READ_ONCE/WRITE_ONCE access
|
||||
*/
|
||||
short aborted;
|
||||
bool adding_csums;
|
||||
bool allocating_chunk;
|
||||
@ -126,6 +131,14 @@ struct btrfs_trans_handle {
|
||||
struct list_head new_bgs;
|
||||
};
|
||||
|
||||
/*
|
||||
* The abort status can be changed between calls and is not protected by locks.
|
||||
* This accepts btrfs_transaction and btrfs_trans_handle as types. Once it's
|
||||
* set to a non-zero value it does not change, so the macro should be in checks
|
||||
* but is not necessary for further reads of the value.
|
||||
*/
|
||||
#define TRANS_ABORTED(trans) (unlikely(READ_ONCE((trans)->aborted)))
|
||||
|
||||
struct btrfs_pending_snapshot {
|
||||
struct dentry *dentry;
|
||||
struct inode *dir;
|
||||
|
@ -18,6 +18,8 @@
|
||||
#include "compression.h"
|
||||
#include "qgroup.h"
|
||||
#include "inode-map.h"
|
||||
#include "block-group.h"
|
||||
#include "space-info.h"
|
||||
|
||||
/* magic values for the inode_only field in btrfs_log_inode:
|
||||
*
|
||||
@ -94,8 +96,8 @@ enum {
|
||||
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode,
|
||||
int inode_only,
|
||||
const loff_t start,
|
||||
const loff_t end,
|
||||
u64 start,
|
||||
u64 end,
|
||||
struct btrfs_log_ctx *ctx);
|
||||
static int link_to_fixup_dir(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
@ -311,7 +313,7 @@ static int process_one_buffer(struct btrfs_root *log,
|
||||
}
|
||||
|
||||
if (wc->pin)
|
||||
ret = btrfs_pin_extent_for_log_replay(fs_info, eb->start,
|
||||
ret = btrfs_pin_extent_for_log_replay(wc->trans, eb->start,
|
||||
eb->len);
|
||||
|
||||
if (!ret && btrfs_buffer_uptodate(eb, gen, 0)) {
|
||||
@ -830,6 +832,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_inode_set_file_extent_range(BTRFS_I(inode), start,
|
||||
extent_end - start);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
inode_add_bytes(inode, nbytes);
|
||||
update_inode:
|
||||
ret = btrfs_update_inode(trans, root, inode);
|
||||
@ -2659,18 +2666,39 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Correctly adjust the reserved bytes occupied by a log tree extent buffer
|
||||
*/
|
||||
static void unaccount_log_buffer(struct btrfs_fs_info *fs_info, u64 start)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, start);
|
||||
if (!cache) {
|
||||
btrfs_err(fs_info, "unable to find block group for %llu", start);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock(&cache->space_info->lock);
|
||||
spin_lock(&cache->lock);
|
||||
cache->reserved -= fs_info->nodesize;
|
||||
cache->space_info->bytes_reserved -= fs_info->nodesize;
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
}
|
||||
|
||||
static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, int *level,
|
||||
struct walk_control *wc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 root_owner;
|
||||
u64 bytenr;
|
||||
u64 ptr_gen;
|
||||
struct extent_buffer *next;
|
||||
struct extent_buffer *cur;
|
||||
struct extent_buffer *parent;
|
||||
u32 blocksize;
|
||||
int ret = 0;
|
||||
|
||||
@ -2690,9 +2718,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]);
|
||||
blocksize = fs_info->nodesize;
|
||||
|
||||
parent = path->nodes[*level];
|
||||
root_owner = btrfs_header_owner(parent);
|
||||
|
||||
next = btrfs_find_create_tree_block(fs_info, bytenr);
|
||||
if (IS_ERR(next))
|
||||
return PTR_ERR(next);
|
||||
@ -2720,18 +2745,16 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
btrfs_clean_tree_block(next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
ret = btrfs_pin_reserved_extent(trans,
|
||||
bytenr, blocksize);
|
||||
if (ret) {
|
||||
free_extent_buffer(next);
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
|
||||
clear_extent_buffer_dirty(next);
|
||||
}
|
||||
|
||||
WARN_ON(root_owner !=
|
||||
BTRFS_TREE_LOG_OBJECTID);
|
||||
ret = btrfs_pin_reserved_extent(fs_info,
|
||||
bytenr, blocksize);
|
||||
if (ret) {
|
||||
free_extent_buffer(next);
|
||||
return ret;
|
||||
unaccount_log_buffer(fs_info, bytenr);
|
||||
}
|
||||
}
|
||||
free_extent_buffer(next);
|
||||
@ -2762,7 +2785,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
||||
struct walk_control *wc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 root_owner;
|
||||
int i;
|
||||
int slot;
|
||||
int ret;
|
||||
@ -2775,13 +2797,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
||||
WARN_ON(*level == 0);
|
||||
return 0;
|
||||
} else {
|
||||
struct extent_buffer *parent;
|
||||
if (path->nodes[*level] == root->node)
|
||||
parent = path->nodes[*level];
|
||||
else
|
||||
parent = path->nodes[*level + 1];
|
||||
|
||||
root_owner = btrfs_header_owner(parent);
|
||||
ret = wc->process_func(root, path->nodes[*level], wc,
|
||||
btrfs_header_generation(path->nodes[*level]),
|
||||
*level);
|
||||
@ -2799,17 +2814,18 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
||||
btrfs_clean_tree_block(next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
ret = btrfs_pin_reserved_extent(trans,
|
||||
path->nodes[*level]->start,
|
||||
path->nodes[*level]->len);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
|
||||
clear_extent_buffer_dirty(next);
|
||||
}
|
||||
|
||||
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
|
||||
ret = btrfs_pin_reserved_extent(fs_info,
|
||||
path->nodes[*level]->start,
|
||||
path->nodes[*level]->len);
|
||||
if (ret)
|
||||
return ret;
|
||||
unaccount_log_buffer(fs_info,
|
||||
path->nodes[*level]->start);
|
||||
}
|
||||
}
|
||||
free_extent_buffer(path->nodes[*level]);
|
||||
path->nodes[*level] = NULL;
|
||||
@ -2880,15 +2896,15 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
|
||||
btrfs_clean_tree_block(next);
|
||||
btrfs_wait_tree_block_writeback(next);
|
||||
btrfs_tree_unlock(next);
|
||||
ret = btrfs_pin_reserved_extent(trans,
|
||||
next->start, next->len);
|
||||
if (ret)
|
||||
goto out;
|
||||
} else {
|
||||
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &next->bflags))
|
||||
clear_extent_buffer_dirty(next);
|
||||
unaccount_log_buffer(fs_info, next->start);
|
||||
}
|
||||
|
||||
ret = btrfs_pin_reserved_extent(fs_info, next->start,
|
||||
next->len);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3283,8 +3299,7 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
clear_extent_bits(&log->dirty_log_pages, 0, (u64)-1,
|
||||
EXTENT_DIRTY | EXTENT_NEW | EXTENT_NEED_WAIT);
|
||||
free_extent_buffer(log->node);
|
||||
kfree(log);
|
||||
btrfs_put_root(log);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4518,13 +4533,15 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
|
||||
static int btrfs_log_holes(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_inode *inode,
|
||||
struct btrfs_path *path)
|
||||
struct btrfs_path *path,
|
||||
const u64 start,
|
||||
const u64 end)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
const u64 i_size = i_size_read(&inode->vfs_inode);
|
||||
u64 prev_extent_end = 0;
|
||||
u64 prev_extent_end = start;
|
||||
int ret;
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
|
||||
@ -4532,16 +4549,21 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
|
||||
|
||||
key.objectid = ino;
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = 0;
|
||||
key.offset = start;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret > 0 && path->slots[0] > 0) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
|
||||
if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
while (true) {
|
||||
struct btrfs_file_extent_item *extent;
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
u64 len;
|
||||
u64 extent_end;
|
||||
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
@ -4558,9 +4580,18 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
break;
|
||||
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
if (extent_end <= start)
|
||||
goto next_slot;
|
||||
|
||||
/* We have a hole, log it. */
|
||||
if (prev_extent_end < key.offset) {
|
||||
const u64 hole_len = key.offset - prev_extent_end;
|
||||
u64 hole_len;
|
||||
|
||||
if (key.offset >= end)
|
||||
hole_len = end - prev_extent_end;
|
||||
else
|
||||
hole_len = key.offset - prev_extent_end;
|
||||
|
||||
/*
|
||||
* Release the path to avoid deadlocks with other code
|
||||
@ -4590,27 +4621,20 @@ static int btrfs_log_holes(struct btrfs_trans_handle *trans,
|
||||
leaf = path->nodes[0];
|
||||
}
|
||||
|
||||
extent = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(leaf, extent) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
len = btrfs_file_extent_ram_bytes(leaf, extent);
|
||||
prev_extent_end = ALIGN(key.offset + len,
|
||||
fs_info->sectorsize);
|
||||
} else {
|
||||
len = btrfs_file_extent_num_bytes(leaf, extent);
|
||||
prev_extent_end = key.offset + len;
|
||||
}
|
||||
|
||||
prev_extent_end = min(extent_end, end);
|
||||
if (extent_end >= end)
|
||||
break;
|
||||
next_slot:
|
||||
path->slots[0]++;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (prev_extent_end < i_size) {
|
||||
if (prev_extent_end < end && prev_extent_end < i_size) {
|
||||
u64 hole_len;
|
||||
|
||||
btrfs_release_path(path);
|
||||
hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
|
||||
hole_len = min(ALIGN(i_size, fs_info->sectorsize), end);
|
||||
hole_len -= prev_extent_end;
|
||||
ret = btrfs_insert_file_extent(trans, root->log_root,
|
||||
ino, prev_extent_end, 0, 0,
|
||||
hole_len, 0, hole_len,
|
||||
@ -4938,6 +4962,178 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int copy_inode_items_to_log(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode,
|
||||
struct btrfs_key *min_key,
|
||||
const struct btrfs_key *max_key,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_path *dst_path,
|
||||
const u64 logged_isize,
|
||||
const bool recursive_logging,
|
||||
const int inode_only,
|
||||
const u64 start,
|
||||
const u64 end,
|
||||
struct btrfs_log_ctx *ctx,
|
||||
bool *need_log_inode_item)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
int ins_start_slot = 0;
|
||||
int ins_nr = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We must make sure we don't copy extent items that are entirely out of
|
||||
* the range [start, end - 1]. This is not just an optimization to avoid
|
||||
* copying but also needed to avoid a corruption where we end up with
|
||||
* file extent items in the log tree that have overlapping ranges - this
|
||||
* can happen if we race with ordered extent completion for ranges that
|
||||
* are outside our target range. For example we copy an extent item and
|
||||
* when we move to the next leaf, that extent was trimmed and a new one
|
||||
* covering a subrange of it, but with a higher key, was inserted - we
|
||||
* would then copy this other extent too, resulting in a log tree with
|
||||
* 2 extent items that represent overlapping ranges.
|
||||
*
|
||||
* We can copy the entire extents at the range boundaries however, even
|
||||
* if they cover an area outside the target range. That's ok.
|
||||
*/
|
||||
while (1) {
|
||||
ret = btrfs_search_forward(root, min_key, path, trans->transid);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
again:
|
||||
/* Note, ins_nr might be > 0 here, cleanup outside the loop */
|
||||
if (min_key->objectid != max_key->objectid)
|
||||
break;
|
||||
if (min_key->type > max_key->type)
|
||||
break;
|
||||
|
||||
if (min_key->type == BTRFS_INODE_ITEM_KEY)
|
||||
*need_log_inode_item = false;
|
||||
|
||||
if ((min_key->type == BTRFS_INODE_REF_KEY ||
|
||||
min_key->type == BTRFS_INODE_EXTREF_KEY) &&
|
||||
inode->generation == trans->transid &&
|
||||
!recursive_logging) {
|
||||
u64 other_ino = 0;
|
||||
u64 other_parent = 0;
|
||||
|
||||
ret = btrfs_check_ref_name_override(path->nodes[0],
|
||||
path->slots[0], min_key, inode,
|
||||
&other_ino, &other_parent);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret > 0 && ctx &&
|
||||
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
|
||||
if (ins_nr > 0) {
|
||||
ins_nr++;
|
||||
} else {
|
||||
ins_nr = 1;
|
||||
ins_start_slot = path->slots[0];
|
||||
}
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr,
|
||||
inode_only, logged_isize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 0;
|
||||
|
||||
ret = log_conflicting_inodes(trans, root, path,
|
||||
ctx, other_ino, other_parent);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_release_path(path);
|
||||
goto next_key;
|
||||
}
|
||||
}
|
||||
|
||||
/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
|
||||
if (min_key->type == BTRFS_XATTR_ITEM_KEY) {
|
||||
if (ins_nr == 0)
|
||||
goto next_slot;
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 0;
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
if (min_key->type == BTRFS_EXTENT_DATA_KEY) {
|
||||
const u64 extent_end = btrfs_file_extent_end(path);
|
||||
|
||||
if (extent_end <= start) {
|
||||
if (ins_nr > 0) {
|
||||
ret = copy_items(trans, inode, dst_path,
|
||||
path, ins_start_slot,
|
||||
ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 0;
|
||||
}
|
||||
goto next_slot;
|
||||
}
|
||||
if (extent_end >= end) {
|
||||
ins_nr++;
|
||||
if (ins_nr == 1)
|
||||
ins_start_slot = path->slots[0];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
|
||||
ins_nr++;
|
||||
goto next_slot;
|
||||
} else if (!ins_nr) {
|
||||
ins_start_slot = path->slots[0];
|
||||
ins_nr = 1;
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 1;
|
||||
ins_start_slot = path->slots[0];
|
||||
next_slot:
|
||||
path->slots[0]++;
|
||||
if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], min_key,
|
||||
path->slots[0]);
|
||||
goto again;
|
||||
}
|
||||
if (ins_nr) {
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 0;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
next_key:
|
||||
if (min_key->offset < (u64)-1) {
|
||||
min_key->offset++;
|
||||
} else if (min_key->type < max_key->type) {
|
||||
min_key->type++;
|
||||
min_key->offset = 0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ins_nr)
|
||||
ret = copy_items(trans, inode, dst_path, path, ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* log a single inode in the tree log.
|
||||
* At least one parent directory for this inode must exist in the tree
|
||||
* or be logged already.
|
||||
@ -4955,8 +5151,8 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
|
||||
static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode,
|
||||
int inode_only,
|
||||
const loff_t start,
|
||||
const loff_t end,
|
||||
u64 start,
|
||||
u64 end,
|
||||
struct btrfs_log_ctx *ctx)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
@ -4967,9 +5163,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *log = root->log_root;
|
||||
int err = 0;
|
||||
int ret;
|
||||
int nritems;
|
||||
int ins_start_slot = 0;
|
||||
int ins_nr;
|
||||
bool fast_search = false;
|
||||
u64 ino = btrfs_ino(inode);
|
||||
struct extent_map_tree *em_tree = &inode->extent_tree;
|
||||
@ -4987,6 +5180,9 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
start = ALIGN_DOWN(start, fs_info->sectorsize);
|
||||
end = ALIGN(end, fs_info->sectorsize);
|
||||
|
||||
min_key.objectid = ino;
|
||||
min_key.type = BTRFS_INODE_ITEM_KEY;
|
||||
min_key.offset = 0;
|
||||
@ -5100,139 +5296,12 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
while (1) {
|
||||
ins_nr = 0;
|
||||
ret = btrfs_search_forward(root, &min_key,
|
||||
path, trans->transid);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (ret != 0)
|
||||
break;
|
||||
again:
|
||||
/* note, ins_nr might be > 0 here, cleanup outside the loop */
|
||||
if (min_key.objectid != ino)
|
||||
break;
|
||||
if (min_key.type > max_key.type)
|
||||
break;
|
||||
|
||||
if (min_key.type == BTRFS_INODE_ITEM_KEY)
|
||||
need_log_inode_item = false;
|
||||
|
||||
if ((min_key.type == BTRFS_INODE_REF_KEY ||
|
||||
min_key.type == BTRFS_INODE_EXTREF_KEY) &&
|
||||
inode->generation == trans->transid &&
|
||||
!recursive_logging) {
|
||||
u64 other_ino = 0;
|
||||
u64 other_parent = 0;
|
||||
|
||||
ret = btrfs_check_ref_name_override(path->nodes[0],
|
||||
path->slots[0], &min_key, inode,
|
||||
&other_ino, &other_parent);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
} else if (ret > 0 && ctx &&
|
||||
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
|
||||
if (ins_nr > 0) {
|
||||
ins_nr++;
|
||||
} else {
|
||||
ins_nr = 1;
|
||||
ins_start_slot = path->slots[0];
|
||||
}
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
|
||||
err = log_conflicting_inodes(trans, root, path,
|
||||
ctx, other_ino, other_parent);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
btrfs_release_path(path);
|
||||
goto next_key;
|
||||
}
|
||||
}
|
||||
|
||||
/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
|
||||
if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
|
||||
if (ins_nr == 0)
|
||||
goto next_slot;
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
|
||||
ins_nr++;
|
||||
goto next_slot;
|
||||
} else if (!ins_nr) {
|
||||
ins_start_slot = path->slots[0];
|
||||
ins_nr = 1;
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 1;
|
||||
ins_start_slot = path->slots[0];
|
||||
next_slot:
|
||||
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
path->slots[0]++;
|
||||
if (path->slots[0] < nritems) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &min_key,
|
||||
path->slots[0]);
|
||||
goto again;
|
||||
}
|
||||
if (ins_nr) {
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
next_key:
|
||||
if (min_key.offset < (u64)-1) {
|
||||
min_key.offset++;
|
||||
} else if (min_key.type < max_key.type) {
|
||||
min_key.type++;
|
||||
min_key.offset = 0;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ins_nr) {
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
}
|
||||
err = copy_inode_items_to_log(trans, inode, &min_key, &max_key,
|
||||
path, dst_path, logged_isize,
|
||||
recursive_logging, inode_only,
|
||||
start, end, ctx, &need_log_inode_item);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
btrfs_release_path(path);
|
||||
btrfs_release_path(dst_path);
|
||||
@ -5243,7 +5312,7 @@ next_key:
|
||||
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
|
||||
btrfs_release_path(path);
|
||||
btrfs_release_path(dst_path);
|
||||
err = btrfs_log_holes(trans, root, inode, path);
|
||||
err = btrfs_log_holes(trans, root, inode, path, start, end);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -6145,7 +6214,7 @@ again:
|
||||
if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID)
|
||||
break;
|
||||
|
||||
log = btrfs_read_fs_root(log_root_tree, &found_key);
|
||||
log = btrfs_read_tree_root(log_root_tree, &found_key);
|
||||
if (IS_ERR(log)) {
|
||||
ret = PTR_ERR(log);
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
@ -6157,7 +6226,7 @@ again:
|
||||
tmp_key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
tmp_key.offset = (u64)-1;
|
||||
|
||||
wc.replay_dest = btrfs_read_fs_root_no_name(fs_info, &tmp_key);
|
||||
wc.replay_dest = btrfs_get_fs_root(fs_info, &tmp_key, true);
|
||||
if (IS_ERR(wc.replay_dest)) {
|
||||
ret = PTR_ERR(wc.replay_dest);
|
||||
|
||||
@ -6173,12 +6242,10 @@ again:
|
||||
* each subsequent pass.
|
||||
*/
|
||||
if (ret == -ENOENT)
|
||||
ret = btrfs_pin_extent_for_log_replay(fs_info,
|
||||
ret = btrfs_pin_extent_for_log_replay(trans,
|
||||
log->node->start,
|
||||
log->node->len);
|
||||
free_extent_buffer(log->node);
|
||||
free_extent_buffer(log->commit_root);
|
||||
kfree(log);
|
||||
btrfs_put_root(log);
|
||||
|
||||
if (!ret)
|
||||
goto next;
|
||||
@ -6214,9 +6281,8 @@ again:
|
||||
}
|
||||
|
||||
wc.replay_dest->log_root = NULL;
|
||||
free_extent_buffer(log->node);
|
||||
free_extent_buffer(log->commit_root);
|
||||
kfree(log);
|
||||
btrfs_put_root(wc.replay_dest);
|
||||
btrfs_put_root(log);
|
||||
|
||||
if (ret)
|
||||
goto error;
|
||||
@ -6247,10 +6313,9 @@ next:
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
free_extent_buffer(log_root_tree->node);
|
||||
log_root_tree->log_root = NULL;
|
||||
clear_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags);
|
||||
kfree(log_root_tree);
|
||||
btrfs_put_root(log_root_tree);
|
||||
|
||||
return 0;
|
||||
error:
|
||||
|
@ -246,9 +246,53 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
|
||||
int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
|
||||
u64))
|
||||
/*
|
||||
* Check if there's a matching subvolume for the given UUID
|
||||
*
|
||||
* Return:
|
||||
* 0 check succeeded, the entry is not outdated
|
||||
* > 0 if the check failed, the caller should remove the entry
|
||||
* < 0 if an error occurred
|
||||
*/
|
||||
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
|
||||
u8 *uuid, u8 type, u64 subvolid)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
int ret = 0;
|
||||
struct btrfs_root *subvol_root;
|
||||
|
||||
if (type != BTRFS_UUID_KEY_SUBVOL &&
|
||||
type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
|
||||
goto out;
|
||||
|
||||
key.objectid = subvolid;
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = (u64)-1;
|
||||
subvol_root = btrfs_get_fs_root(fs_info, &key, true);
|
||||
if (IS_ERR(subvol_root)) {
|
||||
ret = PTR_ERR(subvol_root);
|
||||
if (ret == -ENOENT)
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
switch (type) {
|
||||
case BTRFS_UUID_KEY_SUBVOL:
|
||||
if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
|
||||
ret = 1;
|
||||
break;
|
||||
case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
|
||||
if (memcmp(uuid, subvol_root->root_item.received_uuid,
|
||||
BTRFS_UUID_SIZE))
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
btrfs_put_root(subvol_root);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->uuid_root;
|
||||
struct btrfs_key key;
|
||||
@ -278,6 +322,10 @@ again_search_slot:
|
||||
}
|
||||
|
||||
while (1) {
|
||||
if (btrfs_fs_closing(fs_info)) {
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
cond_resched();
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
@ -305,7 +353,8 @@ again_search_slot:
|
||||
read_extent_buffer(leaf, &subid_le, offset,
|
||||
sizeof(subid_le));
|
||||
subid_cpu = le64_to_cpu(subid_le);
|
||||
ret = check_func(fs_info, uuid, key.type, subid_cpu);
|
||||
ret = btrfs_check_uuid_tree_entry(fs_info, uuid,
|
||||
key.type, subid_cpu);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -17,8 +17,6 @@ extern struct mutex uuid_mutex;
|
||||
|
||||
#define BTRFS_STRIPE_LEN SZ_64K
|
||||
|
||||
struct buffer_head;
|
||||
|
||||
struct btrfs_io_geometry {
|
||||
/* remaining bytes before crossing a stripe */
|
||||
u64 len;
|
||||
@ -209,6 +207,10 @@ BTRFS_DEVICE_GETSET_FUNCS(total_bytes);
|
||||
BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
|
||||
BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
|
||||
|
||||
enum btrfs_chunk_allocation_policy {
|
||||
BTRFS_CHUNK_ALLOC_REGULAR,
|
||||
};
|
||||
|
||||
struct btrfs_fs_devices {
|
||||
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
|
||||
u8 metadata_uuid[BTRFS_FSID_SIZE];
|
||||
@ -260,6 +262,8 @@ struct btrfs_fs_devices {
|
||||
struct kobject *devices_kobj;
|
||||
struct kobject *devinfo_kobj;
|
||||
struct completion kobj_unregister;
|
||||
|
||||
enum btrfs_chunk_allocation_policy chunk_alloc_policy;
|
||||
};
|
||||
|
||||
#define BTRFS_BIO_INLINE_CSUM_SIZE 64
|
||||
@ -461,7 +465,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_uuid_scan_kthread(void *data);
|
||||
int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *max_avail);
|
||||
@ -474,7 +478,6 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
|
||||
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
|
||||
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
|
||||
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
|
||||
void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path);
|
||||
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
|
||||
u64 logical, u64 len);
|
||||
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
|
||||
@ -484,6 +487,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
|
||||
struct extent_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
|
||||
u64 logical, u64 length);
|
||||
void btrfs_release_disk_super(struct btrfs_super_block *super);
|
||||
|
||||
static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
|
||||
int index)
|
||||
|
@ -43,6 +43,16 @@ static inline void guid_copy(guid_t *dst, const guid_t *src)
|
||||
memcpy(dst, src, sizeof(guid_t));
|
||||
}
|
||||
|
||||
static inline void import_guid(guid_t *dst, const __u8 *src)
|
||||
{
|
||||
memcpy(dst, src, sizeof(guid_t));
|
||||
}
|
||||
|
||||
static inline void export_guid(__u8 *dst, const guid_t *src)
|
||||
{
|
||||
memcpy(dst, src, sizeof(guid_t));
|
||||
}
|
||||
|
||||
static inline bool guid_is_null(const guid_t *guid)
|
||||
{
|
||||
return guid_equal(guid, &guid_null);
|
||||
@ -58,12 +68,23 @@ static inline void uuid_copy(uuid_t *dst, const uuid_t *src)
|
||||
memcpy(dst, src, sizeof(uuid_t));
|
||||
}
|
||||
|
||||
static inline void import_uuid(uuid_t *dst, const __u8 *src)
|
||||
{
|
||||
memcpy(dst, src, sizeof(uuid_t));
|
||||
}
|
||||
|
||||
static inline void export_uuid(__u8 *dst, const uuid_t *src)
|
||||
{
|
||||
memcpy(dst, src, sizeof(uuid_t));
|
||||
}
|
||||
|
||||
static inline bool uuid_is_null(const uuid_t *uuid)
|
||||
{
|
||||
return uuid_equal(uuid, &uuid_null);
|
||||
}
|
||||
|
||||
void generate_random_uuid(unsigned char uuid[16]);
|
||||
void generate_random_guid(unsigned char guid[16]);
|
||||
|
||||
extern void guid_gen(guid_t *u);
|
||||
extern void uuid_gen(uuid_t *u);
|
||||
@ -77,7 +98,6 @@ int guid_parse(const char *uuid, guid_t *u);
|
||||
int uuid_parse(const char *uuid, uuid_t *u);
|
||||
|
||||
/* backwards compatibility, don't use in new code */
|
||||
#define uuid_le_gen(u) guid_gen(u)
|
||||
#define uuid_le_to_bin(guid, u) guid_parse(guid, u)
|
||||
|
||||
static inline int uuid_le_cmp(const guid_t u1, const guid_t u2)
|
||||
|
@ -81,13 +81,14 @@ TRACE_DEFINE_ENUM(COMMIT_TRANS);
|
||||
|
||||
#define show_extent_io_tree_owner(owner) \
|
||||
__print_symbolic(owner, \
|
||||
{ IO_TREE_FS_INFO_FREED_EXTENTS0, "FREED_EXTENTS0" }, \
|
||||
{ IO_TREE_FS_INFO_FREED_EXTENTS1, "FREED_EXTENTS1" }, \
|
||||
{ IO_TREE_FS_PINNED_EXTENTS, "PINNED_EXTENTS" }, \
|
||||
{ IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS" }, \
|
||||
{ IO_TREE_INODE_IO, "INODE_IO" }, \
|
||||
{ IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE" }, \
|
||||
{ IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS" }, \
|
||||
{ IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES" }, \
|
||||
{ IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES" }, \
|
||||
{ IO_TREE_INODE_FILE_EXTENT, "INODE_FILE_EXTENT" }, \
|
||||
{ IO_TREE_SELFTEST, "SELFTEST" })
|
||||
|
||||
#define BTRFS_GROUP_FLAGS \
|
||||
@ -468,7 +469,6 @@ DEFINE_EVENT(
|
||||
{ (1 << BTRFS_ORDERED_PREALLOC), "PREALLOC" }, \
|
||||
{ (1 << BTRFS_ORDERED_DIRECT), "DIRECT" }, \
|
||||
{ (1 << BTRFS_ORDERED_IOERR), "IOERR" }, \
|
||||
{ (1 << BTRFS_ORDERED_UPDATED_ISIZE), "UPDATED_ISIZE" }, \
|
||||
{ (1 << BTRFS_ORDERED_TRUNCATED), "TRUNCATED" })
|
||||
|
||||
|
||||
|
@ -36,17 +36,24 @@ struct btrfs_ioctl_vol_args {
|
||||
#define BTRFS_DEVICE_PATH_NAME_MAX 1024
|
||||
#define BTRFS_SUBVOL_NAME_MAX 4039
|
||||
|
||||
#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
|
||||
/*
|
||||
* Deprecated since 5.7:
|
||||
*
|
||||
* BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
|
||||
*/
|
||||
|
||||
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
|
||||
#define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2)
|
||||
|
||||
#define BTRFS_DEVICE_SPEC_BY_ID (1ULL << 3)
|
||||
|
||||
#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
|
||||
|
||||
#define BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED \
|
||||
(BTRFS_SUBVOL_CREATE_ASYNC | \
|
||||
BTRFS_SUBVOL_RDONLY | \
|
||||
(BTRFS_SUBVOL_RDONLY | \
|
||||
BTRFS_SUBVOL_QGROUP_INHERIT | \
|
||||
BTRFS_DEVICE_SPEC_BY_ID)
|
||||
BTRFS_DEVICE_SPEC_BY_ID | \
|
||||
BTRFS_SUBVOL_SPEC_BY_ID)
|
||||
|
||||
#define BTRFS_FSID_SIZE 16
|
||||
#define BTRFS_UUID_SIZE 16
|
||||
@ -97,16 +104,29 @@ struct btrfs_ioctl_qgroup_limit_args {
|
||||
};
|
||||
|
||||
/*
|
||||
* flags for subvolumes
|
||||
* Arguments for specification of subvolumes or devices, supporting by-name or
|
||||
* by-id and flags
|
||||
*
|
||||
* Used by:
|
||||
* struct btrfs_ioctl_vol_args_v2.flags
|
||||
* The set of supported flags depends on the ioctl
|
||||
*
|
||||
* BTRFS_SUBVOL_RDONLY is also provided/consumed by the following ioctls:
|
||||
* - BTRFS_IOC_SUBVOL_GETFLAGS
|
||||
* - BTRFS_IOC_SUBVOL_SETFLAGS
|
||||
*/
|
||||
|
||||
/* Supported flags for BTRFS_IOC_RM_DEV_V2 */
|
||||
#define BTRFS_DEVICE_REMOVE_ARGS_MASK \
|
||||
(BTRFS_DEVICE_SPEC_BY_ID)
|
||||
|
||||
/* Supported flags for BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 */
|
||||
#define BTRFS_SUBVOL_CREATE_ARGS_MASK \
|
||||
(BTRFS_SUBVOL_RDONLY | \
|
||||
BTRFS_SUBVOL_QGROUP_INHERIT)
|
||||
|
||||
/* Supported flags for BTRFS_IOC_SNAP_DESTROY_V2 */
|
||||
#define BTRFS_SUBVOL_DELETE_ARGS_MASK \
|
||||
(BTRFS_SUBVOL_SPEC_BY_ID)
|
||||
|
||||
struct btrfs_ioctl_vol_args_v2 {
|
||||
__s64 fd;
|
||||
__u64 transid;
|
||||
@ -121,6 +141,7 @@ struct btrfs_ioctl_vol_args_v2 {
|
||||
union {
|
||||
char name[BTRFS_SUBVOL_NAME_MAX + 1];
|
||||
__u64 devid;
|
||||
__u64 subvolid;
|
||||
};
|
||||
};
|
||||
|
||||
@ -949,5 +970,7 @@ enum btrfs_err_code {
|
||||
struct btrfs_ioctl_get_subvol_rootref_args)
|
||||
#define BTRFS_IOC_INO_LOOKUP_USER _IOWR(BTRFS_IOCTL_MAGIC, 62, \
|
||||
struct btrfs_ioctl_ino_lookup_user_args)
|
||||
#define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
|
||||
struct btrfs_ioctl_vol_args_v2)
|
||||
|
||||
#endif /* _UAPI_LINUX_BTRFS_H */
|
||||
|
10
lib/uuid.c
10
lib/uuid.c
@ -40,6 +40,16 @@ void generate_random_uuid(unsigned char uuid[16])
|
||||
}
|
||||
EXPORT_SYMBOL(generate_random_uuid);
|
||||
|
||||
void generate_random_guid(unsigned char guid[16])
|
||||
{
|
||||
get_random_bytes(guid, 16);
|
||||
/* Set GUID version to 4 --- truly random generation */
|
||||
guid[7] = (guid[7] & 0x0F) | 0x40;
|
||||
/* Set the GUID variant to DCE */
|
||||
guid[8] = (guid[8] & 0x3F) | 0x80;
|
||||
}
|
||||
EXPORT_SYMBOL(generate_random_guid);
|
||||
|
||||
static void __uuid_gen_common(__u8 b[16])
|
||||
{
|
||||
prandom_bytes(b, 16);
|
||||
|
Loading…
Reference in New Issue
Block a user