for-6.1-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmM6zNkACgkQxWXV+ddt
WDsNMg/+LTuwf6Js+mAl1AgtSpLOl2gLfNBJAUXhzwPbc3nF9bwONE/EUYEXTo5h
kTf1cQRj0NCIZ7iHDwXuWNm77diNl+SChEDIoc7k0d6P7Qmmn2AWbTLM4dleyg5S
6jxPpOMbegycQfL9tSJNaiT9zlZxj9Z+0yPibR99otrgtuv6zuvRxcdh34rEFIyf
xoabO3/18lAKHzYzAZxNXMpbUSBmqLPVoZEOcfBAXvcuIJkzKRP6Y9gwlYs+kn+D
J8BPa3LoSNxXrpCvWzlu7vO3gwNp7H7pQQqZKjjEcOZ+dj2UYQeTyJvl1vdzaNyk
EoFYlkaKkYi7RaonuHjNaTeD/igJf8Eo6DTiXzACECssbKutlvNG4HXuFApsWy7M
T7KZ5jTAQ98ZMYjgZ27UbEpFZd8lYHzV952Njjo9zbRVbqwaPEZTTdkjpz+3X6t4
Z0A951ixOYKiOVdu3Uj1fHaBv0n/p0wrXIGt3ZIdjufM9TctV3oJwOZOiM2H0ccb
XJVwsQG92+ja9XLZrw8H62PCKBYo3LL52r9b9NVodY9aTsQWTfiV5OP84RRlncCp
hzPkHmO1YIyVcLoijagiO7cW21pQbKfqsRX/P1F7DXyjosHppmDS7IHDWA7Adf3W
QA6eBnoWqVwBh7P+IyxJuRG0CrnxkPZeAZIhohDwk5Mt4NGATkA=
=NlUz
-----END PGP SIGNATURE-----

Merge tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There's a bunch of performance improvements, most notably the FIEMAP
  speedup, the new block group tree to speed up mount on large
  filesystems, more io_uring integration, some sysfs exports and the
  usual fixes and core updates.

  Summary:

  Performance:

   - outstanding FIEMAP speed improvement
      - algorithmic change in how extents are enumerated leads to orders
        of magnitude speed boost (uncached and cached)
      - extent sharing check speedup (2.2x uncached, 3x cached)
      - add more cancellation points, allowing to interrupt seeking in
        files with large number of extents
      - more efficient hole and data seeking (4x uncached, 1.3x cached)
      - sample results:
          256M, 32K extents:   4s ->  29ms  (~150x)
          512M, 64K extents:  30s ->  59ms  (~550x)
          1G,  128K extents: 225s -> 120ms  (~1800x)

   - improved inode logging, especially for directories (on dbench
     workload throughput +25%, max latency -21%)

   - improved buffered IO, remove redundant extent state tracking,
     lowering memory consumption and avoiding rb tree traversal

   - add sysfs tunable to let qgroup temporarily skip exact accounting
     when deleting snapshot, leading to a speedup but requiring a rescan
     after that, will be used by snapper

   - support io_uring and buffered writes, until now it was just for
     direct IO, with the no-wait semantics implemented in the buffered
     write path it now works and leads to speed improvement in IOPS
     (2x), throughput (2.2x), latency (depends, 2x to 150x)

   - small performance improvements when dropping and searching for
     extent maps as well as when flushing delalloc in COW mode
     (throughput +5MB/s)

  User visible changes:

   - new incompatible feature block-group-tree adding a dedicated tree
     for tracking block groups, this allows a much faster load during
     mount and avoids seeking unlike when it's scattered in the extent
     tree items
      - this reduces mount time for many-terabyte sized filesystems
      - conversion tool will be provided so existing filesystem can also
        be updated in place
      - to reduce test matrix and feature combinations requires no-holes
        and free-space-tree (mkfs defaults since 5.15)

   - improved reporting of super block corruption detected by scrub

   - scrub also tries to repair super block and does not wait until next
     commit

   - discard stats and tunables are exported in sysfs
     (/sys/fs/btrfs/FSID/discard)

   - qgroup status is exported in sysfs (/sys/fs/btrfs/FSID/qgroups/)

   - verify that super block was not modified when thawing filesystem

  Fixes:

   - FIEMAP fixes
      - fix extent sharing status, does not depend on the cached status
        where merged
      - flush delalloc so compressed extents are reported correctly

   - fix alignment of VMA for memory mapped files on THP

   - send: fix failures when processing inodes with no links (orphan
     files and directories)

   - fix race between quota enable and quota rescan ioctl

   - handle more corner cases for read-only compat feature verification

   - fix missed extent on fsync after dropping extent maps

  Core:

   - lockdep annotations to validate various transaction states and
     state transitions

   - preliminary support for fs-verity in send

   - more effective memory use in scrub for subpage where sector is
     smaller than page

   - block group caching progress logic has been removed, load is now
     synchronous

   - simplify end IO callbacks and bio handling, use chained bios
     instead of own tracking

   - add no-wait semantics to several functions (tree search, nocow,
     flushing, buffered write)

   - cleanups and refactoring

  MM changes:

   - export balance_dirty_pages_ratelimited_flags"

* tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (177 commits)
  btrfs: set generation before calling btrfs_clean_tree_block in btrfs_init_new_buffer
  btrfs: drop extent map range more efficiently
  btrfs: avoid pointless extent map tree search when flushing delalloc
  btrfs: remove unnecessary next extent map search
  btrfs: remove unnecessary NULL pointer checks when searching extent maps
  btrfs: assert tree is locked when clearing extent map from logging
  btrfs: remove unnecessary extent map initializations
  btrfs: remove the refcount warning/check at free_extent_map()
  btrfs: add helper to replace extent map range with a new extent map
  btrfs: move open coded extent map tree deletion out of inode eviction
  btrfs: use cond_resched_rwlock_write() during inode eviction
  btrfs: use extent_map_end() at btrfs_drop_extent_map_range()
  btrfs: move btrfs_drop_extent_cache() to extent_map.c
  btrfs: fix missed extent on fsync after dropping extent maps
  btrfs: remove stale prototype of btrfs_write_inode
  btrfs: enable nowait async buffered writes
  btrfs: assert nowait mode is not used for some btree search functions
  btrfs: make btrfs_buffered_write nowait compatible
  btrfs: plumb NOWAIT through the write path
  btrfs: make lock_and_cleanup_extent_if_need nowait compatible
  ...
commit 76e4503534
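To give the FIEMAP numbers above some context, the sketch below is a minimal user-space extent enumerator of the kind that benefits from this series: it walks a file's extents in batches through the FS_IOC_FIEMAP ioctl and prints whether each extent is reported as shared. It is an illustration only, not code from this pull; the batch size of 128 extents and the use of FIEMAP_FLAG_SYNC are arbitrary choices made for the example.

/*
 * Minimal FIEMAP walker (illustrative sketch, not part of the series).
 * Build with something like: cc -o fiemap-walk fiemap-walk.c
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap *fm;
	const unsigned int batch = 128;	/* extents fetched per ioctl call */
	unsigned long long start = 0, total = 0;
	size_t sz = sizeof(*fm) + batch * sizeof(struct fiemap_extent);
	int fd, done = 0;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	fm = calloc(1, sz);
	if (!fm)
		return 1;

	while (!done) {
		unsigned int i;

		memset(fm, 0, sz);
		fm->fm_start = start;
		fm->fm_length = ~0ULL;			/* map up to end of file */
		fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush delalloc first */
		fm->fm_extent_count = batch;

		if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
			perror("FS_IOC_FIEMAP");
			break;
		}
		if (fm->fm_mapped_extents == 0)
			break;

		for (i = 0; i < fm->fm_mapped_extents; i++) {
			struct fiemap_extent *fe = &fm->fm_extents[i];

			printf("logical %llu len %llu%s\n",
			       (unsigned long long)fe->fe_logical,
			       (unsigned long long)fe->fe_length,
			       (fe->fe_flags & FIEMAP_EXTENT_SHARED) ?
					" (shared)" : "");
			total++;
			start = fe->fe_logical + fe->fe_length;
			if (fe->fe_flags & FIEMAP_EXTENT_LAST)
				done = 1;
		}
	}
	printf("%llu extents\n", total);
	free(fm);
	close(fd);
	return 0;
}

The per-extent FIEMAP_EXTENT_SHARED flag is where the btrfs_is_data_extent_shared() rework in this pull is visible from user space; files with tens or hundreds of thousands of extents, as in the sample results above, are exactly where the enumeration and sharedness-caching changes matter.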
@@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
 	   block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
-	   subpage.o tree-mod-log.o
+	   subpage.o tree-mod-log.o extent-io-tree.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
@ -1511,16 +1511,118 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if an extent is shared or not
|
||||
/*
|
||||
* The caller has joined a transaction or is holding a read lock on the
|
||||
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
|
||||
* snapshot field changing while updating or checking the cache.
|
||||
*/
|
||||
static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, int level, bool *is_shared)
|
||||
{
|
||||
struct btrfs_backref_shared_cache_entry *entry;
|
||||
|
||||
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Level -1 is used for the data extent, which is not reliable to cache
|
||||
* because its reference count can increase or decrease without us
|
||||
* realizing. We cache results only for extent buffers that lead from
|
||||
* the root node down to the leaf with the file extent item.
|
||||
*/
|
||||
ASSERT(level >= 0);
|
||||
|
||||
entry = &cache->entries[level];
|
||||
|
||||
/* Unused cache entry or being used for some other extent buffer. */
|
||||
if (entry->bytenr != bytenr)
|
||||
return false;
|
||||
|
||||
/*
|
||||
* We cached a false result, but the last snapshot generation of the
|
||||
* root changed, so we now have a snapshot. Don't trust the result.
|
||||
*/
|
||||
if (!entry->is_shared &&
|
||||
entry->gen != btrfs_root_last_snapshot(&root->root_item))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we cached a true result and the last generation used for dropping
|
||||
* a root changed, we can not trust the result, because the dropped root
|
||||
* could be a snapshot sharing this extent buffer.
|
||||
*/
|
||||
if (entry->is_shared &&
|
||||
entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
|
||||
return false;
|
||||
|
||||
*is_shared = entry->is_shared;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has joined a transaction or is holding a read lock on the
|
||||
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
|
||||
* snapshot field changing while updating or checking the cache.
|
||||
*/
|
||||
static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
|
||||
struct btrfs_root *root,
|
||||
u64 bytenr, int level, bool is_shared)
|
||||
{
|
||||
struct btrfs_backref_shared_cache_entry *entry;
|
||||
u64 gen;
|
||||
|
||||
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Level -1 is used for the data extent, which is not reliable to cache
|
||||
* because its reference count can increase or decrease without us
|
||||
* realizing. We cache results only for extent buffers that lead from
|
||||
* the root node down to the leaf with the file extent item.
|
||||
*/
|
||||
ASSERT(level >= 0);
|
||||
|
||||
if (is_shared)
|
||||
gen = btrfs_get_last_root_drop_gen(root->fs_info);
|
||||
else
|
||||
gen = btrfs_root_last_snapshot(&root->root_item);
|
||||
|
||||
entry = &cache->entries[level];
|
||||
entry->bytenr = bytenr;
|
||||
entry->is_shared = is_shared;
|
||||
entry->gen = gen;
|
||||
|
||||
/*
|
||||
* If we found an extent buffer is shared, set the cache result for all
|
||||
* extent buffers below it to true. As nodes in the path are COWed,
|
||||
* their sharedness is moved to their children, and if a leaf is COWed,
|
||||
* then the sharedness of a data extent becomes direct, the refcount of
|
||||
* data extent is increased in the extent item at the extent tree.
|
||||
*/
|
||||
if (is_shared) {
|
||||
for (int i = 0; i < level; i++) {
|
||||
entry = &cache->entries[i];
|
||||
entry->is_shared = is_shared;
|
||||
entry->gen = gen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a data extent is shared or not.
|
||||
*
|
||||
* @root: root inode belongs to
|
||||
* @inum: inode number of the inode whose extent we are checking
|
||||
* @bytenr: logical bytenr of the extent we are checking
|
||||
* @roots: list of roots this extent is shared among
|
||||
* @tmp: temporary list used for iteration
|
||||
* @root: The root the inode belongs to.
|
||||
* @inum: Number of the inode whose extent we are checking.
|
||||
* @bytenr: Logical bytenr of the extent we are checking.
|
||||
* @extent_gen: Generation of the extent (file extent item) or 0 if it is
|
||||
* not known.
|
||||
* @roots: List of roots this extent is shared among.
|
||||
* @tmp: Temporary list used for iteration.
|
||||
* @cache: A backref lookup result cache.
|
||||
*
|
||||
* btrfs_check_shared uses the backref walking code but will short
|
||||
* btrfs_is_data_extent_shared uses the backref walking code but will short
|
||||
* circuit as soon as it finds a root or inode that doesn't match the
|
||||
* one passed in. This provides a significant performance benefit for
|
||||
* callers (such as fiemap) which want to know whether the extent is
|
||||
@ -1531,8 +1633,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
|
||||
*
|
||||
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
|
||||
*/
|
||||
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
struct ulist *roots, struct ulist *tmp)
|
||||
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
u64 extent_gen,
|
||||
struct ulist *roots, struct ulist *tmp,
|
||||
struct btrfs_backref_shared_cache *cache)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
@ -1545,6 +1649,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
.inum = inum,
|
||||
.share_count = 0,
|
||||
};
|
||||
int level;
|
||||
|
||||
ulist_init(roots);
|
||||
ulist_init(tmp);
|
||||
@ -1561,22 +1666,52 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
|
||||
btrfs_get_tree_mod_seq(fs_info, &elem);
|
||||
}
|
||||
|
||||
/* -1 means we are in the bytenr of the data extent. */
|
||||
level = -1;
|
||||
ULIST_ITER_INIT(&uiter);
|
||||
while (1) {
|
||||
bool is_shared;
|
||||
bool cached;
|
||||
|
||||
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
|
||||
roots, NULL, &shared, false);
|
||||
if (ret == BACKREF_FOUND_SHARED) {
|
||||
/* this is the only condition under which we return 1 */
|
||||
ret = 1;
|
||||
if (level >= 0)
|
||||
store_backref_shared_cache(cache, root, bytenr,
|
||||
level, true);
|
||||
break;
|
||||
}
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
break;
|
||||
ret = 0;
|
||||
/*
|
||||
* If our data extent is not shared through reflinks and it was
|
||||
* created in a generation after the last one used to create a
|
||||
* snapshot of the inode's root, then it can not be shared
|
||||
* indirectly through subtrees, as that can only happen with
|
||||
* snapshots. In this case bail out, no need to check for the
|
||||
* sharedness of extent buffers.
|
||||
*/
|
||||
if (level == -1 &&
|
||||
extent_gen > btrfs_root_last_snapshot(&root->root_item))
|
||||
break;
|
||||
|
||||
if (level >= 0)
|
||||
store_backref_shared_cache(cache, root, bytenr,
|
||||
level, false);
|
||||
node = ulist_next(tmp, &uiter);
|
||||
if (!node)
|
||||
break;
|
||||
bytenr = node->val;
|
||||
level++;
|
||||
cached = lookup_backref_shared_cache(cache, root, bytenr, level,
|
||||
&is_shared);
|
||||
if (cached) {
|
||||
ret = (is_shared ? 1 : 0);
|
||||
break;
|
||||
}
|
||||
shared.share_count = 0;
|
||||
cond_resched();
|
||||
}
|
||||
|
@@ -17,6 +17,20 @@ struct inode_fs_paths {
 	struct btrfs_data_container *fspath;
 };
 
+struct btrfs_backref_shared_cache_entry {
+	u64 bytenr;
+	u64 gen;
+	bool is_shared;
+};
+
+struct btrfs_backref_shared_cache {
+	/*
+	 * A path from a root to a leaf that has a file extent item pointing to
+	 * a given data extent should never exceed the maximum b+tree height.
+	 */
+	struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
+};
+
 typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
 				      void *ctx);
 
@@ -62,8 +76,10 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
 			  u64 start_off, struct btrfs_path *path,
 			  struct btrfs_inode_extref **ret_extref,
 			  u64 *found_off);
-int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
-		struct ulist *roots, struct ulist *tmp_ulist);
+int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
+				u64 extent_gen,
+				struct ulist *roots, struct ulist *tmp,
+				struct btrfs_backref_shared_cache *cache);
 
 int __init btrfs_prelim_ref_init(void);
 void __cold btrfs_prelim_ref_exit(void);
@ -593,8 +593,6 @@ next:
|
||||
|
||||
if (need_resched() ||
|
||||
rwsem_is_contended(&fs_info->commit_root_sem)) {
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
@ -618,9 +616,6 @@ next:
|
||||
key.objectid = last;
|
||||
key.offset = 0;
|
||||
key.type = BTRFS_EXTENT_ITEM_KEY;
|
||||
|
||||
if (wakeup)
|
||||
caching_ctl->progress = last;
|
||||
btrfs_release_path(path);
|
||||
goto next;
|
||||
}
|
||||
@ -655,7 +650,6 @@ next:
|
||||
|
||||
total_found += add_new_free_space(block_group, last,
|
||||
block_group->start + block_group->length);
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
@ -725,8 +719,6 @@ done:
|
||||
}
|
||||
#endif
|
||||
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
btrfs_free_excluded_extents(block_group);
|
||||
mutex_unlock(&caching_ctl->mutex);
|
||||
@ -755,7 +747,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
mutex_init(&caching_ctl->mutex);
|
||||
init_waitqueue_head(&caching_ctl->wait);
|
||||
caching_ctl->block_group = cache;
|
||||
caching_ctl->progress = cache->start;
|
||||
refcount_set(&caching_ctl->count, 2);
|
||||
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
|
||||
|
||||
@ -772,7 +763,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
WARN_ON(cache->caching_ctl);
|
||||
cache->caching_ctl = caching_ctl;
|
||||
cache->cached = BTRFS_CACHE_STARTED;
|
||||
cache->has_caching_ctl = 1;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
@ -784,8 +774,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
|
||||
|
||||
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
|
||||
out:
|
||||
/* REVIEW */
|
||||
if (wait && caching_ctl)
|
||||
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
|
||||
/* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
|
||||
if (caching_ctl)
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
|
||||
@ -988,32 +980,31 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
kobject_put(kobj);
|
||||
}
|
||||
|
||||
if (block_group->has_caching_ctl)
|
||||
caching_ctl = btrfs_get_caching_control(block_group);
|
||||
if (block_group->cached == BTRFS_CACHE_STARTED)
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
if (block_group->has_caching_ctl) {
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
if (!caching_ctl) {
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
list_for_each_entry(ctl,
|
||||
&fs_info->caching_block_groups, list)
|
||||
if (ctl->block_group == block_group) {
|
||||
caching_ctl = ctl;
|
||||
refcount_inc(&caching_ctl->count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (caching_ctl)
|
||||
list_del_init(&caching_ctl->list);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
if (caching_ctl) {
|
||||
/* Once for the caching bgs list and once for us. */
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
caching_ctl = btrfs_get_caching_control(block_group);
|
||||
if (!caching_ctl) {
|
||||
struct btrfs_caching_control *ctl;
|
||||
|
||||
list_for_each_entry(ctl, &fs_info->caching_block_groups, list) {
|
||||
if (ctl->block_group == block_group) {
|
||||
caching_ctl = ctl;
|
||||
refcount_inc(&caching_ctl->count);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (caching_ctl)
|
||||
list_del_init(&caching_ctl->list);
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
|
||||
if (caching_ctl) {
|
||||
/* Once for the caching bgs list and once for us. */
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
}
|
||||
|
||||
spin_lock(&trans->transaction->dirty_bgs_lock);
|
||||
WARN_ON(!list_empty(&block_group->dirty_list));
|
||||
@ -1034,12 +1025,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
< block_group->zone_unusable);
|
||||
WARN_ON(block_group->space_info->disk_total
|
||||
< block_group->length * factor);
|
||||
WARN_ON(block_group->zone_is_active &&
|
||||
WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
&block_group->runtime_flags) &&
|
||||
block_group->space_info->active_total_bytes
|
||||
< block_group->length);
|
||||
}
|
||||
block_group->space_info->total_bytes -= block_group->length;
|
||||
if (block_group->zone_is_active)
|
||||
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
|
||||
block_group->space_info->active_total_bytes -= block_group->length;
|
||||
block_group->space_info->bytes_readonly -=
|
||||
(block_group->length - block_group->zone_unusable);
|
||||
@ -1069,7 +1061,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
block_group->removed = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
|
||||
|
||||
/*
|
||||
* At this point trimming or scrub can't start on this block group,
|
||||
* because we removed the block group from the rbtree
|
||||
@ -1304,6 +1297,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return;
|
||||
|
||||
/*
|
||||
* Long running balances can keep us blocked here for eternity, so
|
||||
* simply skip deletion if we're unable to get the mutex.
|
||||
@ -1543,6 +1539,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (btrfs_fs_closing(fs_info))
|
||||
return;
|
||||
|
||||
if (!btrfs_should_reclaim(fs_info))
|
||||
return;
|
||||
|
||||
@ -1890,16 +1889,6 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void link_block_group(struct btrfs_block_group *cache)
|
||||
{
|
||||
struct btrfs_space_info *space_info = cache->space_info;
|
||||
int index = btrfs_bg_flags_to_raid_index(cache->flags);
|
||||
|
||||
down_write(&space_info->groups_sem);
|
||||
list_add_tail(&cache->list, &space_info->block_groups[index]);
|
||||
up_write(&space_info->groups_sem);
|
||||
}
|
||||
|
||||
static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
struct btrfs_fs_info *fs_info, u64 start)
|
||||
{
|
||||
@ -1937,7 +1926,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
|
||||
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
|
||||
atomic_set(&cache->frozen, 0);
|
||||
mutex_init(&cache->free_space_lock);
|
||||
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
|
||||
cache->full_stripe_locks_root.root = RB_ROOT;
|
||||
mutex_init(&cache->full_stripe_locks_root.lock);
|
||||
|
||||
return cache;
|
||||
}
|
||||
@ -2002,7 +1992,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
int need_clear)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
struct btrfs_space_info *space_info;
|
||||
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
|
||||
int ret;
|
||||
|
||||
@ -2078,11 +2067,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
/* Should not have any excluded extents. Just in case, though. */
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->length == cache->used) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->used == 0) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
add_new_free_space(cache, cache->start,
|
||||
cache->start + cache->length);
|
||||
@ -2095,14 +2082,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
goto error;
|
||||
}
|
||||
trace_btrfs_add_block_group(info, cache, 0);
|
||||
btrfs_update_space_info(info, cache->flags, cache->length,
|
||||
cache->used, cache->bytes_super,
|
||||
cache->zone_unusable, cache->zone_is_active,
|
||||
&space_info);
|
||||
|
||||
cache->space_info = space_info;
|
||||
|
||||
link_block_group(cache);
|
||||
btrfs_add_bg_to_space_info(info, cache);
|
||||
|
||||
set_avail_alloc_bits(info, cache->flags);
|
||||
if (btrfs_chunk_writeable(info, cache->start)) {
|
||||
@ -2126,7 +2106,6 @@ error:
|
||||
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
|
||||
struct btrfs_space_info *space_info;
|
||||
struct rb_node *node;
|
||||
int ret = 0;
|
||||
|
||||
@ -2146,7 +2125,6 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
/* Fill dummy cache as FULL */
|
||||
bg->length = em->len;
|
||||
bg->flags = map->type;
|
||||
bg->last_byte_to_unpin = (u64)-1;
|
||||
bg->cached = BTRFS_CACHE_FINISHED;
|
||||
bg->used = em->len;
|
||||
bg->flags = map->type;
|
||||
@ -2167,10 +2145,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
|
||||
break;
|
||||
}
|
||||
|
||||
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
|
||||
0, 0, false, &space_info);
|
||||
bg->space_info = space_info;
|
||||
link_block_group(bg);
|
||||
btrfs_add_bg_to_space_info(fs_info, bg);
|
||||
|
||||
set_avail_alloc_bits(fs_info, bg->flags);
|
||||
}
|
||||
@ -2190,7 +2165,16 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
|
||||
int need_clear = 0;
|
||||
u64 cache_gen;
|
||||
|
||||
if (!root)
|
||||
/*
|
||||
* Either no extent root (with ibadroots rescue option) or we have
|
||||
* unsupported RO options. The fs can never be mounted read-write, so no
|
||||
* need to waste time searching block group items.
|
||||
*
|
||||
* This also allows new extent tree related changes to be RO compat,
|
||||
* no need for a full incompat flag.
|
||||
*/
|
||||
if (!root || (btrfs_super_compat_ro_flags(info->super_copy) &
|
||||
~BTRFS_FEATURE_COMPAT_RO_SUPP))
|
||||
return fill_dummy_bgs(info);
|
||||
|
||||
key.objectid = 0;
|
||||
@ -2425,7 +2409,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
|
||||
ret = insert_block_group_item(trans, block_group);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
if (!block_group->chunk_item_inserted) {
|
||||
if (!test_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
|
||||
&block_group->runtime_flags)) {
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
@ -2494,7 +2479,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
set_free_space_tree_thresholds(cache);
|
||||
cache->used = bytes_used;
|
||||
cache->flags = type;
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
|
||||
|
||||
@ -2519,14 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
|
||||
btrfs_free_excluded_extents(cache);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(cache)) {
|
||||
u64 new_bytes_used = size - bytes_used;
|
||||
|
||||
bytes_used += new_bytes_used >> 1;
|
||||
fragment_free_space(cache);
|
||||
}
|
||||
#endif
|
||||
/*
|
||||
* Ensure the corresponding space_info object is created and
|
||||
* assigned to our block group. We want our bg to be added to the rbtree
|
||||
@ -2547,12 +2523,17 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
* the rbtree, update the space info's counters.
|
||||
*/
|
||||
trace_btrfs_add_block_group(fs_info, cache, 1);
|
||||
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
|
||||
cache->bytes_super, cache->zone_unusable,
|
||||
cache->zone_is_active, &cache->space_info);
|
||||
btrfs_add_bg_to_space_info(fs_info, cache);
|
||||
btrfs_update_global_block_rsv(fs_info);
|
||||
|
||||
link_block_group(cache);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (btrfs_should_fragment_free_space(cache)) {
|
||||
u64 new_bytes_used = size - bytes_used;
|
||||
|
||||
cache->space_info->bytes_used += new_bytes_used >> 1;
|
||||
fragment_free_space(cache);
|
||||
}
|
||||
#endif
|
||||
|
||||
list_add_tail(&cache->bg_list, &trans->new_bgs);
|
||||
trans->delayed_ref_updates++;
|
||||
@ -2869,7 +2850,7 @@ again:
|
||||
cache_size *= fs_info->sectorsize;
|
||||
|
||||
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
|
||||
cache_size);
|
||||
cache_size, false);
|
||||
if (ret)
|
||||
goto out_put;
|
||||
|
||||
@ -3965,35 +3946,24 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 last = 0;
|
||||
|
||||
while (1) {
|
||||
struct inode *inode;
|
||||
block_group = btrfs_lookup_first_block_group(info, 0);
|
||||
while (block_group) {
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
spin_lock(&block_group->lock);
|
||||
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
|
||||
&block_group->runtime_flags)) {
|
||||
struct inode *inode = block_group->inode;
|
||||
|
||||
block_group = btrfs_lookup_first_block_group(info, last);
|
||||
while (block_group) {
|
||||
btrfs_wait_block_group_cache_done(block_group);
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->iref)
|
||||
break;
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
block_group = btrfs_next_block_group(block_group);
|
||||
}
|
||||
if (!block_group) {
|
||||
if (last == 0)
|
||||
break;
|
||||
last = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
inode = block_group->inode;
|
||||
block_group->iref = 0;
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
ASSERT(block_group->io_ctl.inode == NULL);
|
||||
iput(inode);
|
||||
last = block_group->start + block_group->length;
|
||||
btrfs_put_block_group(block_group);
|
||||
ASSERT(block_group->io_ctl.inode == NULL);
|
||||
iput(inode);
|
||||
} else {
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
block_group = btrfs_next_block_group(block_group);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4129,7 +4099,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
cleanup = (atomic_dec_and_test(&block_group->frozen) &&
|
||||
block_group->removed);
|
||||
test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags));
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (cleanup) {
|
||||
@ -4150,7 +4120,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
|
||||
* tasks trimming this block group have left 1 entry each one.
|
||||
* Free them if any.
|
||||
*/
|
||||
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(block_group);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -46,19 +46,44 @@ enum btrfs_chunk_alloc_enum {
|
||||
CHUNK_ALLOC_FORCE_FOR_EXTENT,
|
||||
};
|
||||
|
||||
/* Block group flags set at runtime */
|
||||
enum btrfs_block_group_flags {
|
||||
BLOCK_GROUP_FLAG_IREF,
|
||||
BLOCK_GROUP_FLAG_REMOVED,
|
||||
BLOCK_GROUP_FLAG_TO_COPY,
|
||||
BLOCK_GROUP_FLAG_RELOCATING_REPAIR,
|
||||
BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
|
||||
BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
};
|
||||
|
||||
enum btrfs_caching_type {
|
||||
BTRFS_CACHE_NO,
|
||||
BTRFS_CACHE_STARTED,
|
||||
BTRFS_CACHE_FINISHED,
|
||||
BTRFS_CACHE_ERROR,
|
||||
};
|
||||
|
||||
struct btrfs_caching_control {
|
||||
struct list_head list;
|
||||
struct mutex mutex;
|
||||
wait_queue_head_t wait;
|
||||
struct btrfs_work work;
|
||||
struct btrfs_block_group *block_group;
|
||||
u64 progress;
|
||||
refcount_t count;
|
||||
};
|
||||
|
||||
/* Once caching_thread() finds this much free space, it will wake up waiters. */
|
||||
#define CACHING_CTL_WAKE_UP SZ_2M
|
||||
|
||||
/*
|
||||
* Tree to record all locked full stripes of a RAID5/6 block group
|
||||
*/
|
||||
struct btrfs_full_stripe_locks_tree {
|
||||
struct rb_root root;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
struct btrfs_block_group {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct inode *inode;
|
||||
@ -95,23 +120,15 @@ struct btrfs_block_group {
|
||||
|
||||
/* For raid56, this is a full stripe, without parity */
|
||||
unsigned long full_stripe_len;
|
||||
unsigned long runtime_flags;
|
||||
|
||||
unsigned int ro;
|
||||
unsigned int iref:1;
|
||||
unsigned int has_caching_ctl:1;
|
||||
unsigned int removed:1;
|
||||
unsigned int to_copy:1;
|
||||
unsigned int relocating_repair:1;
|
||||
unsigned int chunk_item_inserted:1;
|
||||
unsigned int zone_is_active:1;
|
||||
unsigned int zoned_data_reloc_ongoing:1;
|
||||
|
||||
int disk_cache_state;
|
||||
|
||||
/* Cache tracking stuff */
|
||||
int cached;
|
||||
struct btrfs_caching_control *caching_ctl;
|
||||
u64 last_byte_to_unpin;
|
||||
|
||||
struct btrfs_space_info *space_info;
|
||||
|
||||
@ -305,8 +322,6 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
|
||||
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
|
||||
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
|
||||
int btrfs_free_block_groups(struct btrfs_fs_info *info);
|
||||
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
|
||||
struct btrfs_caching_control *caching_ctl);
|
||||
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
struct block_device *bdev, u64 physical, u64 **logical,
|
||||
int *naddrs, int *stripe_len);
|
||||
|
@ -286,7 +286,7 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
if (block_rsv == delayed_rsv)
|
||||
target = global_rsv;
|
||||
else if (block_rsv != global_rsv && !delayed_rsv->full)
|
||||
else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
|
||||
target = delayed_rsv;
|
||||
|
||||
if (target && block_rsv->space_info != target->space_info)
|
||||
@ -424,6 +424,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
|
||||
case BTRFS_CSUM_TREE_OBJECTID:
|
||||
case BTRFS_EXTENT_TREE_OBJECTID:
|
||||
case BTRFS_FREE_SPACE_TREE_OBJECTID:
|
||||
case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
|
||||
root->block_rsv = &fs_info->delayed_refs_rsv;
|
||||
break;
|
||||
case BTRFS_ROOT_TREE_OBJECTID:
|
||||
|
@ -92,4 +92,13 @@ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
|
||||
btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Fast path to check if the reserve is full, may be carefully used outside of
|
||||
* locks.
|
||||
*/
|
||||
static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
|
||||
{
|
||||
return data_race(rsv->full);
|
||||
}
|
||||
|
||||
#endif /* BTRFS_BLOCK_RSV_H */
|
||||
|
@ -65,6 +65,8 @@ enum {
|
||||
* on the same file.
|
||||
*/
|
||||
BTRFS_INODE_VERITY_IN_PROGRESS,
|
||||
/* Set when this inode is a free space inode. */
|
||||
BTRFS_INODE_FREE_SPACE_INODE,
|
||||
};
|
||||
|
||||
/* in memory btrfs inode */
|
||||
@ -94,7 +96,8 @@ struct btrfs_inode {
|
||||
/* special utility tree used to record which mirrors have already been
|
||||
* tried when checksums fail for a given block
|
||||
*/
|
||||
struct extent_io_tree io_failure_tree;
|
||||
struct rb_root io_failure_tree;
|
||||
spinlock_t io_failure_lock;
|
||||
|
||||
/*
|
||||
* Keep track of where the inode has extent items mapped in order to
|
||||
@ -250,11 +253,6 @@ struct btrfs_inode {
|
||||
struct inode vfs_inode;
|
||||
};
|
||||
|
||||
static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode)
|
||||
{
|
||||
return inode->root->fs_info->sectorsize;
|
||||
}
|
||||
|
||||
static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
|
||||
{
|
||||
return container_of(inode, struct btrfs_inode, vfs_inode);
|
||||
@ -272,13 +270,6 @@ static inline unsigned long btrfs_inode_hash(u64 objectid,
|
||||
return (unsigned long)h;
|
||||
}
|
||||
|
||||
static inline void btrfs_insert_inode_hash(struct inode *inode)
|
||||
{
|
||||
unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
|
||||
|
||||
__insert_inode_hash(inode, h);
|
||||
}
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
|
||||
/*
|
||||
@ -312,13 +303,7 @@ static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
|
||||
|
||||
static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
|
||||
if (root == root->fs_info->tree_root &&
|
||||
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags);
|
||||
}
|
||||
|
||||
static inline bool is_data_inode(struct inode *inode)
|
||||
|
@ -152,9 +152,7 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
}
|
||||
|
||||
/* Do io completion on the original bio */
|
||||
if (cb->status != BLK_STS_OK)
|
||||
cb->orig_bio->bi_status = cb->status;
|
||||
bio_endio(cb->orig_bio);
|
||||
btrfs_bio_end_io(btrfs_bio(cb->orig_bio), cb->status);
|
||||
|
||||
/* Finally free the cb struct */
|
||||
kfree(cb->compressed_pages);
|
||||
@ -166,16 +164,15 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
|
||||
* before decompressing it into the original bio and freeing the uncompressed
|
||||
* pages.
|
||||
*/
|
||||
static void end_compressed_bio_read(struct bio *bio)
|
||||
static void end_compressed_bio_read(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct compressed_bio *cb = bbio->private;
|
||||
struct inode *inode = cb->inode;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_inode *bi = BTRFS_I(inode);
|
||||
bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
|
||||
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
blk_status_t status = bio->bi_status;
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t status = bbio->bio.bi_status;
|
||||
struct bvec_iter iter;
|
||||
struct bio_vec bv;
|
||||
u32 offset;
|
||||
@ -186,9 +183,8 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
if (!status &&
|
||||
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
|
||||
bv.bv_page, bv.bv_offset))) {
|
||||
clean_io_failure(fs_info, &bi->io_failure_tree,
|
||||
&bi->io_tree, start, bv.bv_page,
|
||||
btrfs_ino(bi), bv.bv_offset);
|
||||
btrfs_clean_io_failure(bi, start, bv.bv_page,
|
||||
bv.bv_offset);
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
@ -209,7 +205,7 @@ static void end_compressed_bio_read(struct bio *bio)
|
||||
if (refcount_dec_and_test(&cb->pending_ios))
|
||||
finish_compressed_bio_read(cb);
|
||||
btrfs_bio_free_csum(bbio);
|
||||
bio_put(bio);
|
||||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -301,20 +297,20 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
|
||||
* This also calls the writeback end hooks for the file pages so that metadata
|
||||
* and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_compressed_bio_write(struct bio *bio)
|
||||
static void end_compressed_bio_write(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = bio->bi_private;
|
||||
struct compressed_bio *cb = bbio->private;
|
||||
|
||||
if (bio->bi_status)
|
||||
cb->status = bio->bi_status;
|
||||
if (bbio->bio.bi_status)
|
||||
cb->status = bbio->bio.bi_status;
|
||||
|
||||
if (refcount_dec_and_test(&cb->pending_ios)) {
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
|
||||
btrfs_record_physical_zoned(cb->inode, cb->start, &bbio->bio);
|
||||
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
|
||||
}
|
||||
bio_put(bio);
|
||||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -335,7 +331,8 @@ static void end_compressed_bio_write(struct bio *bio)
|
||||
|
||||
|
||||
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
|
||||
blk_opf_t opf, bio_end_io_t endio_func,
|
||||
blk_opf_t opf,
|
||||
btrfs_bio_end_io_t endio_func,
|
||||
u64 *next_stripe_start)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
|
||||
@ -344,12 +341,8 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS);
|
||||
|
||||
bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
|
||||
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bio->bi_opf = opf;
|
||||
bio->bi_private = cb;
|
||||
bio->bi_end_io = endio_func;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
@ -478,8 +471,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
|
||||
if (!skip_sum) {
|
||||
ret = btrfs_csum_one_bio(inode, bio, start, true);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(btrfs_bio(bio), ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -596,7 +588,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
}
|
||||
|
||||
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
|
||||
lock_extent(tree, cur, page_end);
|
||||
lock_extent(tree, cur, page_end, NULL);
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
|
||||
read_unlock(&em_tree->lock);
|
||||
@ -610,7 +602,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
|
||||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
|
||||
free_extent_map(em);
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
@ -630,7 +622,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
|
||||
if (ret != add_size) {
|
||||
unlock_extent(tree, cur, page_end);
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
@ -799,8 +791,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
|
||||
ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
|
||||
if (ret) {
|
||||
comp_bio->bi_status = ret;
|
||||
bio_endio(comp_bio);
|
||||
btrfs_bio_end_io(btrfs_bio(comp_bio), ret);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -826,8 +817,7 @@ fail:
|
||||
kfree(cb);
|
||||
out:
|
||||
free_extent_map(em);
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(btrfs_bio(bio), ret);
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1447,6 +1447,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (p->nowait) {
|
||||
free_extent_buffer(tmp);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
|
||||
@ -1467,6 +1472,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
ret = -EAGAIN;
|
||||
|
||||
goto out;
|
||||
} else if (p->nowait) {
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (unlock_up) {
|
||||
@ -1634,7 +1641,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
|
||||
* We don't know the level of the root node until we actually
|
||||
* have it read locked
|
||||
*/
|
||||
b = btrfs_read_lock_root_node(root);
|
||||
if (p->nowait) {
|
||||
b = btrfs_try_read_lock_root_node(root);
|
||||
if (IS_ERR(b))
|
||||
return b;
|
||||
} else {
|
||||
b = btrfs_read_lock_root_node(root);
|
||||
}
|
||||
level = btrfs_header_level(b);
|
||||
if (level > write_lock_level)
|
||||
goto out;
|
||||
@ -1910,6 +1923,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
WARN_ON(p->nodes[0] != NULL);
|
||||
BUG_ON(!cow && ins_len);
|
||||
|
||||
/*
|
||||
* For now only allow nowait for read only operations. There's no
|
||||
* strict reason why we can't, we just only need it for reads so it's
|
||||
* only implemented for reads.
|
||||
*/
|
||||
ASSERT(!p->nowait || !cow);
|
||||
|
||||
if (ins_len < 0) {
|
||||
lowest_unlock = 2;
|
||||
|
||||
@ -1936,7 +1956,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
|
||||
if (p->need_commit_sem) {
|
||||
ASSERT(p->search_commit_root);
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
if (p->nowait) {
|
||||
if (!down_read_trylock(&fs_info->commit_root_sem))
|
||||
return -EAGAIN;
|
||||
} else {
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
}
|
||||
}
|
||||
|
||||
again:
|
||||
@ -2082,7 +2107,15 @@ cow_done:
|
||||
btrfs_tree_lock(b);
|
||||
p->locks[level] = BTRFS_WRITE_LOCK;
|
||||
} else {
|
||||
btrfs_tree_read_lock(b);
|
||||
if (p->nowait) {
|
||||
if (!btrfs_try_tree_read_lock(b)) {
|
||||
free_extent_buffer(b);
|
||||
ret = -EAGAIN;
|
||||
goto done;
|
||||
}
|
||||
} else {
|
||||
btrfs_tree_read_lock(b);
|
||||
}
|
||||
p->locks[level] = BTRFS_READ_LOCK;
|
||||
}
|
||||
p->nodes[level] = b;
|
||||
@ -2131,6 +2164,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
|
||||
|
||||
lowest_level = p->lowest_level;
|
||||
WARN_ON(p->nodes[0] != NULL);
|
||||
ASSERT(!p->nowait);
|
||||
|
||||
if (p->search_commit_root) {
|
||||
BUG_ON(time_seq);
|
||||
@ -4432,6 +4466,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
|
||||
int ret = 1;
|
||||
int keep_locks = path->keep_locks;
|
||||
|
||||
ASSERT(!path->nowait);
|
||||
path->keep_locks = 1;
|
||||
again:
|
||||
cur = btrfs_read_lock_root_node(root);
|
||||
@ -4612,6 +4647,8 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ASSERT(!path->nowait);
|
||||
|
||||
nritems = btrfs_header_nritems(path->nodes[0]);
|
||||
if (nritems == 0)
|
||||
return 1;
|
||||
|
370
fs/btrfs/ctree.h
@ -42,7 +42,6 @@ struct btrfs_delayed_ref_root;
|
||||
struct btrfs_space_info;
|
||||
struct btrfs_block_group;
|
||||
extern struct kmem_cache *btrfs_trans_handle_cachep;
|
||||
extern struct kmem_cache *btrfs_bit_radix_cachep;
|
||||
extern struct kmem_cache *btrfs_path_cachep;
|
||||
extern struct kmem_cache *btrfs_free_space_cachep;
|
||||
extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
|
||||
@ -50,6 +49,11 @@ struct btrfs_ordered_sum;
|
||||
struct btrfs_ref;
|
||||
struct btrfs_bio;
|
||||
struct btrfs_ioctl_encoded_io_args;
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
struct btrfs_balance_control;
|
||||
struct btrfs_delayed_root;
|
||||
struct reloc_control;
|
||||
|
||||
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
|
||||
|
||||
@ -280,14 +284,9 @@ struct btrfs_super_block {
|
||||
/* the UUID written into btree blocks */
|
||||
u8 metadata_uuid[BTRFS_FSID_SIZE];
|
||||
|
||||
/* Extent tree v2 */
|
||||
__le64 block_group_root;
|
||||
__le64 block_group_root_generation;
|
||||
u8 block_group_root_level;
|
||||
|
||||
/* future expansion */
|
||||
u8 reserved8[7];
|
||||
__le64 reserved[25];
|
||||
u8 reserved8[8];
|
||||
__le64 reserved[27];
|
||||
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
|
||||
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
|
||||
|
||||
@ -307,7 +306,8 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
|
||||
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
|
||||
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
|
||||
BTRFS_FEATURE_COMPAT_RO_VERITY)
|
||||
BTRFS_FEATURE_COMPAT_RO_VERITY | \
|
||||
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
|
||||
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
|
||||
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
|
||||
@ -443,9 +443,10 @@ struct btrfs_path {
|
||||
* header (ie. sizeof(struct btrfs_item) is not included).
|
||||
*/
|
||||
unsigned int search_for_extension:1;
|
||||
/* Stop search if any locks need to be taken (for read) */
|
||||
unsigned int nowait:1;
|
||||
};
|
||||
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
|
||||
sizeof(struct btrfs_item))
|
||||
|
||||
struct btrfs_dev_replace {
|
||||
u64 replace_state; /* see #define above */
|
||||
time64_t time_started; /* seconds since 1-Jan-1970 */
|
||||
@ -502,21 +503,6 @@ struct btrfs_free_cluster {
|
||||
struct list_head block_group_list;
|
||||
};
|
||||
|
||||
enum btrfs_caching_type {
|
||||
BTRFS_CACHE_NO,
|
||||
BTRFS_CACHE_STARTED,
|
||||
BTRFS_CACHE_FINISHED,
|
||||
BTRFS_CACHE_ERROR,
|
||||
};
|
||||
|
||||
/*
|
||||
* Tree to record all locked full stripes of a RAID5/6 block group
|
||||
*/
|
||||
struct btrfs_full_stripe_locks_tree {
|
||||
struct rb_root root;
|
||||
struct mutex lock;
|
||||
};
|
||||
|
||||
/* Discard control. */
|
||||
/*
|
||||
* Async discard uses multiple lists to differentiate the discard filter
|
||||
@ -548,42 +534,6 @@ struct btrfs_discard_ctl {
|
||||
atomic64_t discard_bytes_saved;
|
||||
};
|
||||
|
||||
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* fs_info */
|
||||
struct reloc_control;
|
||||
struct btrfs_device;
|
||||
struct btrfs_fs_devices;
|
||||
struct btrfs_balance_control;
|
||||
struct btrfs_delayed_root;
|
||||
|
||||
/*
|
||||
* Block group or device which contains an active swapfile. Used for preventing
|
||||
* unsafe operations while a swapfile is active.
|
||||
*
|
||||
* These are sorted on (ptr, inode) (note that a block group or device can
|
||||
* contain more than one swapfile). We compare the pointer values because we
|
||||
* don't actually care what the object is, we just need a quick check whether
|
||||
* the object exists in the rbtree.
|
||||
*/
|
||||
struct btrfs_swapfile_pin {
|
||||
struct rb_node node;
|
||||
void *ptr;
|
||||
struct inode *inode;
|
||||
/*
|
||||
* If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
|
||||
* points to a struct btrfs_device.
|
||||
*/
|
||||
bool is_block_group;
|
||||
/*
|
||||
* Only used when 'is_block_group' is true and it is the number of
|
||||
* extents used by a swapfile for this block group ('ptr' field).
|
||||
*/
|
||||
int bg_extent_count;
|
||||
};
|
||||
|
||||
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
|
||||
|
||||
enum {
|
||||
BTRFS_FS_CLOSING_START,
|
||||
BTRFS_FS_CLOSING_DONE,
|
||||
@ -890,6 +840,7 @@ struct btrfs_fs_info {
|
||||
|
||||
struct kobject *space_info_kobj;
|
||||
struct kobject *qgroups_kobj;
|
||||
struct kobject *discard_kobj;
|
||||
|
||||
/* used to keep from writing metadata until there is a nice batch */
|
||||
struct percpu_counter dirty_metadata_bytes;
|
||||
@ -1005,6 +956,7 @@ struct btrfs_fs_info {
|
||||
struct completion qgroup_rescan_completion;
|
||||
struct btrfs_work qgroup_rescan_work;
|
||||
bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
|
||||
u8 qgroup_drop_subtree_thres;
|
||||
|
||||
/* filesystem state */
|
||||
unsigned long fs_state;
|
||||
@ -1092,6 +1044,23 @@ struct btrfs_fs_info {
|
||||
/* Updates are not protected by any lock */
|
||||
struct btrfs_commit_stats commit_stats;
|
||||
|
||||
/*
|
||||
* Last generation where we dropped a non-relocation root.
|
||||
* Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
|
||||
* to change it and to read it, respectively.
|
||||
*/
|
||||
u64 last_root_drop_gen;
|
||||
|
||||
/*
|
||||
* Annotations for transaction events (structures are empty when
|
||||
* compiled without lockdep).
|
||||
*/
|
||||
struct lockdep_map btrfs_trans_num_writers_map;
|
||||
struct lockdep_map btrfs_trans_num_extwriters_map;
|
||||
struct lockdep_map btrfs_state_change_map[4];
|
||||
struct lockdep_map btrfs_trans_pending_ordered_map;
|
||||
struct lockdep_map btrfs_ordered_extent_map;
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
|
||||
spinlock_t ref_verify_lock;
|
||||
struct rb_root block_tree;
|
||||
@ -1099,7 +1068,6 @@ struct btrfs_fs_info {
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct kobject *debug_kobj;
|
||||
struct kobject *discard_debug_kobj;
|
||||
struct list_head allocated_roots;
|
||||
|
||||
spinlock_t eb_leak_lock;
|
||||
@ -1107,11 +1075,84 @@ struct btrfs_fs_info {
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
|
||||
u64 gen)
|
||||
{
|
||||
WRITE_ONCE(fs_info->last_root_drop_gen, gen);
|
||||
}
|
||||
|
||||
static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return READ_ONCE(fs_info->last_root_drop_gen);
|
||||
}
|
||||
|
||||
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
|
||||
{
|
||||
return sb->s_fs_info;
|
||||
}
|
||||
|
||||
/*
|
||||
* Take the number of bytes to be checksummed and figure out how many leaves
|
||||
* it would require to store the csums for that many bytes.
|
||||
*/
|
||||
static inline u64 btrfs_csum_bytes_to_leaves(
|
||||
const struct btrfs_fs_info *fs_info, u64 csum_bytes)
|
||||
{
|
||||
const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
|
||||
|
||||
return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use this if we would be adding new items, as we could split nodes as we cow
|
||||
* down the tree.
|
||||
*/
|
||||
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
|
||||
}
|
||||
|
||||
/*
|
||||
* Doing a truncate or a modification won't result in new nodes or leaves, just
|
||||
* what we need for COW.
|
||||
*/
|
||||
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
|
||||
}
|
||||
|
||||
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
|
||||
sizeof(struct btrfs_item))
|
||||
|
||||
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return fs_info->zone_size > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Count how many fs_info->max_extent_size cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (!fs_info)
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
#endif
|
||||
|
||||
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
|
||||
}

bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
			enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
				 enum btrfs_exclusive_operation type);
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
			  enum btrfs_exclusive_operation op);

/*
 * The state of btrfs root
 */
@@ -1174,6 +1215,82 @@ enum {
	BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};

enum btrfs_lockdep_trans_states {
	BTRFS_LOCKDEP_TRANS_COMMIT_START,
	BTRFS_LOCKDEP_TRANS_UNBLOCKED,
	BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
	BTRFS_LOCKDEP_TRANS_COMPLETED,
};

/*
 * Lockdep annotation for wait events.
 *
 * @owner: The struct where the lockdep map is defined
 * @lock: The lockdep map corresponding to a wait event
 *
 * This macro is used to annotate a wait event. In this case a thread acquires
 * the lockdep map as writer (exclusive lock) because it has to block until all
 * the threads that hold the lock as readers signal the condition for the wait
 * event and release their locks.
 */
#define btrfs_might_wait_for_event(owner, lock) \
	do { \
		rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_); \
		rwsem_release(&owner->lock##_map, _THIS_IP_); \
	} while (0)

/*
 * Protection for the resource/condition of a wait event.
 *
 * @owner: The struct where the lockdep map is defined
 * @lock: The lockdep map corresponding to a wait event
 *
 * Many threads can modify the condition for the wait event at the same time
 * and signal the threads that block on the wait event. The threads that modify
 * the condition and do the signaling acquire the lock as readers (shared
 * lock).
 */
#define btrfs_lockdep_acquire(owner, lock) \
	rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_)

/*
 * Used after signaling the condition for a wait event to release the lockdep
 * map held by a reader thread.
 */
#define btrfs_lockdep_release(owner, lock) \
rwsem_release(&owner->lock##_map, _THIS_IP_)
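Roughly, the intended pairing looks like the sketch below; it is illustrative only, btrfs_ordered_extent is one of the maps initialized later in this series, and the wait queue plus condition are placeholders. The thread that changes the condition and wakes waiters holds the map as a reader, while a would-be waiter records the dependency with the might_wait macro before blocking:

	/* Thread that makes the condition true and signals it: */
	btrfs_lockdep_acquire(fs_info, btrfs_ordered_extent);
	/* ... update the condition, e.g. finish the ordered extent ... */
	btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
	wake_up(&waitq);

	/* Thread that may have to wait for the condition: */
	btrfs_might_wait_for_event(fs_info, btrfs_ordered_extent);
	wait_event(waitq, condition_is_true);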
|
||||
|
||||
/*
|
||||
* Macros for the transaction states wait events, similar to the generic wait
|
||||
* event macros.
|
||||
*/
|
||||
#define btrfs_might_wait_for_state(owner, i) \
|
||||
do { \
|
||||
rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \
|
||||
rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_); \
|
||||
} while (0)
|
||||
|
||||
#define btrfs_trans_state_lockdep_acquire(owner, i) \
|
||||
rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_)
|
||||
|
||||
#define btrfs_trans_state_lockdep_release(owner, i) \
|
||||
rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_)
|
||||
|
||||
/* Initialization of the lockdep map */
|
||||
#define btrfs_lockdep_init_map(owner, lock) \
|
||||
do { \
|
||||
static struct lock_class_key lock##_key; \
|
||||
lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \
|
||||
} while (0)
|
||||
|
||||
/* Initialization of the transaction states lockdep maps. */
|
||||
#define btrfs_state_lockdep_init_map(owner, lock, state) \
|
||||
do { \
|
||||
static struct lock_class_key lock##_key; \
|
||||
lockdep_init_map(&owner->btrfs_state_change_map[state], #lock, \
|
||||
&lock##_key, 0); \
|
||||
} while (0)
|
||||
|
||||
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
|
||||
@ -2391,17 +2508,6 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
|
||||
num_devices, 64);
|
||||
|
||||
/*
|
||||
* For extent tree v2 we overload the extent root with the block group root, as
|
||||
* we will have multiple extent roots.
|
||||
*/
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
|
||||
extent_root, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
|
||||
extent_root_gen, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
|
||||
struct btrfs_root_backup, extent_root_level, 8);
|
||||
|
||||
/* struct btrfs_balance_item */
|
||||
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
|
||||
|
||||
@ -2534,13 +2640,6 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
|
||||
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
|
||||
uuid_tree_generation, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
|
||||
block_group_root, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
|
||||
struct btrfs_super_block,
|
||||
block_group_root_generation, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
|
||||
block_group_root_level, 8);
|
||||
|
||||
int btrfs_super_csum_size(const struct btrfs_super_block *s);
|
||||
const char *btrfs_super_csum_name(u16 csum_type);
|
||||
@ -2761,45 +2860,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
|
||||
enum btrfs_inline_ref_type is_data);
|
||||
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
static inline u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums,
				 u64 offset)
{
	u64 offset_in_sectors = offset >> fs_info->sectorsize_bits;

	return csums + offset_in_sectors * fs_info->csum_size;
}

/*
 * Take the number of bytes to be checksummed and figure out how many leaves
 * it would require to store the csums for that many bytes.
 */
static inline u64 btrfs_csum_bytes_to_leaves(
		const struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
	const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;

	return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
}
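Worked numbers for the two checksum helpers, as a standalone sketch; 4 KiB sectors, 4-byte crc32c checksums and roughly 4000 checksums per 16 KiB leaf are all assumed values:

	#include <stdio.h>
	#include <stdint.h>

	#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

	int main(void)
	{
		uint32_t sectorsize_bits = 12;		/* assumed 4 KiB sectors */
		uint32_t csum_size = 4;			/* assumed crc32c */
		uint64_t csums_per_leaf = 4000;		/* roughly, for a 16 KiB leaf */

		/* btrfs_csum_ptr(): csum for data offset 64 KiB sits 16 * 4 bytes in */
		uint64_t offset = 64 * 1024;
		printf("csum buffer offset: %llu\n",
		       (unsigned long long)((offset >> sectorsize_bits) * csum_size));

		/* btrfs_csum_bytes_to_leaves(): csums for 1 GiB of data */
		uint64_t num_csums = (1ULL << 30) >> sectorsize_bits;	/* 262144 */
		printf("leaves needed: %llu\n",
		       (unsigned long long)DIV_ROUND_UP(num_csums, csums_per_leaf));

		return 0;	/* prints 64 and 66 with these assumptions */
	}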
|
||||
|
||||
/*
|
||||
* Use this if we would be adding new items, as we could split nodes as we cow
|
||||
* down the tree.
|
||||
*/
|
||||
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
|
||||
}
|
||||
|
||||
/*
|
||||
* Doing a truncate or a modification won't result in new nodes or leaves, just
|
||||
* what we need for COW.
|
||||
*/
|
||||
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
unsigned num_items)
|
||||
{
|
||||
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
|
||||
}
|
||||
|
||||
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 num_bytes);
|
||||
@ -3257,12 +3317,9 @@ int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
|
||||
int btrfs_del_csums(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 bytenr, u64 len);
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
|
||||
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 objectid, u64 pos,
|
||||
u64 disk_offset, u64 disk_num_bytes,
|
||||
u64 num_bytes, u64 offset, u64 ram_bytes,
|
||||
u8 compression, u8 encryption, u16 other_encoding);
|
||||
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 objectid, u64 pos,
|
||||
u64 num_bytes);
|
||||
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid,
|
||||
@ -3273,7 +3330,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
|
||||
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
|
||||
u64 offset, bool one_ordered);
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct list_head *list, int search_commit);
|
||||
struct list_head *list, int search_commit,
|
||||
bool nowait);
|
||||
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
const struct btrfs_path *path,
|
||||
struct btrfs_file_extent_item *fi,
|
||||
@ -3299,11 +3357,9 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
|
||||
u64 start, u64 end);
|
||||
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
|
||||
u32 bio_offset, struct page *page, u32 pgoff);
|
||||
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
|
||||
u64 start, u64 len);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
u64 *orig_start, u64 *orig_block_len,
|
||||
u64 *ram_bytes, bool strict);
|
||||
u64 *ram_bytes, bool nowait, bool strict);
|
||||
|
||||
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
|
||||
struct btrfs_inode *inode);
|
||||
@ -3358,7 +3414,6 @@ void btrfs_split_delalloc_extent(struct inode *inode,
|
||||
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
|
||||
void btrfs_evict_inode(struct inode *inode);
|
||||
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
|
||||
struct inode *btrfs_alloc_inode(struct super_block *sb);
|
||||
void btrfs_destroy_inode(struct inode *inode);
|
||||
void btrfs_free_inode(struct inode *inode);
|
||||
@ -3439,15 +3494,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
|
||||
struct btrfs_ioctl_space_info *space);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type);
|
||||
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation type);
|
||||
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_exclusive_operation op);
|
||||
|
||||
|
||||
/* file.c */
|
||||
int __init btrfs_auto_defrag_init(void);
|
||||
@ -3457,8 +3503,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
|
||||
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
|
||||
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
int skip_pinned);
|
||||
extern const struct file_operations btrfs_file_operations;
|
||||
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct btrfs_inode *inode,
|
||||
@ -3478,8 +3522,10 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
struct extent_state **cached, bool noreserve);
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
|
||||
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||
size_t *write_bytes);
|
||||
size_t *write_bytes, bool nowait);
|
||||
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
|
||||
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret);
|
||||
|
||||
/* tree-defrag.c */
|
||||
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
|
||||
@ -3745,7 +3791,7 @@ const char * __attribute_const__ btrfs_decode_error(int errno);
|
||||
__cold
|
||||
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
const char *function,
|
||||
unsigned int line, int errno);
|
||||
unsigned int line, int errno, bool first_hit);
|
||||
|
||||
/*
|
||||
* Call btrfs_abort_transaction as early as possible when an error condition is
|
||||
@ -3753,9 +3799,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
#define btrfs_abort_transaction(trans, errno) \
|
||||
do { \
|
||||
bool first = false; \
|
||||
/* Report first abort since mount */ \
|
||||
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
|
||||
&((trans)->fs_info->fs_state))) { \
|
||||
first = true; \
|
||||
if ((errno) != -EIO && (errno) != -EROFS) { \
|
||||
WARN(1, KERN_DEBUG \
|
||||
"BTRFS: Transaction aborted (error %d)\n", \
|
||||
@ -3767,7 +3815,7 @@ do { \
|
||||
} \
|
||||
} \
|
||||
__btrfs_abort_transaction((trans), __func__, \
|
||||
__LINE__, (errno)); \
|
||||
__LINE__, (errno), first); \
|
||||
} while (0)
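For context, the macro is normally invoked right at the point of failure, along these lines (a sketch; the helper being called is hypothetical). With this change the first abort since mount is additionally passed down to __btrfs_abort_transaction() via the new first_hit/first argument:

	ret = modify_some_tree(trans);		/* hypothetical helper */
	if (ret) {
		/* Flags the fs as aborted and reports the first occurrence. */
		btrfs_abort_transaction(trans, ret);
		return ret;
	}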
|
||||
|
||||
#ifdef CONFIG_PRINTK_INDEX
|
||||
@ -3984,16 +4032,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *info);
|
||||
int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
|
||||
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
|
||||
struct btrfs_scrub_progress *progress);
|
||||
static inline void btrfs_init_full_stripe_locks_tree(
|
||||
struct btrfs_full_stripe_locks_tree *locks_root)
|
||||
{
|
||||
locks_root->root = RB_ROOT;
|
||||
mutex_init(&locks_root->lock);
|
||||
}
|
||||
|
||||
/* dev-replace.c */
|
||||
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
|
||||
|
||||
static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
|
||||
@ -4020,6 +4061,7 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
|
||||
|
||||
extern const struct fsverity_operations btrfs_verityops;
|
||||
int btrfs_drop_verity_items(struct btrfs_inode *inode);
|
||||
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size);
|
||||
|
||||
BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item,
|
||||
encryption, 8);
|
||||
@ -4037,6 +4079,12 @@ static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
|
||||
size_t buf_size)
|
||||
{
|
||||
return -EPERM;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Sanity test specific functions */
|
||||
@ -4053,24 +4101,6 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return fs_info->zone_size > 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Count how many fs_info->max_extent_size cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (!fs_info)
|
||||
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
|
||||
#endif
|
||||
|
||||
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
|
||||
}
|
||||
|
||||
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
|
||||
{
|
||||
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
|
||||
|
@ -127,9 +127,11 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
|
||||
}
|
||||
|
||||
int btrfs_check_data_free_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len)
|
||||
struct extent_changeset **reserved, u64 start,
|
||||
u64 len, bool noflush)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA;
|
||||
int ret;
|
||||
|
||||
/* align the range */
|
||||
@ -137,7 +139,12 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
|
||||
round_down(start, fs_info->sectorsize);
|
||||
start = round_down(start, fs_info->sectorsize);
|
||||
|
||||
ret = btrfs_alloc_data_chunk_ondemand(inode, len);
|
||||
if (noflush)
|
||||
flush = BTRFS_RESERVE_NO_FLUSH;
|
||||
else if (btrfs_is_free_space_inode(inode))
|
||||
flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE;
|
||||
|
||||
ret = btrfs_reserve_data_bytes(fs_info, len, flush);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -454,7 +461,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = btrfs_check_data_free_space(inode, reserved, start, len);
|
||||
ret = btrfs_check_data_free_space(inode, reserved, start, len, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = btrfs_delalloc_reserve_metadata(inode, len, len, false);
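Existing callers, like the one just above, simply pass false for the new noflush argument; a caller on a no-wait path can instead request the reservation without triggering any flushing, along the lines of this sketch (names and error policy are illustrative):

	ret = btrfs_check_data_free_space(inode, &data_reserved, pos,
					  write_bytes, true /* noflush */);
	if (ret < 0)
		return -EAGAIN;	/* retry later from a context that is allowed to flush */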
|
||||
|
@ -7,7 +7,8 @@ struct extent_changeset;
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
|
||||
int btrfs_check_data_free_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len);
|
||||
struct extent_changeset **reserved, u64 start, u64 len,
|
||||
bool noflush);
|
||||
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset *reserved, u64 start, u64 len);
|
||||
void btrfs_delalloc_release_space(struct btrfs_inode *inode,
|
||||
|
@ -302,15 +302,21 @@ static inline void btrfs_release_prepared_delayed_node(
|
||||
__btrfs_release_delayed_node(node, 1);
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
|
||||
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
|
||||
struct btrfs_delayed_node *node,
|
||||
enum btrfs_delayed_item_type type)
|
||||
{
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
|
||||
if (item) {
|
||||
item->data_len = data_len;
|
||||
item->ins_or_del = 0;
|
||||
item->type = type;
|
||||
item->bytes_reserved = 0;
|
||||
item->delayed_node = NULL;
|
||||
item->delayed_node = node;
|
||||
RB_CLEAR_NODE(&item->rb_node);
|
||||
INIT_LIST_HEAD(&item->log_list);
|
||||
item->logged = false;
|
||||
refcount_set(&item->refs, 1);
|
||||
}
|
||||
return item;
|
||||
@ -319,72 +325,32 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
|
||||
/*
|
||||
* __btrfs_lookup_delayed_item - look up the delayed item by key
|
||||
* @delayed_node: pointer to the delayed node
|
||||
* @key: the key to look up
|
||||
* @prev: used to store the prev item if the right item isn't found
|
||||
* @next: used to store the next item if the right item isn't found
|
||||
* @index: the dir index value to lookup (offset of a dir index key)
|
||||
*
|
||||
* Note: if we don't find the right item, we will return the prev item and
|
||||
* the next item.
|
||||
*/
|
||||
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
|
||||
struct rb_root *root,
|
||||
struct btrfs_key *key,
|
||||
struct btrfs_delayed_item **prev,
|
||||
struct btrfs_delayed_item **next)
|
||||
u64 index)
|
||||
{
|
||||
struct rb_node *node, *prev_node = NULL;
|
||||
struct rb_node *node = root->rb_node;
|
||||
struct btrfs_delayed_item *delayed_item = NULL;
|
||||
int ret = 0;
|
||||
|
||||
node = root->rb_node;
|
||||
|
||||
while (node) {
|
||||
delayed_item = rb_entry(node, struct btrfs_delayed_item,
|
||||
rb_node);
|
||||
prev_node = node;
|
||||
ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
|
||||
if (ret < 0)
|
||||
if (delayed_item->index < index)
|
||||
node = node->rb_right;
|
||||
else if (ret > 0)
|
||||
else if (delayed_item->index > index)
|
||||
node = node->rb_left;
|
||||
else
|
||||
return delayed_item;
|
||||
}
|
||||
|
||||
if (prev) {
|
||||
if (!prev_node)
|
||||
*prev = NULL;
|
||||
else if (ret < 0)
|
||||
*prev = delayed_item;
|
||||
else if ((node = rb_prev(prev_node)) != NULL) {
|
||||
*prev = rb_entry(node, struct btrfs_delayed_item,
|
||||
rb_node);
|
||||
} else
|
||||
*prev = NULL;
|
||||
}
|
||||
|
||||
if (next) {
|
||||
if (!prev_node)
|
||||
*next = NULL;
|
||||
else if (ret > 0)
|
||||
*next = delayed_item;
|
||||
else if ((node = rb_next(prev_node)) != NULL) {
|
||||
*next = rb_entry(node, struct btrfs_delayed_item,
|
||||
rb_node);
|
||||
} else
|
||||
*next = NULL;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
|
||||
struct btrfs_delayed_node *delayed_node,
|
||||
struct btrfs_key *key)
|
||||
{
|
||||
return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
|
||||
NULL, NULL);
|
||||
}
|
||||
|
||||
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
struct btrfs_delayed_item *ins)
|
||||
{
|
||||
@ -392,15 +358,13 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
struct rb_node *parent_node = NULL;
|
||||
struct rb_root_cached *root;
|
||||
struct btrfs_delayed_item *item;
|
||||
int cmp;
|
||||
bool leftmost = true;
|
||||
|
||||
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
root = &delayed_node->ins_root;
|
||||
else if (ins->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
|
||||
root = &delayed_node->del_root;
|
||||
else
|
||||
BUG();
|
||||
root = &delayed_node->del_root;
|
||||
|
||||
p = &root->rb_root.rb_node;
|
||||
node = &ins->rb_node;
|
||||
|
||||
@ -409,11 +373,10 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
item = rb_entry(parent_node, struct btrfs_delayed_item,
|
||||
rb_node);
|
||||
|
||||
cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
|
||||
if (cmp < 0) {
|
||||
if (item->index < ins->index) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else if (cmp > 0) {
|
||||
} else if (item->index > ins->index) {
|
||||
p = &(*p)->rb_left;
|
||||
} else {
|
||||
return -EEXIST;
|
||||
@ -422,14 +385,10 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
|
||||
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color_cached(node, root, leftmost);
|
||||
ins->delayed_node = delayed_node;
|
||||
|
||||
/* Delayed items are always for dir index items. */
|
||||
ASSERT(ins->key.type == BTRFS_DIR_INDEX_KEY);
|
||||
|
||||
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM &&
|
||||
ins->key.offset >= delayed_node->index_cnt)
|
||||
delayed_node->index_cnt = ins->key.offset + 1;
|
||||
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM &&
|
||||
ins->index >= delayed_node->index_cnt)
|
||||
delayed_node->index_cnt = ins->index + 1;
|
||||
|
||||
delayed_node->count++;
|
||||
atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
|
||||
@ -451,21 +410,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
|
||||
struct rb_root_cached *root;
|
||||
struct btrfs_delayed_root *delayed_root;
|
||||
|
||||
/* Not associated with any delayed_node */
|
||||
if (!delayed_item->delayed_node)
|
||||
/* Not inserted, ignore it. */
|
||||
if (RB_EMPTY_NODE(&delayed_item->rb_node))
|
||||
return;
|
||||
|
||||
delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
|
||||
|
||||
BUG_ON(!delayed_root);
|
||||
BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
|
||||
delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
|
||||
|
||||
if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
|
||||
root = &delayed_item->delayed_node->ins_root;
|
||||
else
|
||||
root = &delayed_item->delayed_node->del_root;
|
||||
|
||||
rb_erase_cached(&delayed_item->rb_node, root);
|
||||
RB_CLEAR_NODE(&delayed_item->rb_node);
|
||||
delayed_item->delayed_node->count--;
|
||||
|
||||
finish_one_item(delayed_root);
|
||||
@ -520,12 +479,11 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
|
||||
}
|
||||
|
||||
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_delayed_item *item)
|
||||
{
|
||||
struct btrfs_block_rsv *src_rsv;
|
||||
struct btrfs_block_rsv *dst_rsv;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
|
||||
@ -545,14 +503,14 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
|
||||
if (!ret) {
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid,
|
||||
item->delayed_node->inode_id,
|
||||
num_bytes, 1);
|
||||
/*
|
||||
* For insertions we track reserved metadata space by accounting
|
||||
* for the number of leaves that will be used, based on the delayed
|
||||
* node's index_items_size field.
|
||||
*/
|
||||
if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
|
||||
if (item->type == BTRFS_DELAYED_DELETION_ITEM)
|
||||
item->bytes_reserved = num_bytes;
|
||||
}
|
||||
|
||||
@ -574,8 +532,8 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
|
||||
* to release/reserve qgroup space.
|
||||
*/
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid, item->bytes_reserved,
|
||||
0);
|
||||
item->delayed_node->inode_id,
|
||||
item->bytes_reserved, 0);
|
||||
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
|
||||
}
|
||||
|
||||
@ -688,6 +646,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_item *next;
|
||||
const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
|
||||
struct btrfs_item_batch batch;
|
||||
struct btrfs_key first_key;
|
||||
const u32 first_data_size = first_item->data_len;
|
||||
int total_size;
|
||||
char *ins_data = NULL;
|
||||
int ret;
|
||||
@ -716,9 +676,9 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
ASSERT(first_item->bytes_reserved == 0);
|
||||
|
||||
list_add_tail(&first_item->tree_list, &item_list);
|
||||
batch.total_data_size = first_item->data_len;
|
||||
batch.total_data_size = first_data_size;
|
||||
batch.nr = 1;
|
||||
total_size = first_item->data_len + sizeof(struct btrfs_item);
|
||||
total_size = first_data_size + sizeof(struct btrfs_item);
|
||||
curr = first_item;
|
||||
|
||||
while (true) {
|
||||
@ -732,8 +692,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
* We cannot allow gaps in the key space if we're doing log
|
||||
* replay.
|
||||
*/
|
||||
if (continuous_keys_only &&
|
||||
(next->key.offset != curr->key.offset + 1))
|
||||
if (continuous_keys_only && (next->index != curr->index + 1))
|
||||
break;
|
||||
|
||||
ASSERT(next->bytes_reserved == 0);
|
||||
@ -750,8 +709,11 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
if (batch.nr == 1) {
|
||||
batch.keys = &first_item->key;
|
||||
batch.data_sizes = &first_item->data_len;
|
||||
first_key.objectid = node->inode_id;
|
||||
first_key.type = BTRFS_DIR_INDEX_KEY;
|
||||
first_key.offset = first_item->index;
|
||||
batch.keys = &first_key;
|
||||
batch.data_sizes = &first_data_size;
|
||||
} else {
|
||||
struct btrfs_key *ins_keys;
|
||||
u32 *ins_sizes;
|
||||
@ -768,7 +730,9 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
|
||||
batch.keys = ins_keys;
|
||||
batch.data_sizes = ins_sizes;
|
||||
list_for_each_entry(curr, &item_list, tree_list) {
|
||||
ins_keys[i] = curr->key;
|
||||
ins_keys[i].objectid = node->inode_id;
|
||||
ins_keys[i].type = BTRFS_DIR_INDEX_KEY;
|
||||
ins_keys[i].offset = curr->index;
|
||||
ins_sizes[i] = curr->data_len;
|
||||
i++;
|
||||
}
|
||||
@ -864,6 +828,7 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_delayed_item *item)
|
||||
{
|
||||
const u64 ino = item->delayed_node->inode_id;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_delayed_item *curr, *next;
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
@ -902,7 +867,9 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
|
||||
|
||||
slot++;
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
if (btrfs_comp_cpu_keys(&next->key, &key) != 0)
|
||||
if (key.objectid != ino ||
|
||||
key.type != BTRFS_DIR_INDEX_KEY ||
|
||||
key.offset != next->index)
|
||||
break;
|
||||
nitems++;
|
||||
curr = next;
|
||||
@ -920,9 +887,8 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
|
||||
* Check btrfs_delayed_item_reserve_metadata() to see why we
|
||||
* don't need to release/reserve qgroup space.
|
||||
*/
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item",
|
||||
item->key.objectid, total_reserved_size,
|
||||
0);
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_item", ino,
|
||||
total_reserved_size, 0);
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
|
||||
total_reserved_size, NULL);
|
||||
}
|
||||
@ -940,8 +906,12 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_delayed_node *node)
|
||||
{
|
||||
struct btrfs_key key;
|
||||
int ret = 0;
|
||||
|
||||
key.objectid = node->inode_id;
|
||||
key.type = BTRFS_DIR_INDEX_KEY;
|
||||
|
||||
while (ret == 0) {
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
@ -952,7 +922,8 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
|
||||
break;
|
||||
}
|
||||
|
||||
ret = btrfs_search_slot(trans, root, &item->key, path, -1, 1);
|
||||
key.offset = item->index;
|
||||
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
|
||||
if (ret > 0) {
|
||||
/*
|
||||
* There's no matching item in the leaf. This means we
|
||||
@ -1457,16 +1428,15 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
if (IS_ERR(delayed_node))
|
||||
return PTR_ERR(delayed_node);
|
||||
|
||||
delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
|
||||
delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len,
|
||||
delayed_node,
|
||||
BTRFS_DELAYED_INSERTION_ITEM);
|
||||
if (!delayed_item) {
|
||||
ret = -ENOMEM;
|
||||
goto release_node;
|
||||
}
|
||||
|
||||
delayed_item->key.objectid = btrfs_ino(dir);
|
||||
delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
|
||||
delayed_item->key.offset = index;
|
||||
delayed_item->ins_or_del = BTRFS_DELAYED_INSERTION_ITEM;
|
||||
delayed_item->index = index;
|
||||
|
||||
dir_item = (struct btrfs_dir_item *)delayed_item->data;
|
||||
dir_item->location = *disk_key;
|
||||
@ -1490,8 +1460,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
if (reserve_leaf_space) {
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
|
||||
delayed_item);
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, delayed_item);
|
||||
/*
|
||||
* Space was reserved for a dir index item insertion when we
|
||||
* started the transaction, so getting a failure here should be
|
||||
@ -1538,12 +1507,12 @@ release_node:
|
||||
|
||||
static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_node *node,
|
||||
struct btrfs_key *key)
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
mutex_lock(&node->mutex);
|
||||
item = __btrfs_lookup_delayed_insertion_item(node, key);
|
||||
item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index);
|
||||
if (!item) {
|
||||
mutex_unlock(&node->mutex);
|
||||
return 1;
|
||||
@ -1589,32 +1558,25 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_delayed_node *node;
|
||||
struct btrfs_delayed_item *item;
|
||||
struct btrfs_key item_key;
|
||||
int ret;
|
||||
|
||||
node = btrfs_get_or_create_delayed_node(dir);
|
||||
if (IS_ERR(node))
|
||||
return PTR_ERR(node);
|
||||
|
||||
item_key.objectid = btrfs_ino(dir);
|
||||
item_key.type = BTRFS_DIR_INDEX_KEY;
|
||||
item_key.offset = index;
|
||||
|
||||
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
|
||||
&item_key);
|
||||
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index);
|
||||
if (!ret)
|
||||
goto end;
|
||||
|
||||
item = btrfs_alloc_delayed_item(0);
|
||||
item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM);
|
||||
if (!item) {
|
||||
ret = -ENOMEM;
|
||||
goto end;
|
||||
}
|
||||
|
||||
item->key = item_key;
|
||||
item->ins_or_del = BTRFS_DELAYED_DELETION_ITEM;
|
||||
item->index = index;
|
||||
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
|
||||
ret = btrfs_delayed_item_reserve_metadata(trans, item);
|
||||
/*
|
||||
* we have reserved enough space when we start a new transaction,
|
||||
* so reserving metadata failure is impossible.
|
||||
@ -1743,9 +1705,9 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
int ret = 0;
|
||||
|
||||
list_for_each_entry(curr, del_list, readdir_list) {
|
||||
if (curr->key.offset > index)
|
||||
if (curr->index > index)
|
||||
break;
|
||||
if (curr->key.offset == index) {
|
||||
if (curr->index == index) {
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
@ -1779,13 +1741,13 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
|
||||
list_del(&curr->readdir_list);
|
||||
|
||||
if (curr->key.offset < ctx->pos) {
|
||||
if (curr->index < ctx->pos) {
|
||||
if (refcount_dec_and_test(&curr->refs))
|
||||
kfree(curr);
|
||||
continue;
|
||||
}
|
||||
|
||||
ctx->pos = curr->key.offset;
|
||||
ctx->pos = curr->index;
|
||||
|
||||
di = (struct btrfs_dir_item *)curr->data;
|
||||
name = (char *)(di + 1);
|
||||
@ -2085,3 +2047,113 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
}
|
||||
|
||||
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list)
|
||||
{
|
||||
struct btrfs_delayed_node *node;
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
node = btrfs_get_delayed_node(inode);
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
mutex_lock(&node->mutex);
|
||||
item = __btrfs_first_delayed_insertion_item(node);
|
||||
while (item) {
|
||||
/*
|
||||
* It's possible that the item is already in a log list. This
|
||||
* can happen in case two tasks are trying to log the same
|
||||
* directory. For example if we have tasks A and task B:
|
||||
*
|
||||
* Task A collected the delayed items into a log list while
|
||||
* under the inode's log_mutex (at btrfs_log_inode()), but it
|
||||
* only releases the items after logging the inodes they point
|
||||
* to (if they are new inodes), which happens after unlocking
|
||||
* the log mutex;
|
||||
*
|
||||
* Task B enters btrfs_log_inode() and acquires the log_mutex
|
||||
* of the same directory inode, before task B releases the
|
||||
* delayed items. This can happen for example when logging some
|
||||
* inode we need to trigger logging of its parent directory, so
|
||||
* logging two files that have the same parent directory can
|
||||
* lead to this.
|
||||
*
|
||||
* If this happens, just ignore delayed items already in a log
|
||||
* list. All the tasks logging the directory are under a log
|
||||
* transaction and whichever finishes first can not sync the log
|
||||
* before the other completes and leaves the log transaction.
|
||||
*/
|
||||
if (!item->logged && list_empty(&item->log_list)) {
|
||||
refcount_inc(&item->refs);
|
||||
list_add_tail(&item->log_list, ins_list);
|
||||
}
|
||||
item = __btrfs_next_delayed_item(item);
|
||||
}
|
||||
|
||||
item = __btrfs_first_delayed_deletion_item(node);
|
||||
while (item) {
|
||||
/* It may be non-empty, for the same reason mentioned above. */
|
||||
if (!item->logged && list_empty(&item->log_list)) {
|
||||
refcount_inc(&item->refs);
|
||||
list_add_tail(&item->log_list, del_list);
|
||||
}
|
||||
item = __btrfs_next_delayed_item(item);
|
||||
}
|
||||
mutex_unlock(&node->mutex);
|
||||
|
||||
/*
|
||||
* We are called during inode logging, which means the inode is in use
|
||||
* and can not be evicted before we finish logging the inode. So we never
|
||||
* have the last reference on the delayed inode.
|
||||
* Also, we don't use btrfs_release_delayed_node() because that would
|
||||
* requeue the delayed inode (change its order in the list of prepared
|
||||
* nodes) and we don't want to do such change because we don't create or
|
||||
* delete delayed items.
|
||||
*/
|
||||
ASSERT(refcount_read(&node->refs) > 1);
|
||||
refcount_dec(&node->refs);
|
||||
}
|
||||
|
||||
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list)
|
||||
{
|
||||
struct btrfs_delayed_node *node;
|
||||
struct btrfs_delayed_item *item;
|
||||
struct btrfs_delayed_item *next;
|
||||
|
||||
node = btrfs_get_delayed_node(inode);
|
||||
if (!node)
|
||||
return;
|
||||
|
||||
mutex_lock(&node->mutex);
|
||||
|
||||
list_for_each_entry_safe(item, next, ins_list, log_list) {
|
||||
item->logged = true;
|
||||
list_del_init(&item->log_list);
|
||||
if (refcount_dec_and_test(&item->refs))
|
||||
kfree(item);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe(item, next, del_list, log_list) {
|
||||
item->logged = true;
|
||||
list_del_init(&item->log_list);
|
||||
if (refcount_dec_and_test(&item->refs))
|
||||
kfree(item);
|
||||
}
|
||||
|
||||
mutex_unlock(&node->mutex);
|
||||
|
||||
/*
|
||||
* We are called during inode logging, which means the inode is in use
|
||||
* and can not be evicted before we finish logging the inode. So we never
|
||||
* have the last reference on the delayed inode.
|
||||
* Also, we don't use btrfs_release_delayed_node() because that would
|
||||
* requeue the delayed inode (change its order in the list of prepared
|
||||
* nodes) and we don't want to do such change because we don't create or
|
||||
* delete delayed items.
|
||||
*/
|
||||
ASSERT(refcount_read(&node->refs) > 1);
|
||||
refcount_dec(&node->refs);
|
||||
}
|
||||
|
@ -16,9 +16,10 @@
|
||||
#include <linux/refcount.h>
|
||||
#include "ctree.h"
|
||||
|
||||
/* types of the delayed item */
|
||||
#define BTRFS_DELAYED_INSERTION_ITEM 1
|
||||
#define BTRFS_DELAYED_DELETION_ITEM 2
|
||||
enum btrfs_delayed_item_type {
|
||||
BTRFS_DELAYED_INSERTION_ITEM,
|
||||
BTRFS_DELAYED_DELETION_ITEM
|
||||
};
|
||||
|
||||
struct btrfs_delayed_root {
|
||||
spinlock_t lock;
|
||||
@ -73,14 +74,27 @@ struct btrfs_delayed_node {
|
||||
|
||||
struct btrfs_delayed_item {
|
||||
struct rb_node rb_node;
|
||||
struct btrfs_key key;
|
||||
/* Offset value of the corresponding dir index key. */
|
||||
u64 index;
|
||||
struct list_head tree_list; /* used for batch insert/delete items */
|
||||
struct list_head readdir_list; /* used for readdir items */
|
||||
/*
|
||||
* Used when logging a directory.
|
||||
* Insertions and deletions to this list are protected by the parent
|
||||
* delayed node's mutex.
|
||||
*/
|
||||
struct list_head log_list;
|
||||
u64 bytes_reserved;
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
refcount_t refs;
|
||||
int ins_or_del;
|
||||
u32 data_len;
|
||||
enum btrfs_delayed_item_type type:8;
|
||||
/*
|
||||
* Track if this delayed item was already logged.
|
||||
* Protected by the mutex of the parent delayed inode.
|
||||
*/
|
||||
bool logged;
|
||||
/* The maximum leaf size is 64K, so u16 is more than enough. */
|
||||
u16 data_len;
|
||||
char data[];
|
||||
};
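Because every delayed item belongs to one directory and is always a dir index item, the full key can be rebuilt on demand from the owning delayed node plus the stored index, which is what the insertion and deletion paths earlier in this series now do:

	struct btrfs_key key;

	key.objectid = item->delayed_node->inode_id;
	key.type = BTRFS_DIR_INDEX_KEY;
	key.offset = item->index;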
|
||||
|
||||
@ -144,6 +158,14 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
struct list_head *ins_list);
|
||||
|
||||
/* Used during directory logging. */
|
||||
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list);
|
||||
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list);
|
||||
|
||||
/* for init */
|
||||
int __init btrfs_delayed_inode_init(void);
|
||||
void __cold btrfs_delayed_inode_exit(void);
|
||||
|
@ -545,10 +545,7 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
|
||||
if (!cache)
|
||||
continue;
|
||||
|
||||
spin_lock(&cache->lock);
|
||||
cache->to_copy = 1;
|
||||
spin_unlock(&cache->lock);
|
||||
|
||||
set_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
|
||||
btrfs_put_block_group(cache);
|
||||
}
|
||||
if (iter_ret < 0)
|
||||
@ -577,7 +574,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
return true;
|
||||
|
||||
spin_lock(&cache->lock);
|
||||
if (cache->removed) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) {
|
||||
spin_unlock(&cache->lock);
|
||||
return true;
|
||||
}
|
||||
@ -610,9 +607,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
|
||||
}
|
||||
|
||||
/* Last stripe on this device */
|
||||
spin_lock(&cache->lock);
|
||||
cache->to_copy = 0;
|
||||
spin_unlock(&cache->lock);
|
||||
clear_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -1288,11 +1283,6 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
|
||||
return 1;
|
||||
}
|
||||
|
||||
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
percpu_counter_inc(&fs_info->dev_replace.bio_counter);
|
||||
}
|
||||
|
||||
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
|
||||
{
|
||||
percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount);
|
||||
|
@ -7,6 +7,10 @@
|
||||
#define BTRFS_DEV_REPLACE_H
|
||||
|
||||
struct btrfs_ioctl_dev_replace_args;
|
||||
struct btrfs_fs_info;
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_dev_replace;
|
||||
struct btrfs_block_group;
|
||||
|
||||
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);
|
||||
|
@ -131,8 +131,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
if (atomic)
|
||||
return -EAGAIN;
|
||||
|
||||
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state);
|
||||
if (extent_buffer_uptodate(eb) &&
|
||||
btrfs_header_generation(eb) == parent_transid) {
|
||||
ret = 0;
|
||||
@ -145,8 +144,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
|
||||
ret = 1;
|
||||
clear_extent_buffer_uptodate(eb);
|
||||
out:
|
||||
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
|
||||
&cached_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -647,16 +646,14 @@ static void run_one_async_start(struct btrfs_work *work)
|
||||
*/
|
||||
static void run_one_async_done(struct btrfs_work *work)
|
||||
{
|
||||
struct async_submit_bio *async;
|
||||
struct inode *inode;
|
||||
|
||||
async = container_of(work, struct async_submit_bio, work);
|
||||
inode = async->inode;
|
||||
struct async_submit_bio *async =
|
||||
container_of(work, struct async_submit_bio, work);
|
||||
struct inode *inode = async->inode;
|
||||
struct btrfs_bio *bbio = btrfs_bio(async->bio);
|
||||
|
||||
/* If an error occurred we just want to clean up the bio and move on */
|
||||
if (async->status) {
|
||||
async->bio->bi_status = async->status;
|
||||
bio_endio(async->bio);
|
||||
btrfs_bio_end_io(bbio, async->status);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -757,6 +754,7 @@ static bool should_async_write(struct btrfs_fs_info *fs_info,
|
||||
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_bio *bbio = btrfs_bio(bio);
|
||||
blk_status_t ret;
|
||||
|
||||
bio->bi_opf |= REQ_META;
|
||||
@ -776,8 +774,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
|
||||
|
||||
ret = btree_csum_one_bio(bio);
|
||||
if (ret) {
|
||||
bio->bi_status = ret;
|
||||
bio_endio(bio);
|
||||
btrfs_bio_end_io(bbio, ret);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1524,6 +1521,9 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
|
||||
if (objectid == BTRFS_UUID_TREE_OBJECTID)
|
||||
return btrfs_grab_root(fs_info->uuid_root) ?
|
||||
fs_info->uuid_root : ERR_PTR(-ENOENT);
|
||||
if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
|
||||
return btrfs_grab_root(fs_info->block_group_root) ?
|
||||
fs_info->block_group_root : ERR_PTR(-ENOENT);
|
||||
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
|
||||
struct btrfs_root *root = btrfs_global_root(fs_info, &key);
|
||||
|
||||
@ -1980,14 +1980,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
|
||||
btrfs_set_backup_chunk_root_level(root_backup,
|
||||
btrfs_header_level(info->chunk_root->node));
|
||||
|
||||
if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
|
||||
btrfs_set_backup_block_group_root(root_backup,
|
||||
info->block_group_root->node->start);
|
||||
btrfs_set_backup_block_group_root_gen(root_backup,
|
||||
btrfs_header_generation(info->block_group_root->node));
|
||||
btrfs_set_backup_block_group_root_level(root_backup,
|
||||
btrfs_header_level(info->block_group_root->node));
|
||||
} else {
|
||||
if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
|
||||
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
|
||||
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
|
||||
|
||||
@ -2225,6 +2218,8 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
|
||||
static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct inode *inode = fs_info->btree_inode;
|
||||
unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
|
||||
fs_info->tree_root);
|
||||
|
||||
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
|
||||
set_nlink(inode, 1);
|
||||
@ -2238,8 +2233,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
|
||||
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
|
||||
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
|
||||
IO_TREE_BTREE_INODE_IO, inode);
|
||||
BTRFS_I(inode)->io_tree.track_uptodate = false;
|
||||
IO_TREE_BTREE_INODE_IO, NULL);
|
||||
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
|
||||
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
|
||||
@ -2247,7 +2241,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
|
||||
BTRFS_I(inode)->location.type = 0;
|
||||
BTRFS_I(inode)->location.offset = 0;
|
||||
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
|
||||
btrfs_insert_inode_hash(inode);
|
||||
__insert_inode_hash(inode, hash);
|
||||
}
|
||||
|
||||
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
|
||||
@ -2266,6 +2260,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
|
||||
fs_info->qgroup_seq = 1;
|
||||
fs_info->qgroup_ulist = NULL;
|
||||
fs_info->qgroup_rescan_running = false;
|
||||
fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
|
||||
mutex_init(&fs_info->qgroup_rescan_lock);
|
||||
}
|
||||
|
||||
@ -2529,10 +2524,24 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
location.objectid = BTRFS_DEV_TREE_OBJECTID;
|
||||
location.type = BTRFS_ROOT_ITEM_KEY;
|
||||
location.offset = 0;
|
||||
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
|
||||
location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
|
||||
root = btrfs_read_tree_root(tree_root, &location);
|
||||
if (IS_ERR(root)) {
|
||||
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
|
||||
ret = PTR_ERR(root);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
|
||||
fs_info->block_group_root = root;
|
||||
}
|
||||
}
|
||||
|
||||
location.objectid = BTRFS_DEV_TREE_OBJECTID;
|
||||
root = btrfs_read_tree_root(tree_root, &location);
|
||||
if (IS_ERR(root)) {
|
||||
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
|
||||
@ -2600,8 +2609,8 @@ out:
|
||||
* 1, 2 2nd and 3rd backup copy
|
||||
* -1 skip bytenr check
|
||||
*/
|
||||
static int validate_super(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_super_block *sb, int mirror_num)
|
||||
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_super_block *sb, int mirror_num)
|
||||
{
|
||||
u64 nodesize = btrfs_super_nodesize(sb);
|
||||
u64 sectorsize = btrfs_super_sectorsize(sb);
|
||||
@ -2703,6 +2712,18 @@ static int validate_super(struct btrfs_fs_info *fs_info,
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Artificial requirement for block-group-tree to force newer features
|
||||
* (free-space-tree, no-holes) so the test matrix is smaller.
|
||||
*/
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
|
||||
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
|
||||
!btrfs_fs_incompat(fs_info, NO_HOLES))) {
|
||||
btrfs_err(fs_info,
|
||||
"block-group-tree feature requires fres-space-tree and no-holes");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
|
||||
BTRFS_FSID_SIZE) != 0) {
|
||||
btrfs_err(fs_info,
|
||||
@ -2785,7 +2806,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return validate_super(fs_info, fs_info->super_copy, 0);
|
||||
return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2799,7 +2820,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = validate_super(fs_info, sb, -1);
|
||||
ret = btrfs_validate_super(fs_info, sb, -1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
|
||||
@ -2860,17 +2881,7 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
|
||||
btrfs_warn(fs_info, "couldn't read tree root");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
|
||||
return 0;
|
||||
|
||||
bytenr = btrfs_super_block_group_root(sb);
|
||||
gen = btrfs_super_block_group_root_generation(sb);
|
||||
level = btrfs_super_block_group_root_level(sb);
|
||||
ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
|
||||
if (ret)
|
||||
btrfs_warn(fs_info, "couldn't read block group root");
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
@ -2882,16 +2893,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
struct btrfs_root *root;
|
||||
|
||||
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
|
||||
GFP_KERNEL);
|
||||
if (!root)
|
||||
return -ENOMEM;
|
||||
fs_info->block_group_root = root;
|
||||
}
|
||||
|
||||
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
|
||||
if (handle_error) {
|
||||
if (!IS_ERR(tree_root->node))
|
||||
@ -2990,6 +2991,19 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
mutex_init(&fs_info->zoned_data_reloc_io_lock);
|
||||
seqlock_init(&fs_info->profiles_lock);
|
||||
|
||||
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers);
|
||||
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
|
||||
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
|
||||
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
|
||||
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
|
||||
BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
|
||||
BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
|
||||
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
|
||||
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed,
|
||||
BTRFS_LOCKDEP_TRANS_COMPLETED);
|
||||
|
||||
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
|
||||
INIT_LIST_HEAD(&fs_info->space_info);
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
@ -3279,6 +3293,112 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Do various sanity and dependency checks of different features.
|
||||
*
|
||||
* This is the place for less strict checks (like for subpage or artificial
|
||||
* feature dependencies).
|
||||
*
|
||||
* For strict checks or possible corruption detection, see
|
||||
* btrfs_validate_super().
|
||||
*
|
||||
* This should be called after btrfs_parse_options(), as some mount options
|
||||
* (space cache related) can modify on-disk format like free space tree and
|
||||
* screw up certain feature dependencies.
|
||||
*/
|
||||
int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
|
||||
{
|
||||
struct btrfs_super_block *disk_super = fs_info->super_copy;
|
||||
u64 incompat = btrfs_super_incompat_flags(disk_super);
|
||||
const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
|
||||
const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
|
||||
|
||||
if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount because of unknown incompat features (0x%llx)",
|
||||
incompat);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Runtime limitation for mixed block groups. */
|
||||
if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
|
||||
(fs_info->sectorsize != fs_info->nodesize)) {
|
||||
btrfs_err(fs_info,
|
||||
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
|
||||
fs_info->nodesize, fs_info->sectorsize);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Mixed backref is an always-enabled feature. */
|
||||
incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
|
||||
|
||||
/* Set compression related flags just in case. */
|
||||
if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
|
||||
incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
|
||||
else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
|
||||
incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
|
||||
|
||||
/*
|
||||
* An ancient flag, which should really be marked deprecated.
|
||||
* Such runtime limitation doesn't really need a incompat flag.
|
||||
*/
|
||||
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
|
||||
incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
|
||||
|
||||
if (compat_ro_unsupp && !sb_rdonly(sb)) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot mount read-write because of unknown compat_ro features (0x%llx)",
|
||||
compat_ro);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We have unsupported RO compat features, although RO mounted, we
|
||||
* should not cause any metadata writes, including log replay.
|
||||
* Or we could screw up whatever the new feature requires.
|
||||
*/
|
||||
if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
|
||||
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
|
||||
btrfs_err(fs_info,
|
||||
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
|
||||
compat_ro);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Artificial limitations for block group tree, to force
|
||||
* block-group-tree to rely on no-holes and free-space-tree.
|
||||
*/
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
|
||||
(!btrfs_fs_incompat(fs_info, NO_HOLES) ||
|
||||
!btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
|
||||
btrfs_err(fs_info,
|
||||
"block-group-tree feature requires no-holes and free-space-tree features");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Subpage runtime limitation on v1 cache.
|
||||
*
|
||||
* V1 space cache still has some hard codeed PAGE_SIZE usage, while
|
||||
* we're already defaulting to v2 cache, no need to bother v1 as it's
|
||||
* going to be deprecated anyway.
|
||||
*/
|
||||
if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
|
||||
btrfs_warn(fs_info,
|
||||
"v1 space cache is not supported for page size %lu with sectorsize %u",
|
||||
PAGE_SIZE, fs_info->sectorsize);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* This can be called by remount, we need to protect the super block. */
|
||||
spin_lock(&fs_info->super_lock);
|
||||
btrfs_set_super_incompat_flags(disk_super, incompat);
|
||||
spin_unlock(&fs_info->super_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
|
||||
char *options)
|
||||
{
|
||||
@ -3428,72 +3548,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
		goto fail_alloc;
	}

	features = btrfs_super_incompat_flags(disk_super) &
		~BTRFS_FEATURE_INCOMPAT_SUPP;
	if (features) {
		btrfs_err(fs_info,
		    "cannot mount because of unsupported optional features (0x%llx)",
		    features);
		err = -EINVAL;
	ret = btrfs_check_features(fs_info, sb);
	if (ret < 0) {
		err = ret;
		goto fail_alloc;
	}

	features = btrfs_super_incompat_flags(disk_super);
	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
	if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
	else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;

	/*
	 * Flag our filesystem as having big metadata blocks if they are bigger
	 * than the page size.
	 */
	if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
		features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;

	/*
	 * mixed block groups end up with duplicate but slightly offset
	 * extent buffers for the same range. It leads to corruptions
	 */
	if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
	    (sectorsize != nodesize)) {
		btrfs_err(fs_info,
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
			nodesize, sectorsize);
		goto fail_alloc;
	}

	/*
	 * Needn't use the lock because there is no other task which will
	 * update the flag.
	 */
	btrfs_set_super_incompat_flags(disk_super, features);

	features = btrfs_super_compat_ro_flags(disk_super) &
		~BTRFS_FEATURE_COMPAT_RO_SUPP;
	if (!sb_rdonly(sb) && features) {
		btrfs_err(fs_info,
	"cannot mount read-write because of unsupported optional features (0x%llx)",
		       features);
		err = -EINVAL;
		goto fail_alloc;
	}
	/*
	 * We have unsupported RO compat features, although RO mounted, we
	 * should not cause any metadata write, including log replay.
	 * Or we could screw up whatever the new feature requires.
	 */
	if (unlikely(features && btrfs_super_log_root(disk_super) &&
		     !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
		btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
			  features);
		err = -EINVAL;
		goto fail_alloc;
	}


	if (sectorsize < PAGE_SIZE) {
		struct btrfs_subpage_info *subpage_info;

@@ -3833,7 +3893,7 @@ static void btrfs_end_super_write(struct bio *bio)
}

struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
						   int copy_num)
						   int copy_num, bool drop_cache)
{
	struct btrfs_super_block *super;
	struct page *page;

@@ -3851,6 +3911,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
	if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
		return ERR_PTR(-EINVAL);

	if (drop_cache) {
		/* This should only be called with the primary sb. */
		ASSERT(copy_num == 0);

		/*
		 * Drop the page of the primary superblock, so later read will
		 * always read from the device.
		 */
		invalidate_inode_pages2_range(mapping,
				bytenr >> PAGE_SHIFT,
				(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
	}

	page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
	if (IS_ERR(page))
		return ERR_CAST(page);
@@ -3882,7 +3955,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
	 */
	for (i = 0; i < 1; i++) {
		super = btrfs_read_dev_one_super(bdev, i);
		super = btrfs_read_dev_one_super(bdev, i, false);
		if (IS_ERR(super))
			continue;

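For illustration only (this wrapper is not part of the patch; only btrfs_read_dev_one_super() and its new drop_cache parameter are): passing drop_cache as true for the primary copy invalidates the cached super block page first, so the read always comes from the device.

/* Hypothetical caller: re-read the primary super block, bypassing the page cache. */
static struct btrfs_super_block *example_reread_primary_super(struct block_device *bdev)
{
	/* drop_cache may only be used with copy_num 0, the primary super block. */
	return btrfs_read_dev_one_super(bdev, 0, true);
}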
@@ -46,10 +46,13 @@ int __cold open_ctree(struct super_block *sb,
		      struct btrfs_fs_devices *fs_devices,
		      char *options);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
			 struct btrfs_super_block *sb, int mirror_num);
int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
						   int copy_num);
						   int copy_num, bool drop_cache);
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
					struct btrfs_key *key);
@@ -103,7 +106,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)

static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
	if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
		return fs_info->block_group_root;
	return btrfs_extent_root(fs_info, 0);
}

fs/btrfs/extent-io-tree.c: new file, 1673 lines (diff suppressed because it is too large).
@ -17,7 +17,6 @@ struct io_failure_record;
|
||||
#define EXTENT_NODATASUM (1U << 7)
|
||||
#define EXTENT_CLEAR_META_RESV (1U << 8)
|
||||
#define EXTENT_NEED_WAIT (1U << 9)
|
||||
#define EXTENT_DAMAGED (1U << 10)
|
||||
#define EXTENT_NORESERVE (1U << 11)
|
||||
#define EXTENT_QGROUP_RESERVED (1U << 12)
|
||||
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
|
||||
@ -35,10 +34,18 @@ struct io_failure_record;
|
||||
* delalloc bytes decremented, in an atomic way to prevent races with stat(2).
|
||||
*/
|
||||
#define EXTENT_ADD_INODE_BYTES (1U << 15)
|
||||
|
||||
/*
|
||||
* Set during truncate when we're clearing an entire range and we just want the
|
||||
* extent states to go away.
|
||||
*/
|
||||
#define EXTENT_CLEAR_ALL_BITS (1U << 16)
|
||||
|
||||
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
|
||||
EXTENT_CLEAR_DATA_RESV)
|
||||
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | \
|
||||
EXTENT_ADD_INODE_BYTES)
|
||||
EXTENT_ADD_INODE_BYTES | \
|
||||
EXTENT_CLEAR_ALL_BITS)
|
||||
|
||||
/*
|
||||
* Redefined bits above which are used only in the device allocation tree,
|
||||
@ -56,7 +63,6 @@ enum {
|
||||
IO_TREE_FS_EXCLUDED_EXTENTS,
|
||||
IO_TREE_BTREE_INODE_IO,
|
||||
IO_TREE_INODE_IO,
|
||||
IO_TREE_INODE_IO_FAILURE,
|
||||
IO_TREE_RELOC_BLOCKS,
|
||||
IO_TREE_TRANS_DIRTY_PAGES,
|
||||
IO_TREE_ROOT_DIRTY_LOG_PAGES,
|
||||
@ -70,8 +76,6 @@ struct extent_io_tree {
|
||||
struct rb_root state;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
void *private_data;
|
||||
u64 dirty_bytes;
|
||||
bool track_uptodate;
|
||||
|
||||
/* Who owns this io tree, should be one of IO_TREE_* */
|
||||
u8 owner;
|
||||
@ -89,33 +93,23 @@ struct extent_state {
|
||||
refcount_t refs;
|
||||
u32 state;
|
||||
|
||||
struct io_failure_record *failrec;
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct list_head leak_list;
|
||||
#endif
|
||||
};
|
||||
|
||||
int __init extent_state_cache_init(void);
|
||||
void __cold extent_state_cache_exit(void);
|
||||
|
||||
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *tree, unsigned int owner,
|
||||
void *private_data);
|
||||
void extent_io_tree_release(struct extent_io_tree *tree);
|
||||
|
||||
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached);
|
||||
|
||||
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
||||
{
|
||||
return lock_extent_bits(tree, start, end, NULL);
|
||||
}
|
||||
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached);
|
||||
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
|
||||
|
||||
int __init extent_io_init(void);
|
||||
void __cold extent_io_exit(void);
|
||||
int __init extent_state_init_cachep(void);
|
||||
void __cold extent_state_free_cachep(void);
|
||||
|
||||
u64 count_range_bits(struct extent_io_tree *tree,
|
||||
u64 *start, u64 search_end,
|
||||
@ -126,72 +120,66 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, int filled, struct extent_state *cached_state);
|
||||
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, struct extent_changeset *changeset);
|
||||
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, int wake, int delete,
|
||||
struct extent_state **cached);
|
||||
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, int wake, int delete,
|
||||
struct extent_state **cached, gfp_t mask,
|
||||
struct extent_changeset *changeset);
|
||||
u32 bits, struct extent_state **cached, gfp_t mask,
|
||||
struct extent_changeset *changeset);
|
||||
|
||||
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
||||
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, u32 bits,
|
||||
struct extent_state **cached)
|
||||
{
|
||||
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
|
||||
return __clear_extent_bit(tree, start, end, bits, cached,
|
||||
GFP_NOFS, NULL);
|
||||
}
|
||||
|
||||
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached)
|
||||
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
struct extent_state **cached)
|
||||
{
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
|
||||
GFP_NOFS, NULL);
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
|
||||
GFP_NOFS, NULL);
|
||||
}
|
||||
|
||||
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
|
||||
u64 start, u64 end, struct extent_state **cached)
|
||||
static inline int unlock_extent_atomic(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached)
|
||||
{
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
|
||||
GFP_ATOMIC, NULL);
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
|
||||
GFP_ATOMIC, NULL);
|
||||
}
|
||||
|
||||
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, u32 bits)
|
||||
{
|
||||
int wake = 0;
|
||||
|
||||
if (bits & EXTENT_LOCKED)
|
||||
wake = 1;
|
||||
|
||||
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
|
||||
return clear_extent_bit(tree, start, end, bits, NULL);
|
||||
}
|
||||
|
||||
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, struct extent_changeset *changeset);
|
||||
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits, unsigned exclusive_bits, u64 *failed_start,
|
||||
struct extent_state **cached_state, gfp_t mask,
|
||||
struct extent_changeset *changeset);
|
||||
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
u32 bits);
|
||||
u32 bits, struct extent_state **cached_state, gfp_t mask);
|
||||
|
||||
static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, u32 bits)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOWAIT);
|
||||
}
|
||||
|
||||
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, u32 bits)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
|
||||
NULL);
|
||||
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOFS);
|
||||
}
|
||||
|
||||
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached_state)
|
||||
{
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
|
||||
cached_state, GFP_NOFS, NULL);
|
||||
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
|
||||
cached_state, GFP_NOFS, NULL);
|
||||
}
|
||||
|
||||
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, gfp_t mask)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL,
|
||||
mask, NULL);
|
||||
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, mask);
|
||||
}
|
||||
|
||||
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
|
||||
@ -199,7 +187,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
|
||||
{
|
||||
return clear_extent_bit(tree, start, end,
|
||||
EXTENT_DIRTY | EXTENT_DELALLOC |
|
||||
EXTENT_DO_ACCOUNTING, 0, 0, cached);
|
||||
EXTENT_DO_ACCOUNTING, cached);
|
||||
}
|
||||
|
||||
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
|
||||
@ -211,30 +199,29 @@ static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
return set_extent_bit(tree, start, end,
|
||||
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
|
||||
0, NULL, cached_state, GFP_NOFS, NULL);
|
||||
EXTENT_DELALLOC | extra_bits,
|
||||
cached_state, GFP_NOFS);
|
||||
}
|
||||
|
||||
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached_state)
|
||||
{
|
||||
return set_extent_bit(tree, start, end,
|
||||
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
|
||||
0, NULL, cached_state, GFP_NOFS, NULL);
|
||||
EXTENT_DELALLOC | EXTENT_DEFRAG,
|
||||
cached_state, GFP_NOFS);
|
||||
}
|
||||
|
||||
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
|
||||
u64 end)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL,
|
||||
GFP_NOFS, NULL);
|
||||
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
|
||||
}
|
||||
|
||||
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
|
||||
u64 end, struct extent_state **cached_state, gfp_t mask)
|
||||
{
|
||||
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
|
||||
cached_state, mask, NULL);
|
||||
return set_extent_bit(tree, start, end, EXTENT_UPTODATE,
|
||||
cached_state, mask);
|
||||
}
|
||||
|
||||
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
@ -244,24 +231,9 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, u32 bits);
|
||||
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
|
||||
u64 *start_ret, u64 *end_ret, u32 bits);
|
||||
int extent_invalidate_folio(struct extent_io_tree *tree,
|
||||
struct folio *folio, size_t offset);
|
||||
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
|
||||
u64 *end, u64 max_bytes,
|
||||
struct extent_state **cached_state);
|
||||
|
||||
/* This should be reworked in the future and put elsewhere. */
|
||||
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start);
|
||||
int set_state_failrec(struct extent_io_tree *tree, u64 start,
|
||||
struct io_failure_record *failrec);
|
||||
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
|
||||
u64 end);
|
||||
int free_io_failure(struct extent_io_tree *failure_tree,
|
||||
struct extent_io_tree *io_tree,
|
||||
struct io_failure_record *rec);
|
||||
int clean_io_failure(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *failure_tree,
|
||||
struct extent_io_tree *io_tree, u64 start,
|
||||
struct page *page, u64 ino, unsigned int pg_offset);
|
||||
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits);
|
||||
|
||||
#endif /* BTRFS_EXTENT_IO_TREE_H */
|
||||
|
@ -2220,6 +2220,12 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
|
||||
}
|
||||
|
||||
if (!mutex_trylock(&head->mutex)) {
|
||||
if (path->nowait) {
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
btrfs_put_transaction(cur_trans);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
@ -2686,13 +2692,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
|
||||
len = cache->start + cache->length - start;
|
||||
len = min(len, end + 1 - start);
|
||||
|
||||
down_read(&fs_info->commit_root_sem);
|
||||
if (start < cache->last_byte_to_unpin && return_free_space) {
|
||||
u64 add_len = min(len, cache->last_byte_to_unpin - start);
|
||||
|
||||
btrfs_add_free_space(cache, start, add_len);
|
||||
}
|
||||
up_read(&fs_info->commit_root_sem);
|
||||
if (return_free_space)
|
||||
btrfs_add_free_space(cache, start, len);
|
||||
|
||||
start += len;
|
||||
total_unpinned += len;
|
||||
@ -3804,7 +3805,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
block_group->start == fs_info->data_reloc_bg ||
|
||||
fs_info->data_reloc_bg == 0);
|
||||
|
||||
if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
|
||||
if (block_group->ro ||
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
@ -3881,7 +3883,7 @@ out:
|
||||
* regular extents) at the same time to the same zone, which
|
||||
* easily break the write pointer.
|
||||
*/
|
||||
block_group->zoned_data_reloc_ongoing = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
|
||||
fs_info->data_reloc_bg = 0;
|
||||
}
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
@ -4888,6 +4890,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
!test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
|
||||
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
|
||||
|
||||
/* btrfs_clean_tree_block() accesses generation field. */
|
||||
btrfs_set_header_generation(buf, trans->transid);
|
||||
|
||||
/*
|
||||
* This needs to stay, because we could allocate a freed block from an
|
||||
* old tree into a new tree, so we need to make sure this new block is
|
||||
@ -5639,6 +5644,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
||||
{
|
||||
const bool is_reloc_root = (root->root_key.objectid ==
|
||||
BTRFS_TREE_RELOC_OBJECTID);
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_trans_handle *trans;
|
||||
@ -5798,6 +5805,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
||||
goto out_end_trans;
|
||||
}
|
||||
|
||||
if (!is_reloc_root)
|
||||
btrfs_set_last_root_drop_gen(fs_info, trans->transid);
|
||||
|
||||
btrfs_end_transaction_throttle(trans);
|
||||
if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
|
||||
btrfs_debug(fs_info,
|
||||
@ -5832,7 +5842,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
||||
goto out_end_trans;
|
||||
}
|
||||
|
||||
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
|
||||
if (!is_reloc_root) {
|
||||
ret = btrfs_find_root(tree_root, &root->root_key, path,
|
||||
NULL, NULL);
|
||||
if (ret < 0) {
|
||||
@ -5864,6 +5874,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
|
||||
btrfs_put_root(root);
|
||||
root_dropped = true;
|
||||
out_end_trans:
|
||||
if (!is_reloc_root)
|
||||
btrfs_set_last_root_drop_gen(fs_info, trans->transid);
|
||||
|
||||
btrfs_end_transaction_throttle(trans);
|
||||
out_free:
|
||||
kfree(wc);
|
||||
|
fs/btrfs/extent_io.c: 2881 changed lines (diff suppressed because it is too large).
@ -60,11 +60,13 @@ enum {
|
||||
struct btrfs_bio;
|
||||
struct btrfs_root;
|
||||
struct btrfs_inode;
|
||||
struct btrfs_io_bio;
|
||||
struct btrfs_fs_info;
|
||||
struct io_failure_record;
|
||||
struct extent_io_tree;
|
||||
|
||||
int __init extent_buffer_init_cachep(void);
|
||||
void __cold extent_buffer_free_cachep(void);
|
||||
|
||||
typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
|
||||
int mirror_num,
|
||||
enum btrfs_compression_type compress_type);
|
||||
@ -240,10 +242,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
|
||||
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct page *locked_page,
|
||||
u32 bits_to_clear, unsigned long page_ops);
|
||||
int extent_invalidate_folio(struct extent_io_tree *tree,
|
||||
struct folio *folio, size_t offset);
|
||||
|
||||
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
|
||||
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
|
||||
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
|
||||
|
||||
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
|
||||
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
|
||||
@ -257,8 +259,12 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
|
||||
* bio end_io callback is called to indicate things have failed.
|
||||
*/
|
||||
struct io_failure_record {
|
||||
/* Use rb_simple_node for search/insert */
|
||||
struct {
|
||||
struct rb_node rb_node;
|
||||
u64 bytenr;
|
||||
};
|
||||
struct page *page;
|
||||
u64 start;
|
||||
u64 len;
|
||||
u64 logical;
|
||||
int this_mirror;
|
||||
@ -269,6 +275,9 @@ struct io_failure_record {
|
||||
int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
|
||||
u32 bio_offset, struct page *page, unsigned int pgoff,
|
||||
submit_bio_hook_t *submit_bio_hook);
|
||||
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
|
||||
struct page *page, unsigned int pg_offset);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
bool find_lock_delalloc_range(struct inode *inode,
|
||||
|
@ -7,6 +7,7 @@
|
||||
#include "volumes.h"
|
||||
#include "extent_map.h"
|
||||
#include "compression.h"
|
||||
#include "btrfs_inode.h"
|
||||
|
||||
|
||||
static struct kmem_cache *extent_map_cache;
|
||||
@ -54,9 +55,7 @@ struct extent_map *alloc_extent_map(void)
|
||||
if (!em)
|
||||
return NULL;
|
||||
RB_CLEAR_NODE(&em->rb_node);
|
||||
em->flags = 0;
|
||||
em->compress_type = BTRFS_COMPRESS_NONE;
|
||||
em->generation = 0;
|
||||
refcount_set(&em->refs, 1);
|
||||
INIT_LIST_HEAD(&em->list);
|
||||
return em;
|
||||
@ -73,7 +72,6 @@ void free_extent_map(struct extent_map *em)
|
||||
{
|
||||
if (!em)
|
||||
return;
|
||||
WARN_ON(refcount_read(&em->refs) == 0);
|
||||
if (refcount_dec_and_test(&em->refs)) {
|
||||
WARN_ON(extent_map_in_tree(em));
|
||||
WARN_ON(!list_empty(&em->list));
|
||||
@ -143,8 +141,7 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
|
||||
* it can't be found, try to find some neighboring extents
|
||||
*/
|
||||
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
struct rb_node **prev_ret,
|
||||
struct rb_node **next_ret)
|
||||
struct rb_node **prev_or_next_ret)
|
||||
{
|
||||
struct rb_node *n = root->rb_node;
|
||||
struct rb_node *prev = NULL;
|
||||
@ -152,6 +149,8 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
struct extent_map *entry;
|
||||
struct extent_map *prev_entry = NULL;
|
||||
|
||||
ASSERT(prev_or_next_ret);
|
||||
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct extent_map, rb_node);
|
||||
prev = n;
|
||||
@ -165,24 +164,29 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
return n;
|
||||
}
|
||||
|
||||
if (prev_ret) {
|
||||
orig_prev = prev;
|
||||
while (prev && offset >= extent_map_end(prev_entry)) {
|
||||
prev = rb_next(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
*prev_ret = prev;
|
||||
prev = orig_prev;
|
||||
orig_prev = prev;
|
||||
while (prev && offset >= extent_map_end(prev_entry)) {
|
||||
prev = rb_next(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
|
||||
if (next_ret) {
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
while (prev && offset < prev_entry->start) {
|
||||
prev = rb_prev(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
*next_ret = prev;
|
||||
/*
|
||||
* Previous extent map found, return as in this case the caller does not
|
||||
* care about the next one.
|
||||
*/
|
||||
if (prev) {
|
||||
*prev_or_next_ret = prev;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
prev = orig_prev;
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
while (prev && offset < prev_entry->start) {
|
||||
prev = rb_prev(prev);
|
||||
prev_entry = rb_entry(prev, struct extent_map, rb_node);
|
||||
}
|
||||
*prev_or_next_ret = prev;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -336,6 +340,8 @@ out:
|
||||
|
||||
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
|
||||
{
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
|
||||
if (extent_map_in_tree(em))
|
||||
try_merge_map(tree, em);
|
||||
@ -382,7 +388,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
|
||||
|
||||
__clear_extent_bit(&device->alloc_state, stripe->physical,
|
||||
stripe->physical + stripe_size - 1, bits,
|
||||
0, 0, NULL, GFP_NOWAIT, NULL);
|
||||
NULL, GFP_NOWAIT, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@ -425,16 +431,13 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct rb_node *rb_node;
|
||||
struct rb_node *prev = NULL;
|
||||
struct rb_node *next = NULL;
|
||||
struct rb_node *prev_or_next = NULL;
|
||||
u64 end = range_end(start, len);
|
||||
|
||||
rb_node = __tree_search(&tree->map.rb_root, start, &prev, &next);
|
||||
rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
|
||||
if (!rb_node) {
|
||||
if (prev)
|
||||
rb_node = prev;
|
||||
else if (next)
|
||||
rb_node = next;
|
||||
if (prev_or_next)
|
||||
rb_node = prev_or_next;
|
||||
else
|
||||
return NULL;
|
||||
}
|
||||
@ -658,3 +661,293 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
|
||||
ASSERT(ret == 0 || ret == -EEXIST);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop all extent maps from a tree in the fastest possible way, rescheduling
|
||||
* if needed. This avoids searching the tree, from the root down to the first
|
||||
* extent map, before each deletion.
|
||||
*/
|
||||
static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
|
||||
{
|
||||
write_lock(&tree->lock);
|
||||
while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
|
||||
struct extent_map *em;
|
||||
struct rb_node *node;
|
||||
|
||||
node = rb_first_cached(&tree->map);
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
|
||||
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
|
||||
remove_extent_mapping(tree, em);
|
||||
free_extent_map(em);
|
||||
cond_resched_rwlock_write(&tree->lock);
|
||||
}
|
||||
write_unlock(&tree->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop all extent maps in a given range.
|
||||
*
|
||||
* @inode: The target inode.
|
||||
* @start: Start offset of the range.
|
||||
* @end: End offset of the range (inclusive value).
|
||||
* @skip_pinned: Indicate if pinned extent maps should be ignored or not.
|
||||
*
|
||||
* This drops all the extent maps that intersect the given range [@start, @end].
|
||||
* Extent maps that partially overlap the range and extend behind or beyond it,
|
||||
* are split.
|
||||
* The caller should have locked an appropriate file range in the inode's io
|
||||
* tree before calling this function.
|
||||
*/
|
||||
void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
bool skip_pinned)
|
||||
{
|
||||
struct extent_map *split;
|
||||
struct extent_map *split2;
|
||||
struct extent_map *em;
|
||||
struct extent_map_tree *em_tree = &inode->extent_tree;
|
||||
u64 len = end - start + 1;
|
||||
|
||||
WARN_ON(end < start);
|
||||
if (end == (u64)-1) {
|
||||
if (start == 0 && !skip_pinned) {
|
||||
drop_all_extent_maps_fast(em_tree);
|
||||
return;
|
||||
}
|
||||
len = (u64)-1;
|
||||
} else {
|
||||
/* Make end offset exclusive for use in the loop below. */
|
||||
end++;
|
||||
}
|
||||
|
||||
/*
|
||||
* It's ok if we fail to allocate the extent maps, see the comment near
|
||||
* the bottom of the loop below. We only need two spare extent maps in
|
||||
* the worst case, where the first extent map that intersects our range
|
||||
* starts before the range and the last extent map that intersects our
|
||||
* range ends after our range (and they might be the same extent map),
|
||||
* because we need to split those two extent maps at the boundaries.
|
||||
*/
|
||||
split = alloc_extent_map();
|
||||
split2 = alloc_extent_map();
|
||||
|
||||
write_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, start, len);
|
||||
|
||||
while (em) {
|
||||
/* extent_map_end() returns exclusive value (last byte + 1). */
|
||||
const u64 em_end = extent_map_end(em);
|
||||
struct extent_map *next_em = NULL;
|
||||
u64 gen;
|
||||
unsigned long flags;
|
||||
bool modified;
|
||||
bool compressed;
|
||||
|
||||
if (em_end < end) {
|
||||
next_em = next_extent_map(em);
|
||||
if (next_em) {
|
||||
if (next_em->start < end)
|
||||
refcount_inc(&next_em->refs);
|
||||
else
|
||||
next_em = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
|
||||
start = em_end;
|
||||
if (end != (u64)-1)
|
||||
len = start + len - em_end;
|
||||
goto next;
|
||||
}
|
||||
|
||||
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
|
||||
clear_bit(EXTENT_FLAG_LOGGING, &flags);
|
||||
modified = !list_empty(&em->list);
|
||||
|
||||
/*
|
||||
* The extent map does not cross our target range, so no need to
|
||||
* split it, we can remove it directly.
|
||||
*/
|
||||
if (em->start >= start && em_end <= end)
|
||||
goto remove_em;
|
||||
|
||||
flags = em->flags;
|
||||
gen = em->generation;
|
||||
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
|
||||
if (em->start < start) {
|
||||
if (!split) {
|
||||
split = split2;
|
||||
split2 = NULL;
|
||||
if (!split)
|
||||
goto remove_em;
|
||||
}
|
||||
split->start = em->start;
|
||||
split->len = start - em->start;
|
||||
|
||||
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
|
||||
split->orig_start = em->orig_start;
|
||||
split->block_start = em->block_start;
|
||||
|
||||
if (compressed)
|
||||
split->block_len = em->block_len;
|
||||
else
|
||||
split->block_len = split->len;
|
||||
split->orig_block_len = max(split->block_len,
|
||||
em->orig_block_len);
|
||||
split->ram_bytes = em->ram_bytes;
|
||||
} else {
|
||||
split->orig_start = split->start;
|
||||
split->block_len = 0;
|
||||
split->block_start = em->block_start;
|
||||
split->orig_block_len = 0;
|
||||
split->ram_bytes = split->len;
|
||||
}
|
||||
|
||||
split->generation = gen;
|
||||
split->flags = flags;
|
||||
split->compress_type = em->compress_type;
|
||||
replace_extent_mapping(em_tree, em, split, modified);
|
||||
free_extent_map(split);
|
||||
split = split2;
|
||||
split2 = NULL;
|
||||
}
|
||||
if (em_end > end) {
|
||||
if (!split) {
|
||||
split = split2;
|
||||
split2 = NULL;
|
||||
if (!split)
|
||||
goto remove_em;
|
||||
}
|
||||
split->start = start + len;
|
||||
split->len = em_end - (start + len);
|
||||
split->block_start = em->block_start;
|
||||
split->flags = flags;
|
||||
split->compress_type = em->compress_type;
|
||||
split->generation = gen;
|
||||
|
||||
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
|
||||
split->orig_block_len = max(em->block_len,
|
||||
em->orig_block_len);
|
||||
|
||||
split->ram_bytes = em->ram_bytes;
|
||||
if (compressed) {
|
||||
split->block_len = em->block_len;
|
||||
split->orig_start = em->orig_start;
|
||||
} else {
|
||||
const u64 diff = start + len - em->start;
|
||||
|
||||
split->block_len = split->len;
|
||||
split->block_start += diff;
|
||||
split->orig_start = em->orig_start;
|
||||
}
|
||||
} else {
|
||||
split->ram_bytes = split->len;
|
||||
split->orig_start = split->start;
|
||||
split->block_len = 0;
|
||||
split->orig_block_len = 0;
|
||||
}
|
||||
|
||||
if (extent_map_in_tree(em)) {
|
||||
replace_extent_mapping(em_tree, em, split,
|
||||
modified);
|
||||
} else {
|
||||
int ret;
|
||||
|
||||
ret = add_extent_mapping(em_tree, split,
|
||||
modified);
|
||||
/* Logic error, shouldn't happen. */
|
||||
ASSERT(ret == 0);
|
||||
if (WARN_ON(ret != 0) && modified)
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
}
|
||||
free_extent_map(split);
|
||||
split = NULL;
|
||||
}
|
||||
remove_em:
|
||||
if (extent_map_in_tree(em)) {
|
||||
/*
|
||||
* If the extent map is still in the tree it means that
|
||||
* either of the following is true:
|
||||
*
|
||||
* 1) It fits entirely in our range (doesn't end beyond
|
||||
* it or starts before it);
|
||||
*
|
||||
* 2) It starts before our range and/or ends after our
|
||||
* range, and we were not able to allocate the extent
|
||||
* maps for split operations, @split and @split2.
|
||||
*
|
||||
* If we are at case 2) then we just remove the entire
|
||||
* extent map - this is fine since if anyone needs it to
|
||||
* access the subranges outside our range, will just
|
||||
* load it again from the subvolume tree's file extent
|
||||
* item. However if the extent map was in the list of
|
||||
* modified extents, then we must mark the inode for a
|
||||
* full fsync, otherwise a fast fsync will miss this
|
||||
* extent if it's new and needs to be logged.
|
||||
*/
|
||||
if ((em->start < start || em_end > end) && modified) {
|
||||
ASSERT(!split);
|
||||
btrfs_set_inode_full_sync(inode);
|
||||
}
|
||||
remove_extent_mapping(em_tree, em);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once for the tree reference (we replaced or removed the
|
||||
* extent map from the tree).
|
||||
*/
|
||||
free_extent_map(em);
|
||||
next:
|
||||
/* Once for us (for our lookup reference). */
|
||||
free_extent_map(em);
|
||||
|
||||
em = next_em;
|
||||
}
|
||||
|
||||
write_unlock(&em_tree->lock);
|
||||
|
||||
free_extent_map(split);
|
||||
free_extent_map(split2);
|
||||
}
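A minimal sketch, assuming a hypothetical caller, of how the helper above is meant to be used: the file range is locked in the inode's io tree first, as its comment requires. Only btrfs_drop_extent_map_range(), lock_extent() and unlock_extent() come from this series; the wrapper name is made up for illustration.

static void example_drop_cached_extent_maps(struct btrfs_inode *inode, u64 start, u64 end)
{
	struct extent_state *cached = NULL;

	lock_extent(&inode->io_tree, start, end, &cached);
	btrfs_drop_extent_map_range(inode, start, end, false);
	unlock_extent(&inode->io_tree, start, end, &cached);
}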
|
||||
|
||||
/*
|
||||
* Replace a range in the inode's extent map tree with a new extent map.
|
||||
*
|
||||
* @inode: The target inode.
|
||||
* @new_em: The new extent map to add to the inode's extent map tree.
|
||||
* @modified: Indicate if the new extent map should be added to the list of
|
||||
* modified extents (for fast fsync tracking).
|
||||
*
|
||||
* Drops all the extent maps in the inode's extent map tree that intersect the
|
||||
* range of the new extent map and adds the new extent map to the tree.
|
||||
* The caller should have locked an appropriate file range in the inode's io
|
||||
* tree before calling this function.
|
||||
*/
|
||||
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
|
||||
struct extent_map *new_em,
|
||||
bool modified)
|
||||
{
|
||||
const u64 end = new_em->start + new_em->len - 1;
|
||||
struct extent_map_tree *tree = &inode->extent_tree;
|
||||
int ret;
|
||||
|
||||
ASSERT(!extent_map_in_tree(new_em));
|
||||
|
||||
/*
|
||||
* The caller has locked an appropriate file range in the inode's io
|
||||
* tree, but getting -EEXIST when adding the new extent map can still
|
||||
* happen in case there are extents that partially cover the range, and
|
||||
* this is due to two tasks operating on different parts of the extent.
|
||||
* See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
|
||||
* btrfs_get_extent") for an example and details.
|
||||
*/
|
||||
do {
|
||||
btrfs_drop_extent_map_range(inode, new_em->start, end, false);
|
||||
write_lock(&tree->lock);
|
||||
ret = add_extent_mapping(tree, new_em, modified);
|
||||
write_unlock(&tree->lock);
|
||||
} while (ret == -EEXIST);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -63,6 +63,8 @@ struct extent_map_tree {
|
||||
rwlock_t lock;
|
||||
};
|
||||
|
||||
struct btrfs_inode;
|
||||
|
||||
static inline int extent_map_in_tree(const struct extent_map *em)
|
||||
{
|
||||
return !RB_EMPTY_NODE(&em->rb_node);
|
||||
@ -104,5 +106,11 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
|
||||
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
|
||||
struct extent_map_tree *em_tree,
|
||||
struct extent_map **em_in, u64 start, u64 len);
|
||||
void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
|
||||
u64 start, u64 end,
|
||||
bool skip_pinned);
|
||||
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
|
||||
struct extent_map *new_em,
|
||||
bool modified);
|
||||
|
||||
#endif
|
||||
|
@ -118,7 +118,7 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
|
||||
return 0;
|
||||
return clear_extent_bit(&inode->file_extent_tree, start,
|
||||
start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
|
||||
start + len - 1, EXTENT_DIRTY, NULL);
|
||||
}
|
||||
|
||||
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
@@ -129,12 +129,20 @@
	return ncsums * fs_info->sectorsize;
}

int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
/*
 * Calculate the total size needed to allocate for an ordered sum structure
 * spanning @bytes in the file.
 */
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
{
	int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);

	return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
}

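As a worked example of the helper above (the numbers are assumptions for illustration, not taken from the diff): with a 4096-byte sectorsize and the default 4-byte crc32c checksum, a 1 MiB range sizes out as follows.

/*
 * num_sectors = DIV_ROUND_UP(1048576, 4096) = 256
 * size        = sizeof(struct btrfs_ordered_sum) + 256 * 4
 *             = sizeof(struct btrfs_ordered_sum) + 1024 bytes
 */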
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
u64 objectid, u64 pos,
|
||||
u64 disk_offset, u64 disk_num_bytes,
|
||||
u64 num_bytes, u64 offset, u64 ram_bytes,
|
||||
u8 compression, u8 encryption, u16 other_encoding)
|
||||
u64 objectid, u64 pos, u64 num_bytes)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_file_extent_item *item;
|
||||
@ -157,16 +165,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
|
||||
leaf = path->nodes[0];
|
||||
item = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
|
||||
btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
|
||||
btrfs_set_file_extent_offset(leaf, item, offset);
|
||||
btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
|
||||
btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
|
||||
btrfs_set_file_extent_offset(leaf, item, 0);
|
||||
btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
|
||||
btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
|
||||
btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
|
||||
btrfs_set_file_extent_generation(leaf, item, trans->transid);
|
||||
btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
|
||||
btrfs_set_file_extent_compression(leaf, item, compression);
|
||||
btrfs_set_file_extent_encryption(leaf, item, encryption);
|
||||
btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
|
||||
btrfs_set_file_extent_compression(leaf, item, 0);
|
||||
btrfs_set_file_extent_encryption(leaf, item, 0);
|
||||
btrfs_set_file_extent_other_encoding(leaf, item, 0);
|
||||
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
out:
|
||||
@ -503,7 +511,8 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
|
||||
}
|
||||
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
struct list_head *list, int search_commit)
|
||||
struct list_head *list, int search_commit,
|
||||
bool nowait)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
@ -525,6 +534,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
path->nowait = nowait;
|
||||
if (search_commit) {
|
||||
path->skip_locking = 1;
|
||||
path->reada = READA_FORWARD;
|
||||
|
fs/btrfs/file.c: 807 changed lines (diff suppressed because it is too large).
@ -48,6 +48,24 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
|
||||
struct btrfs_free_space *info, u64 offset,
|
||||
u64 bytes, bool update_stats);
|
||||
|
||||
static void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
struct rb_node *node;
|
||||
|
||||
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
|
||||
info = rb_entry(node, struct btrfs_free_space, offset_index);
|
||||
if (!info->bitmap) {
|
||||
unlink_free_space(ctl, info, true);
|
||||
kmem_cache_free(btrfs_free_space_cachep, info);
|
||||
} else {
|
||||
free_bitmap(ctl, info);
|
||||
}
|
||||
|
||||
cond_resched_lock(&ctl->tree_lock);
|
||||
}
|
||||
}
|
||||
|
||||
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
u64 offset)
|
||||
@ -126,10 +144,8 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
||||
block_group->disk_cache_state = BTRFS_DC_CLEAR;
|
||||
}
|
||||
|
||||
if (!block_group->iref) {
|
||||
if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
|
||||
block_group->inode = igrab(inode);
|
||||
block_group->iref = 1;
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
return inode;
|
||||
@ -241,8 +257,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
|
||||
clear_nlink(inode);
|
||||
/* One for the block groups ref */
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->iref) {
|
||||
block_group->iref = 0;
|
||||
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) {
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
iput(inode);
|
||||
@ -333,8 +348,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
|
||||
btrfs_i_size_write(inode, 0);
|
||||
truncate_pagecache(vfs_inode, 0);
|
||||
|
||||
lock_extent_bits(&inode->io_tree, 0, (u64)-1, &cached_state);
|
||||
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
|
||||
lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
|
||||
btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);
|
||||
|
||||
/*
|
||||
* We skip the throttling logic for free space cache inodes, so we don't
|
||||
@ -345,7 +360,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
|
||||
inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
|
||||
btrfs_inode_safe_disk_i_size_write(inode, control.last_size);
|
||||
|
||||
unlock_extent_cached(&inode->io_tree, 0, (u64)-1, &cached_state);
|
||||
unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
|
||||
if (ret)
|
||||
goto fail;
|
||||
|
||||
@ -693,6 +708,12 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
|
||||
|
||||
max_bitmaps = max_t(u64, max_bitmaps, 1);
|
||||
|
||||
if (ctl->total_bitmaps > max_bitmaps)
|
||||
btrfs_err(block_group->fs_info,
|
||||
"invalid free space control: bg start=%llu len=%llu total_bitmaps=%u unit=%u max_bitmaps=%llu bytes_per_bg=%llu",
|
||||
block_group->start, block_group->length,
|
||||
ctl->total_bitmaps, ctl->unit, max_bitmaps,
|
||||
bytes_per_bg);
|
||||
ASSERT(ctl->total_bitmaps <= max_bitmaps);
|
||||
|
||||
/*
|
||||
@ -875,7 +896,10 @@ out:
|
||||
return ret;
|
||||
free_cache:
|
||||
io_ctl_drop_pages(&io_ctl);
|
||||
|
||||
spin_lock(&ctl->tree_lock);
|
||||
__btrfs_remove_free_space_cache(ctl);
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -914,6 +938,8 @@ static int copy_free_space_cache(struct btrfs_block_group *block_group,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct lock_class_key btrfs_free_space_inode_key;
|
||||
|
||||
int load_free_space_cache(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
@ -983,6 +1009,14 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
/*
|
||||
* Reinitialize the class of struct inode's mapping->invalidate_lock for
|
||||
* free space inodes to prevent false positives related to locks for normal
|
||||
* inodes.
|
||||
*/
|
||||
lockdep_set_class(&(&inode->i_data)->invalidate_lock,
|
||||
&btrfs_free_space_inode_key);
|
||||
|
||||
ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
|
||||
path, block_group->start);
|
||||
btrfs_free_path(path);
|
||||
@ -1001,7 +1035,13 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
|
||||
if (ret == 0)
|
||||
ret = 1;
|
||||
} else {
|
||||
/*
|
||||
* We need to call the _locked variant so we don't try to update
|
||||
* the discard counters.
|
||||
*/
|
||||
spin_lock(&tmp_ctl.tree_lock);
|
||||
__btrfs_remove_free_space_cache(&tmp_ctl);
|
||||
spin_unlock(&tmp_ctl.tree_lock);
|
||||
btrfs_warn(fs_info,
|
||||
"block group %llu has wrong amount of free space",
|
||||
block_group->start);
|
||||
@ -1123,7 +1163,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
|
||||
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
|
||||
if (ret < 0) {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
|
||||
EXTENT_DELALLOC, 0, 0, NULL);
|
||||
EXTENT_DELALLOC, NULL);
|
||||
goto fail;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
@ -1135,8 +1175,8 @@ update_cache_item(struct btrfs_trans_handle *trans,
|
||||
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
|
||||
found_key.offset != offset) {
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
|
||||
inode->i_size - 1, EXTENT_DELALLOC, 0,
|
||||
0, NULL);
|
||||
inode->i_size - 1, EXTENT_DELALLOC,
|
||||
NULL);
|
||||
btrfs_release_path(path);
|
||||
goto fail;
|
||||
}
|
||||
@ -1232,7 +1272,7 @@ static int flush_dirty_cache(struct inode *inode)
|
||||
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||
if (ret)
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
|
||||
EXTENT_DELALLOC, 0, 0, NULL);
|
||||
EXTENT_DELALLOC, NULL);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@ -1252,8 +1292,8 @@ cleanup_write_cache_enospc(struct inode *inode,
|
||||
struct extent_state **cached_state)
|
||||
{
|
||||
io_ctl_drop_pages(io_ctl);
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
|
||||
i_size_read(inode) - 1, cached_state);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
|
||||
cached_state);
|
||||
}
|
||||
|
||||
static int __btrfs_wait_cache_io(struct btrfs_root *root,
|
||||
@ -1378,8 +1418,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
|
||||
&cached_state);
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
|
||||
&cached_state);
|
||||
|
||||
io_ctl_set_generation(io_ctl, trans->transid);
|
||||
|
||||
@ -1434,8 +1474,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
|
||||
io_ctl_drop_pages(io_ctl);
|
||||
io_ctl_free(io_ctl);
|
||||
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
|
||||
i_size_read(inode) - 1, &cached_state);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
|
||||
&cached_state);
|
||||
|
||||
/*
|
||||
* at this point the pages are under IO and we're happy,
|
||||
@ -2860,7 +2900,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
|
||||
if (btrfs_is_zoned(fs_info)) {
|
||||
btrfs_info(fs_info, "free space %llu active %d",
|
||||
block_group->zone_capacity - block_group->alloc_offset,
|
||||
block_group->zone_is_active);
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
|
||||
&block_group->runtime_flags));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2964,34 +3005,6 @@ static void __btrfs_return_cluster_to_free_space(
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
|
||||
static void __btrfs_remove_free_space_cache_locked(
|
||||
struct btrfs_free_space_ctl *ctl)
|
||||
{
|
||||
struct btrfs_free_space *info;
|
||||
struct rb_node *node;
|
||||
|
||||
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
|
||||
info = rb_entry(node, struct btrfs_free_space, offset_index);
|
||||
if (!info->bitmap) {
|
||||
unlink_free_space(ctl, info, true);
|
||||
kmem_cache_free(btrfs_free_space_cachep, info);
|
||||
} else {
|
||||
free_bitmap(ctl, info);
|
||||
}
|
||||
|
||||
cond_resched_lock(&ctl->tree_lock);
|
||||
}
|
||||
}
|
||||
|
||||
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
|
||||
{
|
||||
spin_lock(&ctl->tree_lock);
|
||||
__btrfs_remove_free_space_cache_locked(ctl);
|
||||
if (ctl->block_group)
|
||||
btrfs_discard_update_discardable(ctl->block_group);
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
}
|
||||
|
||||
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
|
||||
@ -3009,7 +3022,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
|
||||
|
||||
cond_resched_lock(&ctl->tree_lock);
|
||||
}
|
||||
__btrfs_remove_free_space_cache_locked(ctl);
|
||||
__btrfs_remove_free_space_cache(ctl);
|
||||
btrfs_discard_update_discardable(block_group);
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
|
||||
@ -3992,7 +4005,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
|
||||
*trimmed = 0;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->removed) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -4022,7 +4035,7 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
|
||||
*trimmed = 0;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->removed) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return 0;
|
||||
}
|
||||
@ -4044,7 +4057,7 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
|
||||
*trimmed = 0;
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->removed) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return 0;
|
||||
}
|
||||
|
@ -113,7 +113,6 @@ int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
|
||||
u64 bytenr, u64 size);
|
||||
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
|
||||
u64 bytenr, u64 size);
|
||||
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
|
||||
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
|
||||
bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
|
||||
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
|
||||
|
@ -1453,8 +1453,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
|
||||
ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
|
||||
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
|
||||
|
||||
caching_ctl->progress = key.objectid;
|
||||
|
||||
offset = key.objectid;
|
||||
while (offset < key.objectid + key.offset) {
|
||||
bit = free_space_test_bit(block_group, path, offset);
|
||||
@ -1490,8 +1488,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
|
||||
goto out;
|
||||
}
|
||||
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
@ -1531,8 +1527,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
|
||||
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
|
||||
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
|
||||
|
||||
caching_ctl->progress = key.objectid;
|
||||
|
||||
total_found += add_new_free_space(block_group, key.objectid,
|
||||
key.objectid + key.offset);
|
||||
if (total_found > CACHING_CTL_WAKE_UP) {
|
||||
@ -1552,8 +1546,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
|
||||
goto out;
|
||||
}
|
||||
|
||||
caching_ctl->progress = (u64)-1;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
|
fs/btrfs/inode.c: 516 changed lines (diff suppressed because it is too large).
@ -1218,10 +1218,10 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
|
||||
|
||||
/* get the big lock and read metadata off disk */
|
||||
if (!locked)
|
||||
lock_extent_bits(io_tree, start, end, &cached);
|
||||
lock_extent(io_tree, start, end, &cached);
|
||||
em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
|
||||
if (!locked)
|
||||
unlock_extent_cached(io_tree, start, end, &cached);
|
||||
unlock_extent(io_tree, start, end, &cached);
|
||||
|
||||
if (IS_ERR(em))
|
||||
return NULL;
|
||||
@ -1333,10 +1333,10 @@ again:
|
||||
while (1) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
|
||||
lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
|
||||
unlock_extent_cached(&inode->io_tree, page_start, page_end,
|
||||
&cached_state);
|
||||
unlock_extent(&inode->io_tree, page_start, page_end,
|
||||
&cached_state);
|
||||
if (!ordered)
|
||||
break;
|
||||
|
||||
@ -1616,7 +1616,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
|
||||
return ret;
|
||||
clear_extent_bit(&inode->io_tree, start, start + len - 1,
|
||||
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
|
||||
EXTENT_DEFRAG, 0, 0, cached_state);
|
||||
EXTENT_DEFRAG, cached_state);
|
||||
set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state);
|
||||
|
||||
/* Update the page status */
|
||||
@ -1666,9 +1666,9 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
|
||||
wait_on_page_writeback(pages[i]);
|
||||
|
||||
/* Lock the pages range */
|
||||
lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT,
|
||||
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
|
||||
&cached_state);
|
||||
lock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
|
||||
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
|
||||
&cached_state);
|
||||
/*
|
||||
* Now we have a consistent view about the extent map, re-check
|
||||
* which range really needs to be defragged.
|
||||
@ -1694,9 +1694,9 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
|
||||
kfree(entry);
|
||||
}
|
||||
unlock_extent:
|
||||
unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT,
|
||||
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
|
||||
&cached_state);
|
||||
unlock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
|
||||
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
|
||||
&cached_state);
|
||||
free_pages:
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
if (pages[i]) {
|
||||
|
@@ -285,6 +285,31 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree in
 * nowait mode until we end up with a lock on the root node or returning to
 * avoid blocking.
 *
 * Return: root extent buffer with read lock held or -EAGAIN.
 */
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		if (!btrfs_try_tree_read_lock(eb)) {
			free_extent_buffer(eb);
			return ERR_PTR(-EAGAIN);
		}
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

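An illustrative sketch (the caller is hypothetical; only btrfs_try_read_lock_root_node() is added by the patch) of how a nowait path would propagate -EAGAIN instead of blocking on the root lock.

static int example_nowait_read_root(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	eb = btrfs_try_read_lock_root_node(root);
	if (IS_ERR(eb))
		return PTR_ERR(eb);	/* -EAGAIN: retry later from a blocking context */

	/* ... inspect the root node under the read lock ... */

	btrfs_tree_read_unlock(eb);
	free_extent_buffer(eb);
	return 0;
}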
/*
 * DREW locks
 * ==========

@ -94,6 +94,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb);
|
||||
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
|
||||
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
|
||||
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root);
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)
|
||||
|
@@ -88,6 +88,41 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
	return NULL;
}

/*
 * Search @root from an entry that starts or comes after @bytenr.
 *
 * @root:	the root to search.
 * @bytenr:	bytenr to search from.
 *
 * Return the rb_node that starts at or after @bytenr. If there is no entry at
 * or after @bytenr return NULL.
 */
static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
						     u64 bytenr)
{
	struct rb_node *node = root->rb_node, *ret = NULL;
	struct rb_simple_node *entry, *ret_entry = NULL;

	while (node) {
		entry = rb_entry(node, struct rb_simple_node, rb_node);

		if (bytenr < entry->bytenr) {
			if (!ret || entry->bytenr < ret_entry->bytenr) {
				ret = node;
				ret_entry = entry;
			}

			node = node->rb_left;
		} else if (bytenr > entry->bytenr) {
			node = node->rb_right;
		} else {
			return node;
		}
	}

	return ret;
}

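A minimal sketch, with an assumed caller name, of iterating every rb_simple_node at or after a given bytenr using the new helper; only rb_simple_search_first() and struct rb_simple_node come from the patch.

static inline void example_walk_from(struct rb_root *root, u64 bytenr)
{
	struct rb_node *node = rb_simple_search_first(root, bytenr);

	while (node) {
		struct rb_simple_node *entry;

		entry = rb_entry(node, struct rb_simple_node, rb_node);
		/* entry->bytenr >= bytenr here; process the entry, then advance. */
		node = rb_next(node);
	}
}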
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
|
||||
struct rb_node *node)
|
||||
{
|
||||
|
@@ -524,7 +524,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *node;
bool pending;
bool freespace_inode;

/*
* If this is a free space inode the thread has not acquired the ordered
* extents lockdep map.
*/
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);

btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
/* This is paired with btrfs_add_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
@@ -580,6 +588,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
}
}

btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);

spin_lock(&root->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
root->nr_ordered_extents--;
@@ -594,6 +604,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
}
spin_unlock(&root->ordered_extent_lock);
wake_up(&entry->wait);
if (!freespace_inode)
btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
}

static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
@@ -712,9 +724,16 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
u64 start = entry->file_offset;
u64 end = start + entry->num_bytes - 1;
struct btrfs_inode *inode = BTRFS_I(entry->inode);
bool freespace_inode;

trace_btrfs_ordered_extent_start(inode, entry);

/*
* If this is a free space inode do not take the ordered extents lockdep
* map.
*/
freespace_inode = btrfs_is_free_space_inode(inode);

/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
@@ -723,6 +742,8 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
if (wait) {
if (!freespace_inode)
btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
}
@@ -1022,7 +1043,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
cachedp = cached_state;

while (1) {
lock_extent_bits(&inode->io_tree, start, end, cachedp);
lock_extent(&inode->io_tree, start, end, cachedp);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
@@ -1035,12 +1056,37 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
refcount_dec(&cache->refs);
break;
}
unlock_extent_cached(&inode->io_tree, start, end, cachedp);
unlock_extent(&inode->io_tree, start, end, cachedp);
btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
}
}

/*
* Lock the passed range and ensure all pending ordered extents in it are run
* to completion in nowait mode.
*
* Return true if btrfs_lock_ordered_range does not return any extents,
* otherwise false.
*/
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
{
struct btrfs_ordered_extent *ordered;

if (!try_lock_extent(&inode->io_tree, start, end))
return false;

ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
if (!ordered)
return true;

btrfs_put_ordered_extent(ordered);
unlock_extent(&inode->io_tree, start, end, NULL);

return false;
}

static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
u64 len)
{
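The new boolean helper above lets a nowait writer probe for pending ordered extents without sleeping: it either takes the extent lock and reports the range clean, or backs off. An illustrative, simplified calling pattern (this is a sketch only, not the actual fs/btrfs/file.c code; the helper names and signatures are taken from the declarations in this diff):

/* Sketch: lock a file range for a buffered write, honouring nowait semantics. */
static int lock_range_for_write(struct btrfs_inode *inode, u64 start, u64 end,
				bool nowait)
{
	if (nowait) {
		if (!btrfs_try_lock_ordered_range(inode, start, end))
			return -EAGAIN;	/* let io_uring retry from a worker thread */
		return 0;
	}

	/* Blocking path: waits for any ordered extents in the range to finish. */
	btrfs_lock_and_flush_ordered_range(inode, start, end, NULL);
	return 0;
}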
@@ -160,18 +160,6 @@ struct btrfs_ordered_extent {
struct block_device *bdev;
};

/*
* calculates the total size you need to allocate for an ordered sum
* structure spanning 'bytes' in the file
*/
static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);

return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
}

static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
@@ -218,6 +206,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
u64 end,
struct extent_state **cached_state);
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
u64 post);
int __init ordered_data_init(void);
@@ -270,11 +270,8 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 ino = btrfs_ino(BTRFS_I(inode));
int ret;

ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);

return ret;
return iterate_object_props(root, path, ino, inode_prop_iterator, inode);
}

static int prop_compression_validate(const struct btrfs_inode *inode,
@@ -275,7 +275,7 @@ static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *p
}

/*
* Add relation specified by two qgoup ids.
* Add relation specified by two qgroup ids.
*
* Must be called with qgroup_lock held.
*
@@ -333,6 +333,13 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
}
#endif

static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info)
{
fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
}

/*
* The full config is read in one go, only called from open_ctree()
* It doesn't use any locking, as at this point we're still single-threaded
@@ -401,7 +408,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
if (btrfs_qgroup_status_generation(l, ptr) !=
fs_info->generation) {
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_err(fs_info,
"qgroup generation mismatch, marked as inconsistent");
}
@@ -419,7 +426,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
btrfs_err(fs_info, "inconsistent qgroup config");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
}
if (!qgroup) {
qgroup = add_qgroup_rb(fs_info, found_key.offset);
@@ -878,7 +885,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAGS_MASK);
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
btrfs_set_qgroup_status_rescan(l, ptr,
fs_info->qgroup_rescan_progress.objectid);
@@ -1052,7 +1060,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAGS_MASK);
btrfs_set_qgroup_status_rescan(leaf, ptr, 0);

btrfs_mark_buffer_dirty(leaf);
@@ -1174,6 +1183,21 @@ out_add_root:
fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
} else {
/*
* We have set both BTRFS_FS_QUOTA_ENABLED and
* BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
* -EINPROGRESS. That can happen because someone started the
* rescan worker by calling quota rescan ioctl before we
* attempted to initialize the rescan worker. Failure due to
* quotas disabled in the meanwhile is not possible, because
* we are holding a write lock on fs_info->subvol_sem, which
* is also acquired when disabling quotas.
* Ignore such error, and any other error would need to undo
* everything we did in the transaction we just committed.
*/
ASSERT(ret == -EINPROGRESS);
ret = 0;
}

out_free_path:
@@ -1255,6 +1279,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
spin_unlock(&fs_info->qgroup_lock);

btrfs_free_qgroup_config(fs_info);
@@ -1717,7 +1742,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,

ret = update_qgroup_limit_item(trans, qgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_info(fs_info, "unable to update quota limit for %llu",
qgroupid);
}
@@ -1790,10 +1815,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
*/
ASSERT(trans != NULL);

if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
return 0;

ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
true);
if (ret < 0) {
trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(trans->fs_info);
btrfs_warn(trans->fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
ret);
@@ -2269,7 +2297,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(dst_path);
if (ret < 0)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}

@@ -2280,6 +2308,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
int level;
u8 drop_subptree_thres;
struct extent_buffer *eb = root_eb;
struct btrfs_path *path = NULL;

@@ -2289,6 +2318,23 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;

spin_lock(&fs_info->qgroup_lock);
drop_subptree_thres = fs_info->qgroup_drop_subtree_thres;
spin_unlock(&fs_info->qgroup_lock);

/*
* This function only gets called for snapshot drop, if we hit a high
* node here, it means we are going to change ownership for quite a lot
* of extents, which will greatly slow down btrfs_commit_transaction().
*
* So here if we find a high tree here, we just skip the accounting and
* mark qgroup inconsistent.
*/
if (root_level >= drop_subptree_thres) {
qgroup_mark_inconsistent(fs_info);
return 0;
}

if (!extent_buffer_uptodate(root_eb)) {
ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL);
if (ret)
@@ -2604,7 +2650,8 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
* If quotas get disabled meanwhile, the resources need to be freed and
* we can't just exit here.
*/
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
goto out_free;

if (new_roots) {
@@ -2700,7 +2747,8 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
num_dirty_extents++;
trace_btrfs_qgroup_account_extents(fs_info, record);

if (!ret) {
if (!ret && !(fs_info->qgroup_flags &
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
/*
* Old roots should be searched when inserting qgroup
* extent record
@@ -2773,12 +2821,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_info_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
ret = update_qgroup_limit_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
spin_lock(&fs_info->qgroup_lock);
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
@@ -2789,7 +2835,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)

ret = update_qgroup_status_item(trans);
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);

return ret;
}
@@ -2907,7 +2953,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,

ret = update_qgroup_limit_item(trans, dstgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_info(fs_info,
"unable to update quota limit for %llu",
dstgroup->qgroupid);
@@ -3013,7 +3059,7 @@ out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (need_rescan)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}

@@ -3286,7 +3332,8 @@ static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
{
return btrfs_fs_closing(fs_info) ||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
}

static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@@ -3351,7 +3398,8 @@ out:
}

mutex_lock(&fs_info->qgroup_rescan_lock);
if (!stopped)
if (!stopped ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN)
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (trans) {
ret = update_qgroup_status_item(trans);
@@ -3362,6 +3410,7 @@ out:
}
}
fs_info->qgroup_rescan_running = false;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
complete_all(&fs_info->qgroup_rescan_completion);
mutex_unlock(&fs_info->qgroup_rescan_lock);

@@ -3372,6 +3421,8 @@ out:

if (stopped) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) {
btrfs_info(fs_info, "qgroup scan cancelled");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
err > 0 ? " (inconsistency flag cleared)" : "");
@@ -3434,6 +3485,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,

memset(&fs_info->qgroup_rescan_progress, 0,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_flags &= ~(BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -4231,8 +4284,7 @@ out_unlock:
spin_unlock(&blocks->lock);
out:
if (ret < 0)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}

@@ -4319,7 +4371,7 @@ out:
btrfs_err_rl(fs_info,
"failed to account subtree at bytenr %llu: %d",
subvol_eb->start, ret);
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
}
return ret;
}
@@ -100,6 +100,9 @@
* subtree rescan for them.
*/

#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1UL << 3)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1UL << 4)

/*
* Record a dirty extent, and info qgroup to update quota on it
* TODO: Use kmem cache to alloc it.
@@ -275,7 +275,6 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
/* Also inherit the bitmaps from @victim. */
bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
dest->stripe_nsectors);
dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}

@@ -814,8 +813,6 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *extra;

if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
/*
* Clear the data bitmap, as the rbio may be cached for later usage.
* do this before before unlock_stripe() so there will be no new bio
@@ -946,6 +943,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
spin_lock_init(&rbio->bio_list_lock);
INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list);
btrfs_get_bioc(bioc);
rbio->bioc = bioc;
rbio->nr_pages = num_pages;
rbio->nr_sectors = num_sectors;
@@ -1813,15 +1811,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)

rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
btrfs_put_bioc(bioc);
ret = PTR_ERR(rbio);
goto out_dec_counter;
goto fail;
}
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);

rbio->generic_bio_cnt = 1;

/*
* don't plug on full rbios, just get them out the door
* as quickly as we can
@@ -1829,7 +1824,7 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
if (rbio_is_full(rbio)) {
ret = full_stripe_write(rbio);
if (ret)
goto out_dec_counter;
goto fail;
return;
}

@@ -1844,13 +1839,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
} else {
ret = __raid56_parity_write(rbio);
if (ret)
goto out_dec_counter;
goto fail;
}

return;

out_dec_counter:
btrfs_bio_counter_dec(fs_info);
fail:
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
}
@@ -2198,18 +2192,11 @@ cleanup:
* of the drive.
*/
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
int mirror_num, bool generic_io)
int mirror_num)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;

if (generic_io) {
ASSERT(bioc->mirror_num == mirror_num);
btrfs_bio(bio)->mirror_num = mirror_num;
} else {
btrfs_get_bioc(bioc);
}

rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
@@ -2225,14 +2212,11 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
__func__, bio->bi_iter.bi_sector << 9,
(u64)bio->bi_iter.bi_size, bioc->map_type);
kfree(rbio);
__free_raid_bio(rbio);
bio->bi_status = BLK_STS_IOERR;
goto out_end_bio;
}

if (generic_io)
rbio->generic_bio_cnt = 1;

/*
* Loop retry:
* for 'mirror == 2', reconstruct from all other stripes.
@@ -2261,8 +2245,6 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
return;

out_end_bio:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bioc(bioc);
bio_endio(bio);
}

@@ -2326,13 +2308,6 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
ASSERT(i < rbio->real_stripes);

bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);

/*
* We have already increased bio_counter when getting bioc, record it
* so we can free it at rbio_orig_end_io().
*/
rbio->generic_bio_cnt = 1;

return rbio;
}

@@ -2772,12 +2747,6 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
return NULL;
}

/*
* When we get bioc, we have already increased bio_counter, record it
* so we can free it at rbio_orig_end_io()
*/
rbio->generic_bio_cnt = 1;

return rbio;
}

@@ -89,8 +89,6 @@ struct btrfs_raid_bio {
*/
int bio_list_bytes;

int generic_bio_cnt;

refcount_t refs;

atomic_t stripes_pending;
@@ -166,7 +164,7 @@ static inline int nr_data_stripes(const struct map_lookup *map)
struct btrfs_device;

void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
int mirror_num, bool generic_io);
int mirror_num);
void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);

void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
@@ -92,7 +92,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,

clear_extent_bit(&inode->io_tree, file_offset, range_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, NULL);
NULL);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
if (ret)
goto out_unlock;
@@ -615,8 +615,8 @@ out:
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
{
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
}

static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
@@ -634,8 +634,8 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
swap(range1_end, range2_end);
}

lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end);
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end);
lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);

btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
@@ -1124,10 +1124,10 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
if (!ret)
continue;

btrfs_drop_extent_cache(BTRFS_I(inode),
key.offset, end, 1);
btrfs_drop_extent_map_range(BTRFS_I(inode),
key.offset, end, true);
unlock_extent(&BTRFS_I(inode)->io_tree,
key.offset, end);
key.offset, end, NULL);
}
}

@@ -1566,9 +1566,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
}

/* the lock_extent waits for read_folio to complete */
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
}
return 0;
}
@@ -2869,13 +2869,13 @@ static noinline_for_stack int prealloc_file_extent_cluster(
else
end = cluster->end - offset;

lock_extent(&inode->io_tree, start, end);
lock_extent(&inode->io_tree, start, end, NULL);
num_bytes = end + 1 - start;
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
cur_offset = end + 1;
unlock_extent(&inode->io_tree, start, end);
unlock_extent(&inode->io_tree, start, end, NULL);
if (ret)
break;
}
@@ -2890,7 +2890,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
u64 start, u64 end, u64 block_start)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
int ret = 0;

@@ -2904,18 +2903,11 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
em->block_start = block_start;
set_bit(EXTENT_FLAG_PINNED, &em->flags);

lock_extent(&BTRFS_I(inode)->io_tree, start, end);
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
}
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
}
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
free_extent_map(em);

return ret;
}

@@ -3006,7 +2998,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
goto release_page;

/* Mark the range delalloc and dirty for later writeback */
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
clamped_end, 0, NULL);
if (ret) {
@@ -3039,7 +3031,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
boundary_start, boundary_end,
EXTENT_BOUNDARY);
}
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
cur += clamped_len;

@@ -4339,7 +4331,7 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
disk_bytenr = file_pos + inode->index_cnt;
csum_root = btrfs_csum_root(fs_info, disk_bytenr);
ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0);
disk_bytenr + len - 1, &list, 0, false);
if (ret)
goto out;
@@ -337,7 +337,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
struct extent_buffer *leaf;
struct btrfs_key key;
unsigned long ptr;
int err = 0;
int ret;

path = btrfs_alloc_path();
@@ -350,7 +349,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0) {
err = ret;
goto out;
} else if (ret == 0) {
leaf = path->nodes[0];
@@ -360,18 +358,18 @@ again:
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
(btrfs_root_ref_name_len(leaf, ref) != name_len) ||
memcmp_extent_buffer(leaf, name, ptr, name_len)) {
err = -ENOENT;
ret = -ENOENT;
goto out;
}
*sequence = btrfs_root_ref_sequence(leaf, ref);

ret = btrfs_del_item(trans, tree_root, path);
if (ret) {
err = ret;
if (ret)
goto out;
}
} else
err = -ENOENT;
} else {
ret = -ENOENT;
goto out;
}

if (key.type == BTRFS_ROOT_BACKREF_KEY) {
btrfs_release_path(path);
@@ -383,7 +381,7 @@ again:

out:
btrfs_free_path(path);
return err;
return ret;
}

/*
fs/btrfs/scrub.c (666): file diff suppressed because it is too large.
fs/btrfs/send.c (461):
@@ -15,6 +15,7 @@
#include <linux/string.h>
#include <linux/compat.h>
#include <linux/crc32c.h>
#include <linux/fsverity.h>

#include "send.h"
#include "ctree.h"
@@ -127,6 +128,8 @@ struct send_ctx {
bool cur_inode_new_gen;
bool cur_inode_deleted;
bool ignore_cur_inode;
bool cur_inode_needs_verity;
void *verity_descriptor;

u64 send_progress;

@@ -624,6 +627,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
}

TLV_PUT_DEFINE_INT(8)
TLV_PUT_DEFINE_INT(32)
TLV_PUT_DEFINE_INT(64)

@@ -842,17 +846,32 @@ out:
return ret;
}

struct btrfs_inode_info {
u64 size;
u64 gen;
u64 mode;
u64 uid;
u64 gid;
u64 rdev;
u64 fileattr;
u64 nlink;
};

/*
* Helper function to retrieve some fields from an inode item.
*/
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
u64 *gid, u64 *rdev, u64 *fileattr)
static int get_inode_info(struct btrfs_root *root, u64 ino,
struct btrfs_inode_info *info)
{
int ret;
struct btrfs_path *path;
struct btrfs_inode_item *ii;
struct btrfs_key key;

path = alloc_path_for_send();
if (!path)
return -ENOMEM;

key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
@@ -860,47 +879,43 @@ static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
if (ret) {
if (ret > 0)
ret = -ENOENT;
return ret;
goto out;
}

if (!info)
goto out;

ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
if (size)
*size = btrfs_inode_size(path->nodes[0], ii);
if (gen)
*gen = btrfs_inode_generation(path->nodes[0], ii);
if (mode)
*mode = btrfs_inode_mode(path->nodes[0], ii);
if (uid)
*uid = btrfs_inode_uid(path->nodes[0], ii);
if (gid)
*gid = btrfs_inode_gid(path->nodes[0], ii);
if (rdev)
*rdev = btrfs_inode_rdev(path->nodes[0], ii);
info->size = btrfs_inode_size(path->nodes[0], ii);
info->gen = btrfs_inode_generation(path->nodes[0], ii);
info->mode = btrfs_inode_mode(path->nodes[0], ii);
info->uid = btrfs_inode_uid(path->nodes[0], ii);
info->gid = btrfs_inode_gid(path->nodes[0], ii);
info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
/*
* Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
* otherwise logically split to 32/32 parts.
*/
if (fileattr)
*fileattr = btrfs_inode_flags(path->nodes[0], ii);
info->fileattr = btrfs_inode_flags(path->nodes[0], ii);

out:
btrfs_free_path(path);
return ret;
}

static int get_inode_info(struct btrfs_root *root,
u64 ino, u64 *size, u64 *gen,
u64 *mode, u64 *uid, u64 *gid,
u64 *rdev, u64 *fileattr)
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
{
struct btrfs_path *path;
int ret;
struct btrfs_inode_info info;

path = alloc_path_for_send();
if (!path)
return -ENOMEM;
ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
rdev, fileattr);
btrfs_free_path(path);
if (!gen)
return -EPERM;

ret = get_inode_info(root, ino, &info);
if (!ret)
*gen = info.gen;
return ret;
}

@@ -1643,21 +1658,22 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
int right_ret;
u64 left_gen;
u64 right_gen;
struct btrfs_inode_info info;

ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_info(sctx->send_root, ino, &info);
if (ret < 0 && ret != -ENOENT)
goto out;
left_ret = ret;
left_ret = (info.nlink == 0) ? -ENOENT : ret;
left_gen = info.gen;

if (!sctx->parent_root) {
right_ret = -ENOENT;
} else {
ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
NULL, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->parent_root, ino, &info);
if (ret < 0 && ret != -ENOENT)
goto out;
right_ret = ret;
right_ret = (info.nlink == 0) ? -ENOENT : ret;
right_gen = info.gen;
}

if (!left_ret && !right_ret) {
@@ -1816,8 +1832,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
btrfs_release_path(path);

if (dir_gen) {
ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(root, parent_dir, dir_gen);
if (ret < 0)
goto out;
}
@@ -1874,6 +1889,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
int ret = 0;
u64 gen;
u64 other_inode = 0;
struct btrfs_inode_info info;

if (!sctx->parent_root)
goto out;
@@ -1888,8 +1904,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* and we can just unlink this entry.
*/
if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &gen);
if (ret < 0 && ret != -ENOENT)
goto out;
if (ret) {
@@ -1916,13 +1931,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
*/
if (other_inode > sctx->send_progress ||
is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, who_mode, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->parent_root, other_inode, &info);
if (ret < 0)
goto out;

ret = 1;
*who_ino = other_inode;
*who_gen = info.gen;
*who_mode = info.mode;
} else {
ret = 0;
}
@@ -1955,8 +1971,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
goto out;

if (dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &gen);
if (ret < 0 && ret != -ENOENT)
goto out;
if (ret) {
@@ -1978,8 +1993,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
goto out;
}

ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, ow_inode, &gen);
if (ret < 0)
goto out;

@@ -2645,6 +2659,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
int ret = 0;
struct fs_path *p;
int cmd;
struct btrfs_inode_info info;
u64 gen;
u64 mode;
u64 rdev;
@@ -2656,10 +2671,12 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
return -ENOMEM;

if (ino != sctx->cur_ino) {
ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
NULL, NULL, &rdev, NULL);
ret = get_inode_info(sctx->send_root, ino, &info);
if (ret < 0)
goto out;
gen = info.gen;
mode = info.mode;
rdev = info.rdev;
} else {
gen = sctx->cur_inode_gen;
mode = sctx->cur_inode_mode;
@@ -3359,8 +3376,7 @@ finish:
/*
* The parent inode might have been deleted in the send snapshot
*/
ret = get_inode_info(sctx->send_root, cur->dir, NULL,
NULL, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->send_root, cur->dir, NULL);
if (ret == -ENOENT) {
ret = 0;
continue;
@@ -3534,12 +3550,10 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
goto out;
}

ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
&left_gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
if (ret < 0)
goto out;
ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
&right_gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
@@ -3669,8 +3683,7 @@ static int is_ancestor(struct btrfs_root *root,
cur_offset = item_size;
}

ret = get_inode_info(root, parent, NULL, &parent_gen,
NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(root, parent, &parent_gen);
if (ret < 0)
goto out;
ret = check_ino_in_path(root, ino1, ino1_gen,
@@ -3760,9 +3773,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
memcmp(path_before->start, path_after->start, len1))) {
u64 parent_ino_gen;

ret = get_inode_info(sctx->parent_root, ino, NULL,
&parent_ino_gen, NULL, NULL, NULL,
NULL, NULL);
ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
if (ret < 0)
goto out;
if (ino_gen == parent_ino_gen) {
@@ -4441,8 +4452,7 @@ static int record_new_ref_if_needed(int num, u64 dir, int index,
struct recorded_ref *ref;
u64 dir_gen;

ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
if (ret < 0)
goto out;

@@ -4472,8 +4482,7 @@ static int record_deleted_ref_if_needed(int num, u64 dir, int index,
struct recorded_ref *ref;
u64 dir_gen;

ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
if (ret < 0)
goto out;

@@ -4886,6 +4895,84 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
return ret;
}

static int send_verity(struct send_ctx *sctx, struct fs_path *path,
struct fsverity_descriptor *desc)
{
int ret;

ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
if (ret < 0)
goto out;

TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
le8_to_cpu(desc->hash_algorithm));
TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
1U << le8_to_cpu(desc->log_blocksize));
TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
le8_to_cpu(desc->salt_size));
TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
le32_to_cpu(desc->sig_size));

ret = send_cmd(sctx);

tlv_put_failure:
out:
return ret;
}

static int process_verity(struct send_ctx *sctx)
{
int ret = 0;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
struct inode *inode;
struct fs_path *p;

inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
if (IS_ERR(inode))
return PTR_ERR(inode);

ret = btrfs_get_verity_descriptor(inode, NULL, 0);
if (ret < 0)
goto iput;

if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
ret = -EMSGSIZE;
goto iput;
}
if (!sctx->verity_descriptor) {
sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
GFP_KERNEL);
if (!sctx->verity_descriptor) {
ret = -ENOMEM;
goto iput;
}
}

ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
if (ret < 0)
goto iput;

p = fs_path_alloc();
if (!p) {
ret = -ENOMEM;
goto iput;
}
ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
if (ret < 0)
goto free_path;

ret = send_verity(sctx, p, sctx->verity_descriptor);
if (ret < 0)
goto free_path;

free_path:
fs_path_free(p);
iput:
iput(inode);
return ret;
}

static inline u64 max_send_read_size(const struct send_ctx *sctx)
{
return sctx->send_max_size - SZ_16K;
@@ -5056,8 +5143,7 @@ static int send_clone(struct send_ctx *sctx,
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);

if (clone_root->root == sctx->send_root) {
ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
&gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
if (ret < 0)
goto out;
ret = get_cur_path(sctx, clone_root->ino, gen, p);
@@ -5536,6 +5622,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
struct btrfs_inode_info info;
u64 clone_src_i_size = 0;

/*
@@ -5565,12 +5652,11 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
* There are inodes that have extents that lie behind its i_size. Don't
* accept clones from these extents.
*/
ret = __get_inode_info(clone_root->root, path, clone_root->ino,
&clone_src_i_size, NULL, NULL, NULL, NULL, NULL,
NULL);
ret = get_inode_info(clone_root->root, clone_root->ino, &info);
btrfs_release_path(path);
if (ret < 0)
goto out;
clone_src_i_size = info.size;

/*
* We can't send a clone operation for the entire range if we find
@@ -6259,6 +6345,7 @@ out:
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
int ret = 0;
struct btrfs_inode_info info;
u64 left_mode;
u64 left_uid;
u64 left_gid;
@@ -6301,11 +6388,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
goto out;

ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
&left_mode, &left_uid, &left_gid, NULL, &left_fileattr);
ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
if (ret < 0)
goto out;
left_mode = info.mode;
left_uid = info.uid;
left_gid = info.gid;
left_fileattr = info.fileattr;

if (!sctx->parent_root || sctx->cur_inode_new) {
need_chown = 1;
@@ -6316,11 +6405,14 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
} else {
u64 old_size;

ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
&old_size, NULL, &right_mode, &right_uid,
&right_gid, NULL, &right_fileattr);
ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
if (ret < 0)
goto out;
old_size = info.size;
right_mode = info.mode;
right_uid = info.uid;
right_gid = info.gid;
right_fileattr = info.fileattr;

if (left_uid != right_uid || left_gid != right_gid)
need_chown = 1;
@@ -6377,6 +6469,11 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret < 0)
goto out;
}
if (sctx->cur_inode_needs_verity) {
ret = process_verity(sctx);
if (ret < 0)
goto out;
}

ret = send_capabilities(sctx);
if (ret < 0)
@@ -6407,86 +6504,6 @@ out:
return ret;
}

struct parent_paths_ctx {
struct list_head *refs;
struct send_ctx *sctx;
};

static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
void *ctx)
{
struct parent_paths_ctx *ppctx = ctx;

/*
* Pass 0 as the generation for the directory, we don't care about it
* here as we have no new references to add, we just want to delete all
* references for an inode.
*/
return record_ref_in_tree(&ppctx->sctx->rbtree_deleted_refs, ppctx->refs,
name, dir, 0, ppctx->sctx);
}

/*
* Issue unlink operations for all paths of the current inode found in the
* parent snapshot.
*/
static int btrfs_unlink_all_paths(struct send_ctx *sctx)
{
LIST_HEAD(deleted_refs);
struct btrfs_path *path;
struct btrfs_root *root = sctx->parent_root;
struct btrfs_key key;
struct btrfs_key found_key;
struct parent_paths_ctx ctx;
int iter_ret = 0;
int ret;

path = alloc_path_for_send();
if (!path)
return -ENOMEM;

key.objectid = sctx->cur_ino;
key.type = BTRFS_INODE_REF_KEY;
key.offset = 0;

ctx.refs = &deleted_refs;
ctx.sctx = sctx;

btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
if (found_key.objectid != key.objectid)
break;
if (found_key.type != key.type &&
found_key.type != BTRFS_INODE_EXTREF_KEY)
break;

ret = iterate_inode_ref(root, path, &found_key, 1,
record_parent_ref, &ctx);
if (ret < 0)
goto out;
}
/* Catch error found during iteration */
if (iter_ret < 0) {
ret = iter_ret;
goto out;
}

while (!list_empty(&deleted_refs)) {
struct recorded_ref *ref;

ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
ret = send_unlink(sctx, ref->full_path);
if (ret < 0)
goto out;
recorded_ref_free(ref);
}
ret = 0;
out:
btrfs_free_path(path);
if (ret)
__free_recorded_refs(&deleted_refs);
return ret;
}

static void close_current_inode(struct send_ctx *sctx)
{
u64 i_size;
@@ -6577,25 +6594,36 @@ static int changed_inode(struct send_ctx *sctx,
* file descriptor against it or turning a RO snapshot into RW mode,
* keep an open file descriptor against a file, delete it and then
* turn the snapshot back to RO mode before using it for a send
* operation. So if we find such cases, ignore the inode and all its
* items completely if it's a new inode, or if it's a changed inode
* make sure all its previous paths (from the parent snapshot) are all
* unlinked and all other the inode items are ignored.
* operation. The former is what the receiver operation does.
* Therefore, if we want to send these snapshots soon after they're
* received, we need to handle orphan inodes as well. Moreover, orphans
* can appear not only in the send snapshot but also in the parent
* snapshot. Here are several cases:
*
* Case 1: BTRFS_COMPARE_TREE_NEW
* | send snapshot | action
* --------------------------------
* nlink | 0 | ignore
*
* Case 2: BTRFS_COMPARE_TREE_DELETED
* | parent snapshot | action
* ----------------------------------
* nlink | 0 | as usual
* Note: No unlinks will be sent because there're no paths for it.
*
* Case 3: BTRFS_COMPARE_TREE_CHANGED
* | | parent snapshot | send snapshot | action
* -----------------------------------------------------------------------
* subcase 1 | nlink | 0 | 0 | ignore
* subcase 2 | nlink | >0 | 0 | new_gen(deletion)
* subcase 3 | nlink | 0 | >0 | new_gen(creation)
*
*/
if (result == BTRFS_COMPARE_TREE_NEW ||
result == BTRFS_COMPARE_TREE_CHANGED) {
u32 nlinks;

nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
if (nlinks == 0) {
if (result == BTRFS_COMPARE_TREE_NEW) {
if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
sctx->ignore_cur_inode = true;
if (result == BTRFS_COMPARE_TREE_CHANGED)
ret = btrfs_unlink_all_paths(sctx);
goto out;
}
}

if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
@@ -6616,6 +6644,16 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->right_path->nodes[0], right_ii);
} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
u32 new_nlinks, old_nlinks;

new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
if (new_nlinks == 0 && old_nlinks == 0) {
sctx->ignore_cur_inode = true;
goto out;
} else if (new_nlinks == 0 || old_nlinks == 0) {
sctx->cur_inode_new_gen = 1;
}
/*
* We need to do some special handling in case the inode was
* reported as changed with a changed generation number. This
@@ -6642,38 +6680,44 @@ static int changed_inode(struct send_ctx *sctx,
/*
* Now process the inode as if it was new.
*/
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
sctx->cur_inode_size = btrfs_inode_size(
sctx->left_path->nodes[0], left_ii);
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0], left_ii);
sctx->cur_inode_rdev = btrfs_inode_rdev(
sctx->left_path->nodes[0], left_ii);
ret = send_create_inode_if_needed(sctx);
if (ret < 0)
goto out;
if (new_nlinks > 0) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
sctx->cur_inode_size = btrfs_inode_size(
sctx->left_path->nodes[0],
left_ii);
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0],
left_ii);
sctx->cur_inode_rdev = btrfs_inode_rdev(
sctx->left_path->nodes[0],
left_ii);
ret = send_create_inode_if_needed(sctx);
if (ret < 0)
goto out;

ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
if (ret < 0)
goto out;
/*
* Advance send_progress now as we did not get into
* process_recorded_refs_if_needed in the new_gen case.
*/
sctx->send_progress = sctx->cur_ino + 1;
ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
if (ret < 0)
goto out;
/*
* Advance send_progress now as we did not get
* into process_recorded_refs_if_needed in the
* new_gen case.
*/
sctx->send_progress = sctx->cur_ino + 1;

/*
* Now process all extents and xattrs of the inode as if
* they were all new.
*/
ret = process_all_extents(sctx);
if (ret < 0)
goto out;
ret = process_all_new_xattrs(sctx);
if (ret < 0)
goto out;
/*
* Now process all extents and xattrs of the
* inode as if they were all new.
*/
ret = process_all_extents(sctx);
if (ret < 0)
goto out;
ret = process_all_new_xattrs(sctx);
if (ret < 0)
goto out;
}
} else {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = false;
@@ -6785,18 +6829,27 @@ static int changed_extent(struct send_ctx *sctx,
return ret;
}

static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
{
int ret = 0;

if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
sctx->cur_inode_needs_verity = true;
}
return ret;
}

static int dir_changed(struct send_ctx *sctx, u64 dir)
{
u64 orig_gen, new_gen;
int ret;

ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &new_gen);
if (ret)
return ret;

ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
if (ret)
return ret;

@@ -6939,6 +6992,9 @@ static int changed_cb(struct btrfs_path *left_path,
ret = changed_xattr(sctx, result);
else if (key->type == BTRFS_EXTENT_DATA_KEY)
ret = changed_extent(sctx, result);
else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
key->offset == 0)
ret = changed_verity(sctx, result);
}

out:
@@ -8036,6 +8092,7 @@ out:
kvfree(sctx->clone_roots);
kfree(sctx->send_buf_pages);
kvfree(sctx->send_buf);
kvfree(sctx->verity_descriptor);

name_cache_free(sctx);

@@ -92,8 +92,11 @@ enum btrfs_send_cmd {
BTRFS_SEND_C_ENCODED_WRITE = 25,
BTRFS_SEND_C_MAX_V2 = 25,

/* Version 3 */
BTRFS_SEND_C_ENABLE_VERITY = 26,
BTRFS_SEND_C_MAX_V3 = 26,
/* End */
BTRFS_SEND_C_MAX = 25,
BTRFS_SEND_C_MAX = 26,
};

/* attributes in send stream */
@@ -160,8 +163,14 @@ enum {
BTRFS_SEND_A_ENCRYPTION = 31,
BTRFS_SEND_A_MAX_V2 = 31,

/* End */
BTRFS_SEND_A_MAX = 31,
/* Version 3 */
BTRFS_SEND_A_VERITY_ALGORITHM = 32,
BTRFS_SEND_A_VERITY_BLOCK_SIZE = 33,
BTRFS_SEND_A_VERITY_SALT_DATA = 34,
BTRFS_SEND_A_VERITY_SIG_DATA = 35,
BTRFS_SEND_A_MAX_V3 = 35,

__BTRFS_SEND_A_MAX = 35,
};

long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);

@@ -293,32 +293,36 @@ out:
return ret;
}

void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
u64 bytes_readonly, u64 bytes_zone_unusable,
bool active, struct btrfs_space_info **space_info)
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group)
{
struct btrfs_space_info *found;
int factor;
int factor, index;

factor = btrfs_bg_type_to_factor(flags);
factor = btrfs_bg_type_to_factor(block_group->flags);

found = btrfs_find_space_info(info, flags);
found = btrfs_find_space_info(info, block_group->flags);
ASSERT(found);
spin_lock(&found->lock);
found->total_bytes += total_bytes;
if (active)
found->active_total_bytes += total_bytes;
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->bytes_readonly += bytes_readonly;
found->bytes_zone_unusable += bytes_zone_unusable;
if (total_bytes > 0)
found->total_bytes += block_group->length;
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
found->active_total_bytes += block_group->length;
found->disk_total += block_group->length * factor;
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
found->bytes_readonly += block_group->bytes_super;
found->bytes_zone_unusable += block_group->zone_unusable;
if (block_group->length > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
spin_unlock(&found->lock);
*space_info = found;

block_group->space_info = found;

index = btrfs_bg_flags_to_raid_index(block_group->flags);
down_write(&found->groups_sem);
list_add_tail(&block_group->list, &found->block_groups[index]);
up_write(&found->groups_sem);
}

struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@@ -472,28 +476,47 @@ do { \
spin_unlock(&__rsv->lock); \
} while (0)

static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info)
static const char *space_info_flag_to_str(const struct btrfs_space_info *space_info)
{
lockdep_assert_held(&info->lock);

/* The free space could be negative in case of overcommit */
btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
info->flags,
(s64)(info->total_bytes - btrfs_space_info_used(info, true)),
info->full ? "" : "not ");
btrfs_info(fs_info,
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
info->bytes_reserved, info->bytes_may_use,
info->bytes_readonly, info->bytes_zone_unusable);
switch (space_info->flags) {
case BTRFS_BLOCK_GROUP_SYSTEM:
return "SYSTEM";
case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
return "DATA+METADATA";
case BTRFS_BLOCK_GROUP_DATA:
return "DATA";
case BTRFS_BLOCK_GROUP_METADATA:
return "METADATA";
default:
return "UNKNOWN";
}
}

static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
DUMP_BLOCK_RSV(fs_info, global_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
|
||||
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
|
||||
}
|
||||
|
||||
static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *info)
|
||||
{
|
||||
const char *flag_str = space_info_flag_to_str(info);
|
||||
lockdep_assert_held(&info->lock);
|
||||
|
||||
/* The free space could be negative in case of overcommit */
|
||||
btrfs_info(fs_info, "space_info %s has %lld free, is %sfull",
|
||||
flag_str,
|
||||
(s64)(info->total_bytes - btrfs_space_info_used(info, true)),
|
||||
info->full ? "" : "not ");
|
||||
btrfs_info(fs_info,
|
||||
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
|
||||
info->total_bytes, info->bytes_used, info->bytes_pinned,
|
||||
info->bytes_reserved, info->bytes_may_use,
|
||||
info->bytes_readonly, info->bytes_zone_unusable);
|
||||
}
|
||||
|
||||
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
@ -505,6 +528,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
|
||||
|
||||
spin_lock(&info->lock);
|
||||
__btrfs_dump_space_info(fs_info, info);
|
||||
dump_global_block_rsv(fs_info);
|
||||
spin_unlock(&info->lock);
|
||||
|
||||
if (!dump_block_groups)
|
||||
@ -1662,7 +1686,6 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
|
||||
&space_info->priority_tickets);
|
||||
}
|
||||
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
|
||||
used += orig_bytes;
|
||||
/*
|
||||
* We will do the space reservation dance during log replay,
|
||||
* which means we won't have fs_info->fs_root set, so don't do
|
||||
@ -1737,7 +1760,8 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
|
||||
int ret;
|
||||
|
||||
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
|
||||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
|
||||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE ||
|
||||
flush == BTRFS_RESERVE_NO_FLUSH);
|
||||
ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
|
||||
|
||||
ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
|
||||
@ -1749,3 +1773,17 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Dump all the space infos when we abort a transaction due to ENOSPC. */
|
||||
__cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_space_info *space_info;
|
||||
|
||||
btrfs_info(fs_info, "dumping space info:");
|
||||
list_for_each_entry(space_info, &fs_info->space_info, list) {
|
||||
spin_lock(&space_info->lock);
|
||||
__btrfs_dump_space_info(fs_info, space_info);
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
dump_global_block_rsv(fs_info);
|
||||
}
|
||||
|
@ -123,10 +123,8 @@ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
|
||||
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
|
||||
|
||||
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
u64 total_bytes, u64 bytes_used,
|
||||
u64 bytes_readonly, u64 bytes_zone_unusable,
|
||||
bool active, struct btrfs_space_info **space_info);
|
||||
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
|
||||
struct btrfs_block_group *block_group);
|
||||
void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
|
||||
u64 chunk_size);
|
||||
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
|
||||
@ -159,4 +157,7 @@ static inline void btrfs_space_info_free_bytes_may_use(
|
||||
}
|
||||
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
|
||||
|
||||
#endif /* BTRFS_SPACE_INFO_H */
|
||||
|
112
fs/btrfs/super.c
112
fs/btrfs/super.c
@ -346,12 +346,14 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
|
||||
__cold
|
||||
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
|
||||
const char *function,
|
||||
unsigned int line, int errno)
|
||||
unsigned int line, int errno, bool first_hit)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
WRITE_ONCE(trans->aborted, errno);
|
||||
WRITE_ONCE(trans->transaction->aborted, errno);
|
||||
if (first_hit && errno == -ENOSPC)
|
||||
btrfs_dump_space_info_for_trans_abort(fs_info);
|
||||
/* Wake up anybody who may be waiting on this transaction */
|
||||
wake_up(&fs_info->transaction_wait);
|
||||
wake_up(&fs_info->transaction_blocked_wait);
|
||||
@ -626,6 +628,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
|
||||
int saved_compress_level;
|
||||
bool saved_compress_force;
|
||||
int no_compress = 0;
|
||||
const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
|
||||
|
||||
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
|
||||
btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
|
||||
@ -1137,10 +1140,12 @@ out:
|
||||
}
|
||||
if (!ret)
|
||||
ret = btrfs_check_mountopts_zoned(info);
|
||||
if (!ret && btrfs_test_opt(info, SPACE_CACHE))
|
||||
btrfs_info(info, "disk space caching is enabled");
|
||||
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
|
||||
btrfs_info(info, "using free space tree");
|
||||
if (!ret && !remounting) {
|
||||
if (btrfs_test_opt(info, SPACE_CACHE))
|
||||
btrfs_info(info, "disk space caching is enabled");
|
||||
if (btrfs_test_opt(info, FREE_SPACE_TREE))
|
||||
btrfs_info(info, "using free space tree");
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2009,14 +2014,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
if (ret)
|
||||
goto restore;
|
||||
|
||||
/* V1 cache is not supported for subpage mount. */
|
||||
if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
|
||||
btrfs_warn(fs_info,
|
||||
"v1 space cache is not supported for page size %lu with sectorsize %u",
|
||||
PAGE_SIZE, fs_info->sectorsize);
|
||||
ret = -EINVAL;
|
||||
ret = btrfs_check_features(fs_info, sb);
|
||||
if (ret < 0)
|
||||
goto restore;
|
||||
}
|
||||
|
||||
btrfs_remount_begin(fs_info, old_opts, *flags);
|
||||
btrfs_resize_thread_pool(fs_info,
|
||||
fs_info->thread_pool_size, old_thread_pool_size);
|
||||
@ -2550,11 +2551,71 @@ static int btrfs_freeze(struct super_block *sb)
|
||||
return btrfs_commit_transaction(trans);
|
||||
}
|
||||
|
||||
static int check_dev_super(struct btrfs_device *dev)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = dev->fs_info;
|
||||
struct btrfs_super_block *sb;
|
||||
int ret = 0;
|
||||
|
||||
/* This should be called with fs still frozen. */
|
||||
ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
|
||||
|
||||
/* Missing dev, no need to check. */
|
||||
if (!dev->bdev)
|
||||
return 0;
|
||||
|
||||
/* Only need to check the primary super block. */
|
||||
sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
|
||||
if (IS_ERR(sb))
|
||||
return PTR_ERR(sb);
|
||||
|
||||
/* Btrfs_validate_super() includes fsid check against super->fsid. */
|
||||
ret = btrfs_validate_super(fs_info, sb, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
|
||||
btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
|
||||
btrfs_super_generation(sb),
|
||||
fs_info->last_trans_committed);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
btrfs_release_disk_super(sb);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_unfreeze(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_device *device;
|
||||
int ret = 0;
|
||||
|
||||
/*
|
||||
* Make sure the fs is not changed by accident (like hibernation then
|
||||
* modified by other OS).
|
||||
* If we found anything wrong, we mark the fs error immediately.
|
||||
*
|
||||
* And since the fs is frozen, no one can modify the fs yet, thus
|
||||
* we don't need to hold device_list_mutex.
|
||||
*/
|
||||
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
|
||||
ret = check_dev_super(device);
|
||||
if (ret < 0) {
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"super block on devid %llu got modified unexpectedly",
|
||||
device->devid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
|
||||
|
||||
/*
|
||||
* We still return 0, to allow VFS layer to unfreeze the fs even the
|
||||
* above checks failed. Since the fs is either fine or read-only, we're
|
||||
* safe to continue, without causing further damage.
|
||||
*/
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2662,17 +2723,21 @@ static int __init init_btrfs_fs(void)
|
||||
if (err)
|
||||
goto free_compress;
|
||||
|
||||
err = extent_io_init();
|
||||
err = extent_state_init_cachep();
|
||||
if (err)
|
||||
goto free_cachep;
|
||||
|
||||
err = extent_state_cache_init();
|
||||
err = extent_buffer_init_cachep();
|
||||
if (err)
|
||||
goto free_extent_io;
|
||||
goto free_extent_cachep;
|
||||
|
||||
err = btrfs_bioset_init();
|
||||
if (err)
|
||||
goto free_eb_cachep;
|
||||
|
||||
err = extent_map_init();
|
||||
if (err)
|
||||
goto free_extent_state_cache;
|
||||
goto free_bioset;
|
||||
|
||||
err = ordered_data_init();
|
||||
if (err)
|
||||
@ -2724,10 +2789,12 @@ free_ordered_data:
|
||||
ordered_data_exit();
|
||||
free_extent_map:
|
||||
extent_map_exit();
|
||||
free_extent_state_cache:
|
||||
extent_state_cache_exit();
|
||||
free_extent_io:
|
||||
extent_io_exit();
|
||||
free_bioset:
|
||||
btrfs_bioset_exit();
|
||||
free_eb_cachep:
|
||||
extent_buffer_free_cachep();
|
||||
free_extent_cachep:
|
||||
extent_state_free_cachep();
|
||||
free_cachep:
|
||||
btrfs_destroy_cachep();
|
||||
free_compress:
|
||||
@ -2746,8 +2813,9 @@ static void __exit exit_btrfs_fs(void)
|
||||
btrfs_prelim_ref_exit();
|
||||
ordered_data_exit();
|
||||
extent_map_exit();
|
||||
extent_state_cache_exit();
|
||||
extent_io_exit();
|
||||
btrfs_bioset_exit();
|
||||
extent_state_free_cachep();
|
||||
extent_buffer_free_cachep();
|
||||
btrfs_interface_exit();
|
||||
unregister_filesystem(&btrfs_fs_type);
|
||||
btrfs_exit_sysfs();
|
||||
|
172
fs/btrfs/sysfs.c
172
fs/btrfs/sysfs.c
@ -35,12 +35,12 @@
|
||||
* qgroup_attrs /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>
|
||||
* space_info_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>
|
||||
* raid_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>
|
||||
* discard_attrs /sys/fs/btrfs/<uuid>/discard
|
||||
*
|
||||
* When built with BTRFS_CONFIG_DEBUG:
|
||||
*
|
||||
* btrfs_debug_feature_attrs /sys/fs/btrfs/debug
|
||||
* btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
|
||||
* discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
|
||||
*/
|
||||
|
||||
struct btrfs_feature_attr {
|
||||
@ -286,6 +286,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
|
||||
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
|
||||
BTRFS_FEAT_ATTR_COMPAT_RO(block_group_tree, BLOCK_GROUP_TREE);
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
|
||||
@ -317,6 +318,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
|
||||
BTRFS_FEAT_ATTR_PTR(metadata_uuid),
|
||||
BTRFS_FEAT_ATTR_PTR(free_space_tree),
|
||||
BTRFS_FEAT_ATTR_PTR(raid1c34),
|
||||
BTRFS_FEAT_ATTR_PTR(block_group_tree),
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
BTRFS_FEAT_ATTR_PTR(zoned),
|
||||
#endif
|
||||
@ -429,12 +431,10 @@ static const struct attribute_group btrfs_static_feature_attr_group = {
|
||||
.attrs = btrfs_supported_static_feature_attrs,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
|
||||
/*
|
||||
* Discard statistics and tunables
|
||||
*/
|
||||
#define discard_to_fs_info(_kobj) to_fs_info((_kobj)->parent->parent)
|
||||
#define discard_to_fs_info(_kobj) to_fs_info(get_btrfs_kobj(_kobj))
|
||||
|
||||
static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
@ -583,11 +583,11 @@ BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
|
||||
btrfs_discard_max_discard_size_store);
|
||||
|
||||
/*
|
||||
* Per-filesystem debugging of discard (when mounted with discard=async).
|
||||
* Per-filesystem stats for discard (when mounted with discard=async).
|
||||
*
|
||||
* Path: /sys/fs/btrfs/<uuid>/debug/discard/
|
||||
* Path: /sys/fs/btrfs/<uuid>/discard/
|
||||
*/
|
||||
static const struct attribute *discard_debug_attrs[] = {
|
||||
static const struct attribute *discard_attrs[] = {
|
||||
BTRFS_ATTR_PTR(discard, discardable_bytes),
|
||||
BTRFS_ATTR_PTR(discard, discardable_extents),
|
||||
BTRFS_ATTR_PTR(discard, discard_bitmap_bytes),
|
||||
@ -599,6 +599,8 @@ static const struct attribute *discard_debug_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
|
||||
/*
|
||||
* Per-filesystem runtime debugging exported via sysfs.
|
||||
*
|
||||
@ -837,11 +839,8 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
|
||||
|
||||
return ret;
|
||||
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
@ -1150,25 +1149,6 @@ static ssize_t btrfs_generation_show(struct kobject *kobj,
|
||||
}
|
||||
BTRFS_ATTR(, generation, btrfs_generation_show);
|
||||
|
||||
/*
|
||||
* Look for an exact string @string in @buffer with possible leading or
|
||||
* trailing whitespace
|
||||
*/
|
||||
static bool strmatch(const char *buffer, const char *string)
|
||||
{
|
||||
const size_t len = strlen(string);
|
||||
|
||||
/* Skip leading whitespace */
|
||||
buffer = skip_spaces(buffer);
|
||||
|
||||
/* Match entire string, check if the rest is whitespace or empty */
|
||||
if (strncmp(string, buffer, len) == 0 &&
|
||||
strlen(skip_spaces(buffer + len)) == 0)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char * const btrfs_read_policy_name[] = { "pid" };
|
||||
|
||||
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
|
||||
@ -1202,7 +1182,7 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
|
||||
if (strmatch(buf, btrfs_read_policy_name[i])) {
|
||||
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
|
||||
if (i != fs_devices->read_policy) {
|
||||
fs_devices->read_policy = i;
|
||||
btrfs_info(fs_devices->fs_info,
|
||||
@ -1222,11 +1202,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
|
||||
|
||||
return ret;
|
||||
return sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
@ -1427,13 +1404,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
|
||||
kobject_del(fs_info->space_info_kobj);
|
||||
kobject_put(fs_info->space_info_kobj);
|
||||
}
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (fs_info->discard_debug_kobj) {
|
||||
sysfs_remove_files(fs_info->discard_debug_kobj,
|
||||
discard_debug_attrs);
|
||||
kobject_del(fs_info->discard_debug_kobj);
|
||||
kobject_put(fs_info->discard_debug_kobj);
|
||||
if (fs_info->discard_kobj) {
|
||||
sysfs_remove_files(fs_info->discard_kobj, discard_attrs);
|
||||
kobject_del(fs_info->discard_kobj);
|
||||
kobject_put(fs_info->discard_kobj);
|
||||
}
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (fs_info->debug_kobj) {
|
||||
sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
|
||||
kobject_del(fs_info->debug_kobj);
|
||||
@ -2001,20 +1977,18 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
|
||||
error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
|
||||
if (error)
|
||||
goto failure;
|
||||
#endif
|
||||
|
||||
/* Discard directory */
|
||||
fs_info->discard_debug_kobj = kobject_create_and_add("discard",
|
||||
fs_info->debug_kobj);
|
||||
if (!fs_info->discard_debug_kobj) {
|
||||
fs_info->discard_kobj = kobject_create_and_add("discard", fsid_kobj);
|
||||
if (!fs_info->discard_kobj) {
|
||||
error = -ENOMEM;
|
||||
goto failure;
|
||||
}
|
||||
|
||||
error = sysfs_create_files(fs_info->discard_debug_kobj,
|
||||
discard_debug_attrs);
|
||||
error = sysfs_create_files(fs_info->discard_kobj, discard_attrs);
|
||||
if (error)
|
||||
goto failure;
|
||||
#endif
|
||||
|
||||
error = addrm_unknown_feature_attrs(fs_info, true);
|
||||
if (error)
|
||||
@ -2041,6 +2015,98 @@ failure:
|
||||
return error;
|
||||
}
|
||||
|
||||
static ssize_t qgroup_enabled_show(struct kobject *qgroups_kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
|
||||
bool enabled;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
enabled = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON;
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", enabled);
|
||||
}
|
||||
BTRFS_ATTR(qgroups, enabled, qgroup_enabled_show);
|
||||
|
||||
static ssize_t qgroup_inconsistent_show(struct kobject *qgroups_kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
|
||||
bool inconsistent;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
inconsistent = (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT);
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", inconsistent);
|
||||
}
|
||||
BTRFS_ATTR(qgroups, inconsistent, qgroup_inconsistent_show);
|
||||
|
||||
static ssize_t qgroup_drop_subtree_thres_show(struct kobject *qgroups_kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
|
||||
u8 result;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
result = fs_info->qgroup_drop_subtree_thres;
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", result);
|
||||
}
|
||||
|
||||
static ssize_t qgroup_drop_subtree_thres_store(struct kobject *qgroups_kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
|
||||
u8 new_thres;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou8(buf, 10, &new_thres);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
if (new_thres > BTRFS_MAX_LEVEL)
|
||||
return -EINVAL;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
fs_info->qgroup_drop_subtree_thres = new_thres;
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(qgroups, drop_subtree_threshold, qgroup_drop_subtree_thres_show,
|
||||
qgroup_drop_subtree_thres_store);
|
||||
|
||||
/*
|
||||
* Qgroups global info
|
||||
*
|
||||
* Path: /sys/fs/btrfs/<uuid>/qgroups/
|
||||
*/
|
||||
static struct attribute *qgroups_attrs[] = {
|
||||
BTRFS_ATTR_PTR(qgroups, enabled),
|
||||
BTRFS_ATTR_PTR(qgroups, inconsistent),
|
||||
BTRFS_ATTR_PTR(qgroups, drop_subtree_threshold),
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(qgroups);
|
||||
|
||||
static void qgroups_release(struct kobject *kobj)
|
||||
{
|
||||
kfree(kobj);
|
||||
}
|
||||
|
||||
static struct kobj_type qgroups_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_groups = qgroups_groups,
|
||||
.release = qgroups_release,
|
||||
};
|
||||
|
||||
static inline struct btrfs_fs_info *qgroup_kobj_to_fs_info(struct kobject *kobj)
|
||||
{
|
||||
return to_fs_info(kobj->parent->parent);
|
||||
@ -2166,11 +2232,15 @@ int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info)
|
||||
if (fs_info->qgroups_kobj)
|
||||
return 0;
|
||||
|
||||
fs_info->qgroups_kobj = kobject_create_and_add("qgroups", fsid_kobj);
|
||||
if (!fs_info->qgroups_kobj) {
|
||||
ret = -ENOMEM;
|
||||
fs_info->qgroups_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
|
||||
if (!fs_info->qgroups_kobj)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = kobject_init_and_add(fs_info->qgroups_kobj, &qgroups_ktype,
|
||||
fsid_kobj, "qgroups");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
rbtree_postorder_for_each_entry_safe(qgroup, next,
|
||||
&fs_info->qgroup_tree, node) {
|
||||
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);
|
||||
|
@ -243,7 +243,7 @@ void btrfs_free_dummy_block_group(struct btrfs_block_group *cache)
|
||||
{
|
||||
if (!cache)
|
||||
return;
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
kfree(cache->free_space_ctl);
|
||||
kfree(cache);
|
||||
}
|
||||
|
@ -80,7 +80,6 @@ static void extent_flag_to_str(const struct extent_state *state, char *dest)
|
||||
PRINT_ONE_FLAG(state, dest, cur, NODATASUM);
|
||||
PRINT_ONE_FLAG(state, dest, cur, CLEAR_META_RESV);
|
||||
PRINT_ONE_FLAG(state, dest, cur, NEED_WAIT);
|
||||
PRINT_ONE_FLAG(state, dest, cur, DAMAGED);
|
||||
PRINT_ONE_FLAG(state, dest, cur, NORESERVE);
|
||||
PRINT_ONE_FLAG(state, dest, cur, QGROUP_RESERVED);
|
||||
PRINT_ONE_FLAG(state, dest, cur, CLEAR_DATA_RESV);
|
||||
@ -172,7 +171,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
sectorsize - 1, start, end);
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(tmp, start, end);
|
||||
unlock_extent(tmp, start, end, NULL);
|
||||
unlock_page(locked_page);
|
||||
put_page(locked_page);
|
||||
|
||||
@ -208,7 +207,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_err("there were unlocked pages in the range");
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(tmp, start, end);
|
||||
unlock_extent(tmp, start, end, NULL);
|
||||
/* locked_page was unlocked above */
|
||||
put_page(locked_page);
|
||||
|
||||
@ -263,7 +262,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
test_err("pages in range were not all locked");
|
||||
goto out_bits;
|
||||
}
|
||||
unlock_extent(tmp, start, end);
|
||||
unlock_extent(tmp, start, end, NULL);
|
||||
|
||||
/*
|
||||
* Now to test where we run into a page that is no longer dirty in the
|
||||
|
@ -82,7 +82,7 @@ static int test_extents(struct btrfs_block_group *cache)
|
||||
}
|
||||
|
||||
/* Cleanup */
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -149,7 +149,7 @@ static int test_bitmaps(struct btrfs_block_group *cache, u32 sectorsize)
|
||||
return -1;
|
||||
}
|
||||
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -230,7 +230,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
|
||||
return -1;
|
||||
}
|
||||
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
/* Now with the extent entry offset into the bitmap */
|
||||
ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
|
||||
@ -266,7 +266,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
|
||||
* [ bitmap ]
|
||||
* [ del ]
|
||||
*/
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
|
||||
if (ret) {
|
||||
test_err("couldn't add bitmap %d", ret);
|
||||
@ -291,7 +291,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
|
||||
return -1;
|
||||
}
|
||||
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
/*
|
||||
* This blew up before, we have part of the free space in a bitmap and
|
||||
@ -317,7 +317,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
|
||||
return ret;
|
||||
}
|
||||
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -629,7 +629,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
/*
|
||||
* Now test a similar scenario, but where our extent entry is located
|
||||
@ -819,7 +819,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
|
||||
return ret;
|
||||
|
||||
cache->free_space_ctl->op = orig_free_space_ops;
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -868,7 +868,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
|
||||
}
|
||||
|
||||
/* Now validate bitmaps do the correct thing. */
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
for (i = 0; i < 2; i++) {
|
||||
offset = i * BITS_PER_BITMAP * sectorsize;
|
||||
bytes = (i + 1) * SZ_1M;
|
||||
@ -891,7 +891,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
|
||||
}
|
||||
|
||||
/* Now validate bitmaps with different ->max_extent_size. */
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
orig_free_space_ops = cache->free_space_ctl->op;
|
||||
cache->free_space_ctl->op = &test_free_space_ops;
|
||||
|
||||
@ -998,7 +998,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
|
||||
}
|
||||
|
||||
cache->free_space_ctl->op = orig_free_space_ops;
|
||||
__btrfs_remove_free_space_cache(cache->free_space_ctl);
|
||||
btrfs_remove_free_space_cache(cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -267,7 +267,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
goto out;
|
||||
}
|
||||
free_extent_map(em);
|
||||
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
|
||||
|
||||
/*
|
||||
* All of the magic numbers are based on the mapping setup in
|
||||
@ -975,7 +975,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
BTRFS_MAX_EXTENT_SIZE >> 1,
|
||||
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
|
||||
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_UPTODATE, 0, 0, NULL);
|
||||
EXTENT_UPTODATE, NULL);
|
||||
if (ret) {
|
||||
test_err("clear_extent_bit returned %d", ret);
|
||||
goto out;
|
||||
@ -1043,7 +1043,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
BTRFS_MAX_EXTENT_SIZE + sectorsize,
|
||||
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
|
||||
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_UPTODATE, 0, 0, NULL);
|
||||
EXTENT_UPTODATE, NULL);
|
||||
if (ret) {
|
||||
test_err("clear_extent_bit returned %d", ret);
|
||||
goto out;
|
||||
@ -1076,7 +1076,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
|
||||
/* Empty */
|
||||
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
|
||||
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_UPTODATE, 0, 0, NULL);
|
||||
EXTENT_UPTODATE, NULL);
|
||||
if (ret) {
|
||||
test_err("clear_extent_bit returned %d", ret);
|
||||
goto out;
|
||||
@ -1092,7 +1092,7 @@ out:
|
||||
if (ret)
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
|
||||
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
|
||||
EXTENT_UPTODATE, 0, 0, NULL);
|
||||
EXTENT_UPTODATE, NULL);
|
||||
iput(inode);
|
||||
btrfs_free_dummy_root(root);
|
||||
btrfs_free_dummy_fs_info(fs_info);
|
||||
|
@ -161,7 +161,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
|
||||
struct btrfs_transaction *cur_trans = trans->transaction;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *root, *tmp;
|
||||
struct btrfs_caching_control *caching_ctl, *next;
|
||||
|
||||
/*
|
||||
* At this point no one can be using this transaction to modify any tree
|
||||
@ -196,46 +195,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
|
||||
}
|
||||
spin_unlock(&cur_trans->dropped_roots_lock);
|
||||
|
||||
/*
|
||||
* We have to update the last_byte_to_unpin under the commit_root_sem,
|
||||
* at the same time we swap out the commit roots.
|
||||
*
|
||||
* This is because we must have a real view of the last spot the caching
|
||||
* kthreads were while caching. Consider the following views of the
|
||||
* extent tree for a block group
|
||||
*
|
||||
* commit root
|
||||
* +----+----+----+----+----+----+----+
|
||||
* |\\\\| |\\\\|\\\\| |\\\\|\\\\|
|
||||
* +----+----+----+----+----+----+----+
|
||||
* 0 1 2 3 4 5 6 7
|
||||
*
|
||||
* new commit root
|
||||
* +----+----+----+----+----+----+----+
|
||||
* | | | |\\\\| | |\\\\|
|
||||
* +----+----+----+----+----+----+----+
|
||||
* 0 1 2 3 4 5 6 7
|
||||
*
|
||||
* If the cache_ctl->progress was at 3, then we are only allowed to
|
||||
* unpin [0,1) and [2,3], because the caching thread has already
|
||||
* processed those extents. We are not allowed to unpin [5,6), because
|
||||
* the caching thread will re-start it's search from 3, and thus find
|
||||
* the hole from [4,6) to add to the free space cache.
|
||||
*/
|
||||
write_lock(&fs_info->block_group_cache_lock);
|
||||
list_for_each_entry_safe(caching_ctl, next,
|
||||
&fs_info->caching_block_groups, list) {
|
||||
struct btrfs_block_group *cache = caching_ctl->block_group;
|
||||
|
||||
if (btrfs_block_group_done(cache)) {
|
||||
cache->last_byte_to_unpin = (u64)-1;
|
||||
list_del_init(&caching_ctl->list);
|
||||
btrfs_put_caching_control(caching_ctl);
|
||||
} else {
|
||||
cache->last_byte_to_unpin = caching_ctl->progress;
|
||||
}
|
||||
}
|
||||
write_unlock(&fs_info->block_group_cache_lock);
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
}
|
||||
|
||||
@ -313,6 +272,8 @@ loop:
|
||||
atomic_inc(&cur_trans->num_writers);
|
||||
extwriter_counter_inc(cur_trans, type);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
|
||||
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
@ -334,16 +295,23 @@ loop:
|
||||
if (!cur_trans)
|
||||
return -ENOMEM;
|
||||
|
||||
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
|
||||
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (fs_info->running_transaction) {
|
||||
/*
|
||||
* someone started a transaction after we unlocked. Make sure
|
||||
* to redo the checks above
|
||||
*/
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
|
||||
kfree(cur_trans);
|
||||
goto loop;
|
||||
} else if (BTRFS_FS_ERROR(fs_info)) {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
|
||||
kfree(cur_trans);
|
||||
return -EROFS;
|
||||
}
|
||||
@ -397,7 +365,7 @@ loop:
|
||||
spin_lock_init(&cur_trans->releasing_ebs_lock);
|
||||
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
||||
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
|
||||
IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
|
||||
IO_TREE_TRANS_DIRTY_PAGES, NULL);
|
||||
extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
|
||||
IO_TREE_FS_PINNED_EXTENTS, NULL);
|
||||
fs_info->generation++;
|
||||
@ -541,6 +509,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
|
||||
refcount_inc(&cur_trans->use_count);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
wait_event(fs_info->transaction_wait,
|
||||
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
|
||||
TRANS_ABORTED(cur_trans));
|
||||
@ -625,7 +594,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
*/
|
||||
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
|
||||
if (flush == BTRFS_RESERVE_FLUSH_ALL &&
|
||||
delayed_refs_rsv->full == 0) {
|
||||
btrfs_block_rsv_full(delayed_refs_rsv) == 0) {
|
||||
delayed_refs_bytes = num_bytes;
|
||||
num_bytes <<= 1;
|
||||
}
|
||||
@ -650,7 +619,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
if (rsv->space_info->force_alloc)
|
||||
do_chunk_alloc = true;
|
||||
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
|
||||
!delayed_refs_rsv->full) {
|
||||
!btrfs_block_rsv_full(delayed_refs_rsv)) {
|
||||
/*
|
||||
* Some people call with btrfs_start_transaction(root, 0)
|
||||
* because they can be throttled, but have some other mechanism
|
||||
@ -859,6 +828,15 @@ static noinline void wait_for_commit(struct btrfs_transaction *commit,
|
||||
u64 transid = commit->transid;
|
||||
bool put = false;
|
||||
|
||||
/*
|
||||
* At the moment this function is called with min_state either being
|
||||
* TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED.
|
||||
*/
|
||||
if (min_state == TRANS_STATE_COMPLETED)
|
||||
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
|
||||
else
|
||||
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
|
||||
|
||||
while (1) {
|
||||
wait_event(commit->commit_wait, commit->state >= min_state);
|
||||
if (put)
|
||||
@ -1022,6 +1000,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
|
||||
extwriter_counter_dec(cur_trans, trans->type);
|
||||
|
||||
cond_wake_up(&cur_trans->writer_wait);
|
||||
|
||||
btrfs_lockdep_release(info, btrfs_trans_num_extwriters);
|
||||
btrfs_lockdep_release(info, btrfs_trans_num_writers);
|
||||
|
||||
btrfs_put_transaction(cur_trans);
|
||||
|
||||
if (current->journal_info == trans)
|
||||
@ -1134,7 +1116,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
* it's safe to do it (through extent_io_tree_release()).
|
||||
*/
|
||||
err = clear_extent_bit(dirty_pages, start, end,
|
||||
EXTENT_NEED_WAIT, 0, 0, &cached_state);
|
||||
EXTENT_NEED_WAIT, &cached_state);
|
||||
if (err == -ENOMEM)
|
||||
err = 0;
|
||||
if (!err)
|
||||
@ -1912,14 +1894,6 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
|
||||
super->cache_generation = 0;
|
||||
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
|
||||
super->uuid_tree_generation = root_item->generation;
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
|
||||
root_item = &fs_info->block_group_root->root_item;
|
||||
|
||||
super->block_group_root = root_item->bytenr;
|
||||
super->block_group_root_generation = root_item->generation;
|
||||
super->block_group_root_level = root_item->level;
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
|
||||
@ -1967,6 +1941,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
|
||||
* Wait for the current transaction commit to start and block
|
||||
* subsequent transaction joins
|
||||
*/
|
||||
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
wait_event(fs_info->transaction_blocked_wait,
|
||||
cur_trans->state >= TRANS_STATE_COMMIT_START ||
|
||||
TRANS_ABORTED(cur_trans));
|
||||
@ -1994,6 +1969,12 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
|
||||
if (cur_trans == fs_info->running_transaction) {
|
||||
cur_trans->state = TRANS_STATE_COMMIT_DOING;
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
/*
|
||||
* The thread has already released the lockdep map as reader
|
||||
* already in btrfs_commit_transaction().
|
||||
*/
|
||||
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
|
||||
wait_event(cur_trans->writer_wait,
|
||||
atomic_read(&cur_trans->num_writers) == 1);
|
||||
|
||||
@ -2118,12 +2099,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
ktime_t interval;
|
||||
|
||||
ASSERT(refcount_read(&trans->use_count) == 1);
|
||||
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
|
||||
/* Stop the commit early if ->aborted is set */
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
goto lockdep_trans_commit_start_release;
|
||||
}
|
||||
|
||||
btrfs_trans_release_metadata(trans);
|
||||
@ -2140,10 +2121,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* Any running threads may add more while we are here.
|
||||
*/
|
||||
ret = btrfs_run_delayed_refs(trans, 0);
|
||||
if (ret) {
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto lockdep_trans_commit_start_release;
|
||||
}
|
||||
|
||||
btrfs_create_pending_block_groups(trans);
|
||||
@ -2172,10 +2151,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
if (run_it) {
|
||||
ret = btrfs_start_dirty_block_groups(trans);
|
||||
if (ret) {
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
goto lockdep_trans_commit_start_release;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2190,6 +2167,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
if (trans->in_fsync)
|
||||
want_state = TRANS_STATE_SUPER_COMMITTED;
|
||||
|
||||
btrfs_trans_state_lockdep_release(fs_info,
|
||||
BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
ret = btrfs_end_transaction(trans);
|
||||
wait_for_commit(cur_trans, want_state);
|
||||
|
||||
@ -2203,6 +2183,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
cur_trans->state = TRANS_STATE_COMMIT_START;
|
||||
wake_up(&fs_info->transaction_blocked_wait);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
|
||||
if (cur_trans->list.prev != &fs_info->trans_list) {
|
||||
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
|
||||
@ -2222,7 +2203,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
btrfs_put_transaction(prev_trans);
|
||||
if (ret)
|
||||
goto cleanup_transaction;
|
||||
goto lockdep_release;
|
||||
} else {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
}
|
||||
@ -2236,7 +2217,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
*/
|
||||
if (BTRFS_FS_ERROR(fs_info)) {
|
||||
ret = -EROFS;
|
||||
goto cleanup_transaction;
|
||||
goto lockdep_release;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2250,19 +2231,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
ret = btrfs_start_delalloc_flush(fs_info);
|
||||
if (ret)
|
||||
goto cleanup_transaction;
|
||||
goto lockdep_release;
|
||||
|
||||
ret = btrfs_run_delayed_items(trans);
|
||||
if (ret)
|
||||
goto cleanup_transaction;
|
||||
goto lockdep_release;
|
||||
|
||||
/*
|
||||
* The thread has started/joined the transaction thus it holds the
|
||||
* lockdep map as a reader. It has to release it before acquiring the
|
||||
* lockdep map as a writer.
|
||||
*/
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
|
||||
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters);
|
||||
wait_event(cur_trans->writer_wait,
|
||||
extwriter_counter_read(cur_trans) == 0);
|
||||
|
||||
/* some pending stuffs might be added after the previous flush. */
|
||||
ret = btrfs_run_delayed_items(trans);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
|
||||
goto cleanup_transaction;
|
||||
}
|
||||
|
||||
btrfs_wait_delalloc_flush(fs_info);
|
||||
|
||||
@ -2271,6 +2261,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
* transaction. Otherwise if this transaction commits before the ordered
|
||||
* extents complete we lose logged data after a power failure.
|
||||
*/
|
||||
btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered);
|
||||
wait_event(cur_trans->pending_wait,
|
||||
atomic_read(&cur_trans->pending_ordered) == 0);
|
||||
|
||||
@ -2284,9 +2275,27 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
add_pending_snapshot(trans);
|
||||
cur_trans->state = TRANS_STATE_COMMIT_DOING;
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
/*
|
||||
* The thread has started/joined the transaction thus it holds the
|
||||
* lockdep map as a reader. It has to release it before acquiring the
|
||||
* lockdep map as a writer.
|
||||
*/
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
|
||||
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
|
||||
wait_event(cur_trans->writer_wait,
|
||||
atomic_read(&cur_trans->num_writers) == 1);
|
||||
|
||||
/*
|
||||
* Make lockdep happy by acquiring the state locks after
|
||||
* btrfs_trans_num_writers is released. If we acquired the state locks
|
||||
* before releasing the btrfs_trans_num_writers lock then lockdep would
|
||||
* complain because we did not follow the reverse order unlocking rule.
|
||||
*/
|
||||
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
|
||||
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
|
||||
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
|
||||
/*
|
||||
* We've started the commit, clear the flag in case we were triggered to
|
||||
* do an async commit but somebody else started before the transaction
|
||||
@ -2296,6 +2305,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
if (TRANS_ABORTED(cur_trans)) {
|
||||
ret = cur_trans->aborted;
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
goto scrub_continue;
|
||||
}
|
||||
/*
|
||||
@ -2430,6 +2440,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
|
||||
wake_up(&fs_info->transaction_wait);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
|
||||
ret = btrfs_write_and_wait_transaction(trans);
|
||||
if (ret) {
|
||||
@ -2461,6 +2472,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
*/
|
||||
cur_trans->state = TRANS_STATE_SUPER_COMMITTED;
|
||||
wake_up(&cur_trans->commit_wait);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
|
||||
|
||||
btrfs_finish_extent_commit(trans);
|
||||
|
||||
@ -2474,6 +2486,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
*/
|
||||
cur_trans->state = TRANS_STATE_COMPLETED;
|
||||
wake_up(&cur_trans->commit_wait);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
list_del_init(&cur_trans->list);
|
||||
@ -2502,7 +2515,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
unlock_reloc:
|
||||
mutex_unlock(&fs_info->reloc_mutex);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
|
||||
scrub_continue:
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
|
||||
btrfs_scrub_continue(fs_info);
|
||||
cleanup_transaction:
|
||||
btrfs_trans_release_metadata(trans);
|
||||
@ -2515,6 +2531,16 @@ cleanup_transaction:
|
||||
cleanup_transaction(trans, ret);
|
||||
|
||||
return ret;
|
||||
|
||||
lockdep_release:
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
|
||||
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
|
||||
goto cleanup_transaction;
|
||||
|
||||
lockdep_trans_commit_start_release:
|
||||
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
|
1543
fs/btrfs/tree-log.c
1543
fs/btrfs/tree-log.c
File diff suppressed because it is too large
Load Diff
@ -20,6 +20,7 @@ struct btrfs_log_ctx {
|
||||
int log_transid;
|
||||
bool log_new_dentries;
|
||||
bool logging_new_name;
|
||||
bool logging_new_delayed_dentries;
|
||||
/* Indicate if the inode being logged was logged before. */
|
||||
bool logged_before;
|
||||
/* Tracks the last logged dir item/index key offset. */
|
||||
@ -28,6 +29,9 @@ struct btrfs_log_ctx {
|
||||
struct list_head list;
|
||||
/* Only used for fast fsyncs. */
|
||||
struct list_head ordered_extents;
|
||||
struct list_head conflict_inodes;
|
||||
int num_conflict_inodes;
|
||||
bool logging_conflict_inodes;
|
||||
};
|
||||
|
||||
static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
|
||||
@ -37,10 +41,14 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
|
||||
ctx->log_transid = 0;
|
||||
ctx->log_new_dentries = false;
|
||||
ctx->logging_new_name = false;
|
||||
ctx->logging_new_delayed_dentries = false;
|
||||
ctx->logged_before = false;
|
||||
ctx->inode = inode;
|
||||
INIT_LIST_HEAD(&ctx->list);
|
||||
INIT_LIST_HEAD(&ctx->ordered_extents);
|
||||
INIT_LIST_HEAD(&ctx->conflict_inodes);
|
||||
ctx->num_conflict_inodes = 0;
|
||||
ctx->logging_conflict_inodes = false;
|
||||
}
|
||||
|
||||
static inline void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
|
||||
|
@ -659,8 +659,7 @@ rollback:
|
||||
*
|
||||
* Returns the size on success or a negative error code on failure.
|
||||
*/
|
||||
static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
|
||||
size_t buf_size)
|
||||
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
|
||||
{
|
||||
u64 true_size;
|
||||
int ret = 0;
|
||||
|
@ -34,6 +34,8 @@
|
||||
#include "discard.h"
|
||||
#include "zoned.h"
|
||||
|
||||
static struct bio_set btrfs_bioset;
|
||||
|
||||
#define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
|
||||
BTRFS_BLOCK_GROUP_RAID10 | \
|
||||
BTRFS_BLOCK_GROUP_RAID56_MASK)
|
||||
@ -247,10 +249,10 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans);
|
||||
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
|
||||
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
|
||||
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_map_op op,
|
||||
u64 logical, u64 *length,
|
||||
enum btrfs_map_op op, u64 logical, u64 *length,
|
||||
struct btrfs_io_context **bioc_ret,
|
||||
int mirror_num, int need_raid_map);
|
||||
struct btrfs_io_stripe *smap,
|
||||
int *mirror_num_ret, int need_raid_map);
|
||||
|
||||
/*
|
||||
* Device locking
|
||||
@ -2017,7 +2019,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
|
||||
struct page *page;
|
||||
int ret;
|
||||
|
||||
disk_super = btrfs_read_dev_one_super(bdev, copy_num);
|
||||
disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
|
||||
if (IS_ERR(disk_super))
|
||||
continue;
|
||||
|
||||
@ -5595,7 +5597,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
bg->chunk_item_inserted = 1;
|
||||
set_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED, &bg->runtime_flags);
|
||||
|
||||
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
|
||||
ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
|
||||
@ -5896,7 +5898,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
|
||||
sizeof(u64) * (total_stripes),
|
||||
GFP_NOFS|__GFP_NOFAIL);
|
||||
|
||||
atomic_set(&bioc->error, 0);
|
||||
refcount_set(&bioc->refs, 1);
|
||||
|
||||
bioc->fs_info = fs_info;
|
||||
@ -6092,7 +6093,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
|
||||
int ret = 0;
|
||||
|
||||
ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
|
||||
logical, &length, &bioc, 0, 0);
|
||||
logical, &length, &bioc, NULL, NULL, 0);
|
||||
if (ret) {
|
||||
ASSERT(bioc == NULL);
|
||||
return ret;
|
||||
@ -6153,9 +6154,7 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, logical);
|
||||
|
||||
spin_lock(&cache->lock);
|
||||
ret = cache->to_copy;
|
||||
spin_unlock(&cache->lock);
|
||||
ret = test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
return ret;
|
||||
@ -6351,11 +6350,19 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
|
||||
u32 stripe_index, u64 stripe_offset, u64 stripe_nr)
|
||||
{
|
||||
dst->dev = map->stripes[stripe_index].dev;
dst->physical = map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
}

static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
enum btrfs_map_op op, u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
int mirror_num, int need_raid_map)
struct btrfs_io_stripe *smap,
int *mirror_num_ret, int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@@ -6366,6 +6373,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
int data_stripes;
int i;
int ret = 0;
int mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
int num_stripes;
int max_errors = 0;
int tgtdev_indexes = 0;
@@ -6526,6 +6534,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
tgtdev_indexes = num_stripes;
}

/*
* If this I/O maps to a single device, try to return the device and
* physical block information on the stack instead of allocating an
* I/O context structure.
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
(!need_full_stripe(op) || !dev_replace_is_ongoing ||
!dev_replace->tgtdev)) {
if (patch_the_first_stripe_for_dev_replace) {
smap->dev = dev_replace->tgtdev;
smap->physical = physical_to_patch_in_first_stripe;
*mirror_num_ret = map->num_stripes + 1;
} else {
set_io_stripe(smap, map, stripe_index, stripe_offset,
stripe_nr);
*mirror_num_ret = mirror_num;
}
*bioc_ret = NULL;
ret = 0;
goto out;
}

bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes);
if (!bioc) {
ret = -ENOMEM;
@@ -6533,9 +6564,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
}

for (i = 0; i < num_stripes; i++) {
bioc->stripes[i].physical = map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
bioc->stripes[i].dev = map->stripes[stripe_index].dev;
set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
stripe_nr);
stripe_index++;
}

@@ -6603,7 +6633,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_io_context **bioc_ret, int mirror_num)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
mirror_num, 0);
NULL, &mirror_num, 0);
}

/* For Scrub/replace */
@@ -6611,14 +6641,77 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
NULL, NULL, 1);
}

static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
btrfs_bio_end_io_t end_io, void *private)
{
if (bioc->orig_bio->bi_opf & REQ_META)
return bioc->fs_info->endio_meta_workers;
return bioc->fs_info->endio_workers;
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
bbio->end_io = end_io;
bbio->private = private;
}

/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
* btrfs, and is used for all I/O submitted through btrfs_submit_bio.
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
*/
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;

bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
btrfs_bio_init(btrfs_bio(bio), end_io, private);
return bio;
}
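For orientation, a minimal sketch of how a caller would use the allocation and completion interface shown above (btrfs_bio_alloc plus an end_io callback). The my_* names and the choice of a completion as the private cookie are illustrative, not part of this series:

/* Illustrative only: my_* names are not part of this patch set. */
static void my_end_io(struct btrfs_bio *bbio)
{
	struct completion *done = bbio->private;

	/* bbio->bio.bi_status carries the result set via btrfs_bio_end_io(). */
	complete(done);
	bio_put(&bbio->bio);
}

static void my_read_one_page(struct btrfs_fs_info *fs_info, struct page *page,
			     u64 logical, struct completion *done)
{
	struct bio *bio = btrfs_bio_alloc(1, REQ_OP_READ, my_end_io, done);

	bio->bi_iter.bi_sector = logical >> SECTOR_SHIFT;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	btrfs_submit_bio(fs_info, bio, 0);
}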

struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
struct btrfs_bio *bbio;

ASSERT(offset <= UINT_MAX && size <= UINT_MAX);

bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio, end_io, private);

bio_trim(bio, offset >> 9, size >> 9);
bbio->iter = bio->bi_iter;
return bio;
}

static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
return;
if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
return;

if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}

static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
struct bio *bio)
{
if (bio->bi_opf & REQ_META)
return fs_info->endio_meta_workers;
return fs_info->endio_workers;
}

static void btrfs_end_bio_work(struct work_struct *work)
@@ -6626,103 +6719,101 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio =
container_of(work, struct btrfs_bio, end_io_work);

bio_endio(&bbio->bio);
bbio->end_io(bbio);
}

static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
static void btrfs_simple_end_io(struct bio *bio)
{
struct bio *orig_bio = bioc->orig_bio;
struct btrfs_bio *bbio = btrfs_bio(orig_bio);
struct btrfs_fs_info *fs_info = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);

btrfs_bio_counter_dec(fs_info);

if (bio->bi_status)
btrfs_log_dev_io_error(bio, bbio->device);

if (bio_op(bio) == REQ_OP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
bbio->end_io(bbio);
}
}

static void btrfs_raid56_end_io(struct bio *bio)
{
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);

btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
orig_bio->bi_private = bioc->private;
orig_bio->bi_end_io = bioc->end_io;
bbio->end_io(bbio);

btrfs_put_bioc(bioc);
}

static void btrfs_orig_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
struct btrfs_bio *bbio = btrfs_bio(bio);

btrfs_bio_counter_dec(bioc->fs_info);

if (bio->bi_status) {
atomic_inc(&bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}

/*
* Only send an error to the higher layers if it is beyond the tolerance
* threshold.
*/
if (atomic_read(&bioc->error) > bioc->max_errors)
orig_bio->bi_status = BLK_STS_IOERR;
bio->bi_status = BLK_STS_IOERR;
else
orig_bio->bi_status = BLK_STS_OK;

if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
} else {
bio_endio(orig_bio);
}
bio->bi_status = BLK_STS_OK;

bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}

static void btrfs_end_bio(struct bio *bio)
static void btrfs_clone_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;

if (bio->bi_status) {
atomic_inc(&bioc->error);
if (bio->bi_status == BLK_STS_IOERR ||
bio->bi_status == BLK_STS_TARGET) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}

if (bio != bioc->orig_bio)
bio_put(bio);

btrfs_bio_counter_dec(bioc->fs_info);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, true);
/* Pass on control to the original bio this one was cloned from */
bio_endio(stripe->bioc->orig_bio);
bio_put(bio);
}

static void submit_stripe_bio(struct btrfs_io_context *bioc,
struct bio *orig_bio, int dev_nr, bool clone)
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_device *dev = bioc->stripes[dev_nr].dev;
u64 physical = bioc->stripes[dev_nr].physical;
struct bio *bio;

if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
atomic_inc(&bioc->error);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, false);
bio_io_error(bio);
return;
}

if (clone) {
bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
} else {
bio = orig_bio;
bio_set_dev(bio, dev->bdev);
btrfs_bio(bio)->device = dev;
}
bio_set_dev(bio, dev->bdev);

bioc->stripes[dev_nr].bioc = bioc;
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
/*
* For zone append writing, bi_sector must point the beginning of the
* zone
*/
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;

if (btrfs_dev_is_sequential(dev, physical)) {
u64 zone_start = round_down(physical, fs_info->zone_size);
u64 zone_start = round_down(physical,
dev->fs_info->zone_size);

bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
} else {
@@ -6730,50 +6821,53 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
bio->bi_opf |= REQ_OP_WRITE;
}
}
btrfs_debug_in_rcu(fs_info,
btrfs_debug_in_rcu(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
dev->devid, bio->bi_iter.bi_size);

btrfs_bio_counter_inc_noblocked(fs_info);

btrfsic_check_bio(bio);
submit_bio(bio);
}

static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;

ASSERT(bio_op(orig_bio) != REQ_OP_READ);

/* Reuse the bio embedded into the btrfs_bio for the last mirror */
if (dev_nr == bioc->num_stripes - 1) {
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
bio_inc_remaining(orig_bio);
bio->bi_end_io = btrfs_clone_write_end_io;
}

bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
bioc->stripes[dev_nr].bioc = bioc;
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
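btrfs_submit_mirrored_bio() above reuses the bio embedded in the btrfs_bio for the last mirror and clones it for the others, bumping the original's remaining counter for each clone so the original only completes once every copy has ended. A generic sketch of that bio_inc_remaining()/bio_endio() pairing, with illustrative names and error propagation omitted:

/* Illustrative only: fold a cloned child bio's completion into its parent. */
static void child_end_io(struct bio *child)
{
	struct bio *parent = child->bi_private;

	/* Each bio_endio() on the parent consumes one "remaining" reference. */
	bio_endio(parent);
	bio_put(child);
}

static void submit_extra_copy(struct bio *parent, struct block_device *bdev)
{
	struct bio *child = bio_alloc_clone(bdev, parent, GFP_NOFS, &fs_bio_set);

	bio_inc_remaining(parent);	/* parent now also waits for this copy */
	child->bi_private = parent;
	child->bi_end_io = child_end_io;
	submit_bio(child);
}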

void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
{
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
int ret;
int dev_nr;
int total_devs;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
int ret;

btrfs_bio_counter_inc_blocked(fs_info);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
&map_length, &bioc, mirror_num, 1);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
return;
}

total_devs = bioc->num_stripes;
bioc->orig_bio = bio;
bioc->private = bio->bi_private;
bioc->end_io = bio->bi_end_io;
atomic_set(&bioc->stripes_pending, total_devs);

if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
raid56_parity_write(bio, bioc);
else
raid56_parity_recover(bio, bioc, mirror_num, true);
btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
return;
}

@@ -6784,12 +6878,31 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
BUG();
}

for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
const bool should_clone = (dev_nr < total_devs - 1);
if (!bioc) {
/* Single mirror read/write fast path */
btrfs_bio(bio)->mirror_num = mirror_num;
btrfs_bio(bio)->device = smap.dev;
bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
bio->bi_private = fs_info;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap.dev, bio);
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/* Parity RAID write or read recovery */
bio->bi_private = bioc;
bio->bi_end_io = btrfs_raid56_end_io;
if (bio_op(bio) == REQ_OP_READ)
raid56_parity_recover(bio, bioc, mirror_num);
else
raid56_parity_write(bio, bioc);
} else {
/* Write to multiple mirrors */
int total_devs = bioc->num_stripes;
int dev_nr;

submit_stripe_bio(bioc, bio, dev_nr, should_clone);
bioc->orig_bio = bio;
for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
btrfs_submit_mirrored_bio(bioc, dev_nr);
}
btrfs_bio_counter_dec(fs_info);
}

static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
@@ -8244,7 +8357,7 @@ static int relocating_repair_kthread(void *data)
if (!cache)
goto out;

if (!cache->relocating_repair)
if (!test_bit(BLOCK_GROUP_FLAG_RELOCATING_REPAIR, &cache->runtime_flags))
goto out;

ret = btrfs_may_alloc_data_chunk(fs_info, target);
@@ -8281,17 +8394,27 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
if (!cache)
return true;

spin_lock(&cache->lock);
if (cache->relocating_repair) {
spin_unlock(&cache->lock);
if (test_and_set_bit(BLOCK_GROUP_FLAG_RELOCATING_REPAIR, &cache->runtime_flags)) {
btrfs_put_block_group(cache);
return true;
}
cache->relocating_repair = 1;
spin_unlock(&cache->lock);

kthread_run(relocating_repair_kthread, cache,
"btrfs-relocating-repair");

return true;
}
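The hunk above swaps a spinlock-protected boolean for an atomic bit in runtime_flags: test_and_set_bit() reads and sets the bit in one step, so exactly one caller wins the right to start the repair kthread. A minimal, generic sketch of the claim pattern (flag name and callback are hypothetical):

/* Illustrative only: one-shot claim via an atomic flag word. */
static unsigned long my_state;
#define MY_REPAIR_RUNNING	0

static void my_start_repair_once(void)
{
	/* test_and_set_bit() returns the old bit: non-zero means already claimed. */
	if (test_and_set_bit(MY_REPAIR_RUNNING, &my_state))
		return;

	/* This caller won the race; start the one-shot repair worker here. */
}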

int __init btrfs_bioset_init(void)
{
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
return -ENOMEM;
return 0;
}

void __cold btrfs_bioset_exit(void)
{
bioset_exit(&btrfs_bioset);
}

@@ -180,6 +180,31 @@ struct btrfs_device {
u64 scrub_speed_max;
};

/*
* Block group or device which contains an active swapfile. Used for preventing
* unsafe operations while a swapfile is active.
*
* These are sorted on (ptr, inode) (note that a block group or device can
* contain more than one swapfile). We compare the pointer values because we
* don't actually care what the object is, we just need a quick check whether
* the object exists in the rbtree.
*/
struct btrfs_swapfile_pin {
struct rb_node node;
void *ptr;
struct inode *inode;
/*
* If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
* points to a struct btrfs_device.
*/
bool is_block_group;
/*
* Only used when 'is_block_group' is true and it is the number of
* extents used by a swapfile for this block group ('ptr' field).
*/
int bg_extent_count;
};
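The comment documents that swapfile pins are ordered by (ptr, inode) using raw pointer comparison. A small sketch of what such an ordering function could look like, assuming only the fields shown above (the helper name is illustrative, not the one btrfs uses):

/* Illustrative only: (ptr, inode) ordering by raw pointer value. */
static int swapfile_pin_cmp(const struct btrfs_swapfile_pin *a,
			    const struct btrfs_swapfile_pin *b)
{
	if (a->ptr != b->ptr)
		return a->ptr < b->ptr ? -1 : 1;
	if (a->inode != b->inode)
		return a->inode < b->inode ? -1 : 1;
	return 0;
}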

/*
* If we read those variants at the context of their own lock, we needn't
* use the following helpers, reading them directly is safe.
@@ -361,6 +386,8 @@ struct btrfs_fs_devices {
*/
#define BTRFS_MAX_BIO_SECTORS (256)

typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);

/*
* Additional info to pass along bio.
*
@@ -378,6 +405,10 @@ struct btrfs_bio {
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
struct bvec_iter iter;

/* End I/O information supplied to btrfs_bio_alloc */
btrfs_bio_end_io_t end_io;
void *private;

/* For read end I/O handling */
struct work_struct end_io_work;

@@ -393,6 +424,20 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
return container_of(bio, struct btrfs_bio, bio);
}

int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);

struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private);

static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}

static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
if (bbio->csum != bbio->csum_inline) {
@@ -451,12 +496,9 @@ struct btrfs_discard_stripe {
*/
struct btrfs_io_context {
refcount_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
struct bio *orig_bio;
void *private;
atomic_t error;
int max_errors;
int num_stripes;
@@ -714,4 +756,6 @@ const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);

bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);

#endif
142	fs/btrfs/zoned.c
@@ -652,80 +652,55 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
return 0;
}

static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
{
struct btrfs_device *device;

list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
if (device->bdev &&
bdev_zoned_model(device->bdev) == BLK_ZONED_HM) {
btrfs_err(fs_info,
"zoned: mode not enabled but zoned device found: %pg",
device->bdev);
return -EINVAL;
}
}

return 0;
}

int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
int ret = 0;
int ret;

/* Count zoned devices */
list_for_each_entry(device, &fs_devices->devices, dev_list) {
enum blk_zoned_model model;
/*
* Host-Managed devices can't be used without the ZONED flag. With the
* ZONED all devices can be used, using zone emulation if required.
*/
if (!btrfs_fs_incompat(fs_info, ZONED))
return btrfs_check_for_zoned_device(fs_info);

list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zone_info = device->zone_info;

if (!device->bdev)
continue;

model = bdev_zoned_model(device->bdev);
/*
* A Host-Managed zoned device must be used as a zoned device.
* A Host-Aware zoned device and a non-zoned devices can be
* treated as a zoned device, if ZONED flag is enabled in the
* superblock.
*/
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned) ||
(model == BLK_ZONED_NONE && incompat_zoned)) {
struct btrfs_zoned_device_info *zone_info;

zone_info = device->zone_info;
zoned_devices++;
if (!zone_size) {
zone_size = zone_info->zone_size;
} else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
if (!zone_size) {
zone_size = zone_info->zone_size;
} else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
"zoned: unequal block device zone sizes: have %llu found %llu",
device->zone_info->zone_size,
zone_size);
ret = -EINVAL;
goto out;
}
if (!max_zone_append_size ||
(zone_info->max_zone_append_size &&
zone_info->max_zone_append_size < max_zone_append_size))
max_zone_append_size =
zone_info->max_zone_append_size;
zone_info->zone_size, zone_size);
return -EINVAL;
}
nr_devices++;
}

if (!zoned_devices && !incompat_zoned)
goto out;

if (!zoned_devices && incompat_zoned) {
/* No zoned block device found on ZONED filesystem */
btrfs_err(fs_info,
"zoned: no zoned devices found on a zoned filesystem");
ret = -EINVAL;
goto out;
}

if (zoned_devices && !incompat_zoned) {
btrfs_err(fs_info,
"zoned: mode not enabled but zoned device found");
ret = -EINVAL;
goto out;
}

if (zoned_devices != nr_devices) {
btrfs_err(fs_info,
"zoned: cannot mix zoned and regular devices");
ret = -EINVAL;
goto out;
if (!max_zone_append_size ||
(zone_info->max_zone_append_size &&
zone_info->max_zone_append_size < max_zone_append_size))
max_zone_append_size = zone_info->max_zone_append_size;
}

/*
@@ -737,14 +712,12 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info,
"zoned: zone size %llu not aligned to stripe %u",
zone_size, BTRFS_STRIPE_LEN);
ret = -EINVAL;
goto out;
return -EINVAL;
}

if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
btrfs_err(fs_info, "zoned: mixed block groups not supported");
ret = -EINVAL;
goto out;
return -EINVAL;
}

fs_info->zone_size = zone_size;
@@ -760,11 +733,10 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
*/
ret = btrfs_check_mountopts_zoned(fs_info);
if (ret)
goto out;
return ret;

btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
out:
return ret;
return 0;
}

int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
@@ -1436,7 +1408,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
} else if (map->num_stripes == num_conventional) {
cache->alloc_offset = last_alloc;
cache->zone_is_active = 1;
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
goto out;
}
}
@@ -1452,7 +1424,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = caps[0];
cache->zone_is_active = test_bit(0, active);
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
break;
case BTRFS_BLOCK_GROUP_DUP:
if (map->type & BTRFS_BLOCK_GROUP_DATA) {
@@ -1486,7 +1459,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
} else {
cache->zone_is_active = test_bit(0, active);
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&cache->runtime_flags);
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = min(caps[0], caps[1]);
@@ -1530,7 +1505,7 @@ out:

if (!ret) {
cache->meta_write_pointer = cache->alloc_offset + cache->start;
if (cache->zone_is_active) {
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) {
btrfs_get_block_group(cache);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&cache->active_bg_list,
@@ -1563,7 +1538,6 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
free = cache->zone_capacity - cache->alloc_offset;

/* We only need ->free_space in ALLOC_SEQ block groups */
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->free_space_ctl->free_space = free;
cache->zone_unusable = unusable;
@@ -1871,7 +1845,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)

spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (block_group->zone_is_active) {
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
ret = true;
goto out_unlock;
}
@@ -1897,7 +1871,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
}

/* Successfully activated all the zones */
block_group->zone_is_active = 1;
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
space_info->active_total_bytes += block_group->length;
spin_unlock(&block_group->lock);
btrfs_try_granting_tickets(fs_info, space_info);
@@ -1960,7 +1934,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
int i;

spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
return 0;
}
@@ -2001,7 +1975,8 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
* Bail out if someone already deactivated the block group, or
* allocated space is left in the block group.
*/
if (!block_group->zone_is_active) {
if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return 0;
@@ -2014,7 +1989,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
}

block_group->zone_is_active = 0;
clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
@@ -2222,13 +2197,14 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));

spin_lock(&block_group->lock);
if (!block_group->zoned_data_reloc_ongoing)
if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))
goto out;

/* All relocation extents are written. */
if (block_group->start + block_group->alloc_offset == logical + length) {
/* Now, release this block group for further allocations. */
block_group->zoned_data_reloc_ongoing = 0;
clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
&block_group->runtime_flags);
}

out:
@@ -2300,7 +2276,9 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
list) {
if (!spin_trylock(&bg->lock))
continue;
if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) {
if (btrfs_zoned_bg_is_full(bg) ||
test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&bg->runtime_flags)) {
spin_unlock(&bg->lock);
continue;
}

@@ -70,8 +70,6 @@ struct fsverity_info {
const struct inode *inode;
};

/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384

#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
sizeof(struct fsverity_descriptor))

@@ -22,6 +22,9 @@
*/
#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE

/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384

/* Verity operations for filesystems */
struct fsverity_operations {

@@ -84,7 +84,6 @@ struct raid56_bio_trace_info;
EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
EM( IO_TREE_BTREE_INODE_IO, "BTREE_INODE_IO") \
EM( IO_TREE_INODE_IO, "INODE_IO") \
EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \
@@ -154,7 +153,6 @@ FLUSH_STATES
{ EXTENT_NODATASUM, "NODATASUM"}, \
{ EXTENT_CLEAR_META_RESV, "CLEAR_META_RESV"}, \
{ EXTENT_NEED_WAIT, "NEED_WAIT"}, \
{ EXTENT_DAMAGED, "DAMAGED"}, \
{ EXTENT_NORESERVE, "NORESERVE"}, \
{ EXTENT_QGROUP_RESERVED, "QGROUP_RESERVED"}, \
{ EXTENT_CLEAR_DATA_RESV, "CLEAR_DATA_RESV"}, \

@@ -290,6 +290,12 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2)

/*
* Put all block group items into a dedicated block group tree, greatly
* reducing mount time for large filesystem due to better locality.
*/
#define BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE (1ULL << 3)

#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)

@@ -965,6 +965,10 @@ static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
*/
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)

#define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \
BTRFS_QGROUP_STATUS_FLAG_RESCAN | \
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)

#define BTRFS_QGROUP_STATUS_VERSION 1

struct btrfs_qgroup_status_item {

@@ -1933,6 +1933,7 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
wb_put(wb);
return ret;
}
EXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited_flags);

/**
* balance_dirty_pages_ratelimited - balance dirty memory state.