for-6.1-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmM6zNkACgkQxWXV+ddt
 WDsNMg/+LTuwf6Js+mAl1AgtSpLOl2gLfNBJAUXhzwPbc3nF9bwONE/EUYEXTo5h
 kTf1cQRj0NCIZ7iHDwXuWNm77diNl+SChEDIoc7k0d6P7Qmmn2AWbTLM4dleyg5S
 6jxPpOMbegycQfL9tSJNaiT9zlZxj9Z+0yPibR99otrgtuv6zuvRxcdh34rEFIyf
 xoabO3/18lAKHzYzAZxNXMpbUSBmqLPVoZEOcfBAXvcuIJkzKRP6Y9gwlYs+kn+D
 J8BPa3LoSNxXrpCvWzlu7vO3gwNp7H7pQQqZKjjEcOZ+dj2UYQeTyJvl1vdzaNyk
 EoFYlkaKkYi7RaonuHjNaTeD/igJf8Eo6DTiXzACECssbKutlvNG4HXuFApsWy7M
 T7KZ5jTAQ98ZMYjgZ27UbEpFZd8lYHzV952Njjo9zbRVbqwaPEZTTdkjpz+3X6t4
 Z0A951ixOYKiOVdu3Uj1fHaBv0n/p0wrXIGt3ZIdjufM9TctV3oJwOZOiM2H0ccb
 XJVwsQG92+ja9XLZrw8H62PCKBYo3LL52r9b9NVodY9aTsQWTfiV5OP84RRlncCp
 hzPkHmO1YIyVcLoijagiO7cW21pQbKfqsRX/P1F7DXyjosHppmDS7IHDWA7Adf3W
 QA6eBnoWqVwBh7P+IyxJuRG0CrnxkPZeAZIhohDwk5Mt4NGATkA=
 =NlUz
 -----END PGP SIGNATURE-----

Merge tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "There's a bunch of performance improvements, most notably the FIEMAP
  speedup, the new block group tree to speed up mount on large
  filesystems, more io_uring integration, some sysfs exports and the
  usual fixes and core updates.

  Summary:

  Performance:

   - outstanding FIEMAP speed improvement
      - algorithmic change how extents are enumerated leads to orders of
        magnitude speed boost (uncached and cached)
      - extent sharing check speedup (2.2x uncached, 3x cached)
      - add more cancellation points, allowing to interrupt seeking in
        files with large number of extents
      - more efficient hole and data seeking (4x uncached, 1.3x cached)
      - sample results:
	    256M, 32K extents:   4s ->  29ms  (~150x)
	    512M, 64K extents:  30s ->  59ms  (~550x)
	    1G,  128K extents: 225s -> 120ms (~1800x)

   - improved inode logging, especially for directories (on dbench
     workload throughput +25%, max latency -21%)

   - improved buffered IO, remove redundant extent state tracking,
     lowering memory consumption and avoiding rb tree traversal

   - add sysfs tunable to let qgroup temporarily skip exact accounting
     when deleting snapshot, leading to a speedup but requiring a rescan
     after that, will be used by snapper

   - support io_uring and buffered writes, until now it was just for
     direct IO, with the no-wait semantics implemented in the buffered
     write path it now works and leads to speed improvement in IOPS
     (2x), throughput (2.2x), latency (depends, 2x to 150x)

   - small performance improvements when dropping and searching for
     extent maps as well as when flushing delalloc in COW mode
     (throughput +5MB/s)

  User visible changes:

   - new incompatible feature block-group-tree adding a dedicated tree
     for tracking block groups, this allows a much faster load during
     mount and avoids seeking unlike when it's scattered in the extent
     tree items
      - this reduces mount time for many-terabyte sized filesystems
      - conversion tool will be provided so existing filesystem can also
        be updated in place
      - to reduce test matrix and feature combinations requires no-holes
        and free-space-tree (mkfs defaults since 5.15)

   - improved reporting of super block corruption detected by scrub

   - scrub also tries to repair super block and does not wait until next
     commit

   - discard stats and tunables are exported in sysfs
     (/sys/fs/btrfs/FSID/discard)

   - qgroup status is exported in sysfs
     (/sys/sys/fs/btrfs/FSID/qgroups/)

   - verify that super block was not modified when thawing filesystem

  Fixes:

   - FIEMAP fixes
      - fix extent sharing status, does not depend on the cached status
        where merged
      - flush delalloc so compressed extents are reported correctly

   - fix alignment of VMA for memory mapped files on THP

   - send: fix failures when processing inodes with no links (orphan
     files and directories)

   - fix race between quota enable and quota rescan ioctl

   - handle more corner cases for read-only compat feature verification

   - fix missed extent on fsync after dropping extent maps

  Core:

   - lockdep annotations to validate various transactions states and
     state transitions

   - preliminary support for fs-verity in send

   - more effective memory use in scrub for subpage where sector is
     smaller than page

   - block group caching progress logic has been removed, load is now
     synchronous

   - simplify end IO callbacks and bio handling, use chained bios
     instead of own tracking

   - add no-wait semantics to several functions (tree search, nocow,
     flushing, buffered write

   - cleanups and refactoring

  MM changes:

   - export balance_dirty_pages_ratelimited_flags"

* tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (177 commits)
  btrfs: set generation before calling btrfs_clean_tree_block in btrfs_init_new_buffer
  btrfs: drop extent map range more efficiently
  btrfs: avoid pointless extent map tree search when flushing delalloc
  btrfs: remove unnecessary next extent map search
  btrfs: remove unnecessary NULL pointer checks when searching extent maps
  btrfs: assert tree is locked when clearing extent map from logging
  btrfs: remove unnecessary extent map initializations
  btrfs: remove the refcount warning/check at free_extent_map()
  btrfs: add helper to replace extent map range with a new extent map
  btrfs: move open coded extent map tree deletion out of inode eviction
  btrfs: use cond_resched_rwlock_write() during inode eviction
  btrfs: use extent_map_end() at btrfs_drop_extent_map_range()
  btrfs: move btrfs_drop_extent_cache() to extent_map.c
  btrfs: fix missed extent on fsync after dropping extent maps
  btrfs: remove stale prototype of btrfs_write_inode
  btrfs: enable nowait async buffered writes
  btrfs: assert nowait mode is not used for some btree search functions
  btrfs: make btrfs_buffered_write nowait compatible
  btrfs: plumb NOWAIT through the write path
  btrfs: make lock_and_cleanup_extent_if_need nowait compatible
  ...
This commit is contained in:
Linus Torvalds 2022-10-06 17:36:48 -07:00
commit 76e4503534
70 changed files with 7171 additions and 5201 deletions

View File

@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
subpage.o tree-mod-log.o
subpage.o tree-mod-log.o extent-io-tree.o
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o

View File

@ -1511,16 +1511,118 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
return ret;
}
/**
* Check if an extent is shared or not
/*
* The caller has joined a transaction or is holding a read lock on the
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
* snapshot field changing while updating or checking the cache.
*/
static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
struct btrfs_root *root,
u64 bytenr, int level, bool *is_shared)
{
struct btrfs_backref_shared_cache_entry *entry;
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return false;
/*
* Level -1 is used for the data extent, which is not reliable to cache
* because its reference count can increase or decrease without us
* realizing. We cache results only for extent buffers that lead from
* the root node down to the leaf with the file extent item.
*/
ASSERT(level >= 0);
entry = &cache->entries[level];
/* Unused cache entry or being used for some other extent buffer. */
if (entry->bytenr != bytenr)
return false;
/*
* We cached a false result, but the last snapshot generation of the
* root changed, so we now have a snapshot. Don't trust the result.
*/
if (!entry->is_shared &&
entry->gen != btrfs_root_last_snapshot(&root->root_item))
return false;
/*
* If we cached a true result and the last generation used for dropping
* a root changed, we can not trust the result, because the dropped root
* could be a snapshot sharing this extent buffer.
*/
if (entry->is_shared &&
entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
return false;
*is_shared = entry->is_shared;
return true;
}
/*
* The caller has joined a transaction or is holding a read lock on the
* fs_info->commit_root_sem semaphore, so no need to worry about the root's last
* snapshot field changing while updating or checking the cache.
*/
static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
struct btrfs_root *root,
u64 bytenr, int level, bool is_shared)
{
struct btrfs_backref_shared_cache_entry *entry;
u64 gen;
if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
return;
/*
* Level -1 is used for the data extent, which is not reliable to cache
* because its reference count can increase or decrease without us
* realizing. We cache results only for extent buffers that lead from
* the root node down to the leaf with the file extent item.
*/
ASSERT(level >= 0);
if (is_shared)
gen = btrfs_get_last_root_drop_gen(root->fs_info);
else
gen = btrfs_root_last_snapshot(&root->root_item);
entry = &cache->entries[level];
entry->bytenr = bytenr;
entry->is_shared = is_shared;
entry->gen = gen;
/*
* If we found an extent buffer is shared, set the cache result for all
* extent buffers below it to true. As nodes in the path are COWed,
* their sharedness is moved to their children, and if a leaf is COWed,
* then the sharedness of a data extent becomes direct, the refcount of
* data extent is increased in the extent item at the extent tree.
*/
if (is_shared) {
for (int i = 0; i < level; i++) {
entry = &cache->entries[i];
entry->is_shared = is_shared;
entry->gen = gen;
}
}
}
/*
* Check if a data extent is shared or not.
*
* @root: root inode belongs to
* @inum: inode number of the inode whose extent we are checking
* @bytenr: logical bytenr of the extent we are checking
* @roots: list of roots this extent is shared among
* @tmp: temporary list used for iteration
* @root: The root the inode belongs to.
* @inum: Number of the inode whose extent we are checking.
* @bytenr: Logical bytenr of the extent we are checking.
* @extent_gen: Generation of the extent (file extent item) or 0 if it is
* not known.
* @roots: List of roots this extent is shared among.
* @tmp: Temporary list used for iteration.
* @cache: A backref lookup result cache.
*
* btrfs_check_shared uses the backref walking code but will short
* btrfs_is_data_extent_shared uses the backref walking code but will short
* circuit as soon as it finds a root or inode that doesn't match the
* one passed in. This provides a significant performance benefit for
* callers (such as fiemap) which want to know whether the extent is
@ -1531,8 +1633,10 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
*
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
struct ulist *roots, struct ulist *tmp)
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
u64 extent_gen,
struct ulist *roots, struct ulist *tmp,
struct btrfs_backref_shared_cache *cache)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
@ -1545,6 +1649,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
.inum = inum,
.share_count = 0,
};
int level;
ulist_init(roots);
ulist_init(tmp);
@ -1561,22 +1666,52 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
btrfs_get_tree_mod_seq(fs_info, &elem);
}
/* -1 means we are in the bytenr of the data extent. */
level = -1;
ULIST_ITER_INIT(&uiter);
while (1) {
bool is_shared;
bool cached;
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
if (level >= 0)
store_backref_shared_cache(cache, root, bytenr,
level, true);
break;
}
if (ret < 0 && ret != -ENOENT)
break;
ret = 0;
/*
* If our data extent is not shared through reflinks and it was
* created in a generation after the last one used to create a
* snapshot of the inode's root, then it can not be shared
* indirectly through subtrees, as that can only happen with
* snapshots. In this case bail out, no need to check for the
* sharedness of extent buffers.
*/
if (level == -1 &&
extent_gen > btrfs_root_last_snapshot(&root->root_item))
break;
if (level >= 0)
store_backref_shared_cache(cache, root, bytenr,
level, false);
node = ulist_next(tmp, &uiter);
if (!node)
break;
bytenr = node->val;
level++;
cached = lookup_backref_shared_cache(cache, root, bytenr, level,
&is_shared);
if (cached) {
ret = (is_shared ? 1 : 0);
break;
}
shared.share_count = 0;
cond_resched();
}

View File

@ -17,6 +17,20 @@ struct inode_fs_paths {
struct btrfs_data_container *fspath;
};
struct btrfs_backref_shared_cache_entry {
u64 bytenr;
u64 gen;
bool is_shared;
};
struct btrfs_backref_shared_cache {
/*
* A path from a root to a leaf that has a file extent item pointing to
* a given data extent should never exceed the maximum b+tree height.
*/
struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
};
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
void *ctx);
@ -62,8 +76,10 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
u64 start_off, struct btrfs_path *path,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
struct ulist *roots, struct ulist *tmp_ulist);
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
u64 extent_gen,
struct ulist *roots, struct ulist *tmp,
struct btrfs_backref_shared_cache *cache);
int __init btrfs_prelim_ref_init(void);
void __cold btrfs_prelim_ref_exit(void);

View File

@ -593,8 +593,6 @@ next:
if (need_resched() ||
rwsem_is_contended(&fs_info->commit_root_sem)) {
if (wakeup)
caching_ctl->progress = last;
btrfs_release_path(path);
up_read(&fs_info->commit_root_sem);
mutex_unlock(&caching_ctl->mutex);
@ -618,9 +616,6 @@ next:
key.objectid = last;
key.offset = 0;
key.type = BTRFS_EXTENT_ITEM_KEY;
if (wakeup)
caching_ctl->progress = last;
btrfs_release_path(path);
goto next;
}
@ -655,7 +650,6 @@ next:
total_found += add_new_free_space(block_group, last,
block_group->start + block_group->length);
caching_ctl->progress = (u64)-1;
out:
btrfs_free_path(path);
@ -725,8 +719,6 @@ done:
}
#endif
caching_ctl->progress = (u64)-1;
up_read(&fs_info->commit_root_sem);
btrfs_free_excluded_extents(block_group);
mutex_unlock(&caching_ctl->mutex);
@ -755,7 +747,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
mutex_init(&caching_ctl->mutex);
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
caching_ctl->progress = cache->start;
refcount_set(&caching_ctl->count, 2);
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
@ -772,7 +763,6 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
WARN_ON(cache->caching_ctl);
cache->caching_ctl = caching_ctl;
cache->cached = BTRFS_CACHE_STARTED;
cache->has_caching_ctl = 1;
spin_unlock(&cache->lock);
write_lock(&fs_info->block_group_cache_lock);
@ -784,8 +774,10 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work);
out:
/* REVIEW */
if (wait && caching_ctl)
ret = btrfs_caching_ctl_wait_done(cache, caching_ctl);
/* wait_event(caching_ctl->wait, space_cache_v1_done(cache)); */
if (caching_ctl)
btrfs_put_caching_control(caching_ctl);
@ -988,32 +980,31 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
kobject_put(kobj);
}
if (block_group->has_caching_ctl)
caching_ctl = btrfs_get_caching_control(block_group);
if (block_group->cached == BTRFS_CACHE_STARTED)
btrfs_wait_block_group_cache_done(block_group);
if (block_group->has_caching_ctl) {
write_lock(&fs_info->block_group_cache_lock);
if (!caching_ctl) {
struct btrfs_caching_control *ctl;
list_for_each_entry(ctl,
&fs_info->caching_block_groups, list)
if (ctl->block_group == block_group) {
caching_ctl = ctl;
refcount_inc(&caching_ctl->count);
break;
}
}
if (caching_ctl)
list_del_init(&caching_ctl->list);
write_unlock(&fs_info->block_group_cache_lock);
if (caching_ctl) {
/* Once for the caching bgs list and once for us. */
btrfs_put_caching_control(caching_ctl);
btrfs_put_caching_control(caching_ctl);
write_lock(&fs_info->block_group_cache_lock);
caching_ctl = btrfs_get_caching_control(block_group);
if (!caching_ctl) {
struct btrfs_caching_control *ctl;
list_for_each_entry(ctl, &fs_info->caching_block_groups, list) {
if (ctl->block_group == block_group) {
caching_ctl = ctl;
refcount_inc(&caching_ctl->count);
break;
}
}
}
if (caching_ctl)
list_del_init(&caching_ctl->list);
write_unlock(&fs_info->block_group_cache_lock);
if (caching_ctl) {
/* Once for the caching bgs list and once for us. */
btrfs_put_caching_control(caching_ctl);
btrfs_put_caching_control(caching_ctl);
}
spin_lock(&trans->transaction->dirty_bgs_lock);
WARN_ON(!list_empty(&block_group->dirty_list));
@ -1034,12 +1025,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
< block_group->zone_unusable);
WARN_ON(block_group->space_info->disk_total
< block_group->length * factor);
WARN_ON(block_group->zone_is_active &&
WARN_ON(test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&block_group->runtime_flags) &&
block_group->space_info->active_total_bytes
< block_group->length);
}
block_group->space_info->total_bytes -= block_group->length;
if (block_group->zone_is_active)
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
block_group->space_info->active_total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
@ -1069,7 +1061,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
goto out;
spin_lock(&block_group->lock);
block_group->removed = 1;
set_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags);
/*
* At this point trimming or scrub can't start on this block group,
* because we removed the block group from the rbtree
@ -1304,6 +1297,9 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
if (btrfs_fs_closing(fs_info))
return;
/*
* Long running balances can keep us blocked here for eternity, so
* simply skip deletion if we're unable to get the mutex.
@ -1543,6 +1539,9 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
if (btrfs_fs_closing(fs_info))
return;
if (!btrfs_should_reclaim(fs_info))
return;
@ -1890,16 +1889,6 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
return 0;
}
static void link_block_group(struct btrfs_block_group *cache)
{
struct btrfs_space_info *space_info = cache->space_info;
int index = btrfs_bg_flags_to_raid_index(cache->flags);
down_write(&space_info->groups_sem);
list_add_tail(&cache->list, &space_info->block_groups[index]);
up_write(&space_info->groups_sem);
}
static struct btrfs_block_group *btrfs_create_block_group_cache(
struct btrfs_fs_info *fs_info, u64 start)
{
@ -1937,7 +1926,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
btrfs_init_free_space_ctl(cache, cache->free_space_ctl);
atomic_set(&cache->frozen, 0);
mutex_init(&cache->free_space_lock);
btrfs_init_full_stripe_locks_tree(&cache->full_stripe_locks_root);
cache->full_stripe_locks_root.root = RB_ROOT;
mutex_init(&cache->full_stripe_locks_root.lock);
return cache;
}
@ -2002,7 +1992,6 @@ static int read_one_block_group(struct btrfs_fs_info *info,
int need_clear)
{
struct btrfs_block_group *cache;
struct btrfs_space_info *space_info;
const bool mixed = btrfs_fs_incompat(info, MIXED_GROUPS);
int ret;
@ -2078,11 +2067,9 @@ static int read_one_block_group(struct btrfs_fs_info *info,
/* Should not have any excluded extents. Just in case, though. */
btrfs_free_excluded_extents(cache);
} else if (cache->length == cache->used) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
add_new_free_space(cache, cache->start,
cache->start + cache->length);
@ -2095,14 +2082,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
goto error;
}
trace_btrfs_add_block_group(info, cache, 0);
btrfs_update_space_info(info, cache->flags, cache->length,
cache->used, cache->bytes_super,
cache->zone_unusable, cache->zone_is_active,
&space_info);
cache->space_info = space_info;
link_block_group(cache);
btrfs_add_bg_to_space_info(info, cache);
set_avail_alloc_bits(info, cache->flags);
if (btrfs_chunk_writeable(info, cache->start)) {
@ -2126,7 +2106,6 @@ error:
static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
{
struct extent_map_tree *em_tree = &fs_info->mapping_tree;
struct btrfs_space_info *space_info;
struct rb_node *node;
int ret = 0;
@ -2146,7 +2125,6 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
/* Fill dummy cache as FULL */
bg->length = em->len;
bg->flags = map->type;
bg->last_byte_to_unpin = (u64)-1;
bg->cached = BTRFS_CACHE_FINISHED;
bg->used = em->len;
bg->flags = map->type;
@ -2167,10 +2145,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
break;
}
btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, false, &space_info);
bg->space_info = space_info;
link_block_group(bg);
btrfs_add_bg_to_space_info(fs_info, bg);
set_avail_alloc_bits(fs_info, bg->flags);
}
@ -2190,7 +2165,16 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
int need_clear = 0;
u64 cache_gen;
if (!root)
/*
* Either no extent root (with ibadroots rescue option) or we have
* unsupported RO options. The fs can never be mounted read-write, so no
* need to waste time searching block group items.
*
* This also allows new extent tree related changes to be RO compat,
* no need for a full incompat flag.
*/
if (!root || (btrfs_super_compat_ro_flags(info->super_copy) &
~BTRFS_FEATURE_COMPAT_RO_SUPP))
return fill_dummy_bgs(info);
key.objectid = 0;
@ -2425,7 +2409,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
ret = insert_block_group_item(trans, block_group);
if (ret)
btrfs_abort_transaction(trans, ret);
if (!block_group->chunk_item_inserted) {
if (!test_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
&block_group->runtime_flags)) {
mutex_lock(&fs_info->chunk_mutex);
ret = btrfs_chunk_alloc_add_chunk_item(trans, block_group);
mutex_unlock(&fs_info->chunk_mutex);
@ -2494,7 +2479,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
set_free_space_tree_thresholds(cache);
cache->used = bytes_used;
cache->flags = type;
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
@ -2519,14 +2503,6 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
btrfs_free_excluded_extents(cache);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(cache)) {
u64 new_bytes_used = size - bytes_used;
bytes_used += new_bytes_used >> 1;
fragment_free_space(cache);
}
#endif
/*
* Ensure the corresponding space_info object is created and
* assigned to our block group. We want our bg to be added to the rbtree
@ -2547,12 +2523,17 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
* the rbtree, update the space info's counters.
*/
trace_btrfs_add_block_group(fs_info, cache, 1);
btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
cache->bytes_super, cache->zone_unusable,
cache->zone_is_active, &cache->space_info);
btrfs_add_bg_to_space_info(fs_info, cache);
btrfs_update_global_block_rsv(fs_info);
link_block_group(cache);
#ifdef CONFIG_BTRFS_DEBUG
if (btrfs_should_fragment_free_space(cache)) {
u64 new_bytes_used = size - bytes_used;
cache->space_info->bytes_used += new_bytes_used >> 1;
fragment_free_space(cache);
}
#endif
list_add_tail(&cache->bg_list, &trans->new_bgs);
trans->delayed_ref_updates++;
@ -2869,7 +2850,7 @@ again:
cache_size *= fs_info->sectorsize;
ret = btrfs_check_data_free_space(BTRFS_I(inode), &data_reserved, 0,
cache_size);
cache_size, false);
if (ret)
goto out_put;
@ -3965,35 +3946,24 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
{
struct btrfs_block_group *block_group;
u64 last = 0;
while (1) {
struct inode *inode;
block_group = btrfs_lookup_first_block_group(info, 0);
while (block_group) {
btrfs_wait_block_group_cache_done(block_group);
spin_lock(&block_group->lock);
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
&block_group->runtime_flags)) {
struct inode *inode = block_group->inode;
block_group = btrfs_lookup_first_block_group(info, last);
while (block_group) {
btrfs_wait_block_group_cache_done(block_group);
spin_lock(&block_group->lock);
if (block_group->iref)
break;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
block_group = btrfs_next_block_group(block_group);
}
if (!block_group) {
if (last == 0)
break;
last = 0;
continue;
}
inode = block_group->inode;
block_group->iref = 0;
block_group->inode = NULL;
spin_unlock(&block_group->lock);
ASSERT(block_group->io_ctl.inode == NULL);
iput(inode);
last = block_group->start + block_group->length;
btrfs_put_block_group(block_group);
ASSERT(block_group->io_ctl.inode == NULL);
iput(inode);
} else {
spin_unlock(&block_group->lock);
}
block_group = btrfs_next_block_group(block_group);
}
}
@ -4129,7 +4099,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
spin_lock(&block_group->lock);
cleanup = (atomic_dec_and_test(&block_group->frozen) &&
block_group->removed);
test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags));
spin_unlock(&block_group->lock);
if (cleanup) {
@ -4150,7 +4120,7 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
* tasks trimming this block group have left 1 entry each one.
* Free them if any.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
btrfs_remove_free_space_cache(block_group);
}
}

View File

@ -46,19 +46,44 @@ enum btrfs_chunk_alloc_enum {
CHUNK_ALLOC_FORCE_FOR_EXTENT,
};
/* Block group flags set at runtime */
enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_IREF,
BLOCK_GROUP_FLAG_REMOVED,
BLOCK_GROUP_FLAG_TO_COPY,
BLOCK_GROUP_FLAG_RELOCATING_REPAIR,
BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
};
enum btrfs_caching_type {
BTRFS_CACHE_NO,
BTRFS_CACHE_STARTED,
BTRFS_CACHE_FINISHED,
BTRFS_CACHE_ERROR,
};
struct btrfs_caching_control {
struct list_head list;
struct mutex mutex;
wait_queue_head_t wait;
struct btrfs_work work;
struct btrfs_block_group *block_group;
u64 progress;
refcount_t count;
};
/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M
/*
* Tree to record all locked full stripes of a RAID5/6 block group
*/
struct btrfs_full_stripe_locks_tree {
struct rb_root root;
struct mutex lock;
};
struct btrfs_block_group {
struct btrfs_fs_info *fs_info;
struct inode *inode;
@ -95,23 +120,15 @@ struct btrfs_block_group {
/* For raid56, this is a full stripe, without parity */
unsigned long full_stripe_len;
unsigned long runtime_flags;
unsigned int ro;
unsigned int iref:1;
unsigned int has_caching_ctl:1;
unsigned int removed:1;
unsigned int to_copy:1;
unsigned int relocating_repair:1;
unsigned int chunk_item_inserted:1;
unsigned int zone_is_active:1;
unsigned int zoned_data_reloc_ongoing:1;
int disk_cache_state;
/* Cache tracking stuff */
int cached;
struct btrfs_caching_control *caching_ctl;
u64 last_byte_to_unpin;
struct btrfs_space_info *space_info;
@ -305,8 +322,6 @@ void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
void btrfs_wait_space_cache_v1_finished(struct btrfs_block_group *cache,
struct btrfs_caching_control *caching_ctl);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
struct block_device *bdev, u64 physical, u64 **logical,
int *naddrs, int *stripe_len);

View File

@ -286,7 +286,7 @@ u64 btrfs_block_rsv_release(struct btrfs_fs_info *fs_info,
*/
if (block_rsv == delayed_rsv)
target = global_rsv;
else if (block_rsv != global_rsv && !delayed_rsv->full)
else if (block_rsv != global_rsv && !btrfs_block_rsv_full(delayed_rsv))
target = delayed_rsv;
if (target && block_rsv->space_info != target->space_info)
@ -424,6 +424,7 @@ void btrfs_init_root_block_rsv(struct btrfs_root *root)
case BTRFS_CSUM_TREE_OBJECTID:
case BTRFS_EXTENT_TREE_OBJECTID:
case BTRFS_FREE_SPACE_TREE_OBJECTID:
case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
root->block_rsv = &fs_info->delayed_refs_rsv;
break;
case BTRFS_ROOT_TREE_OBJECTID:

View File

@ -92,4 +92,13 @@ static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
btrfs_block_rsv_release(fs_info, block_rsv, 0, NULL);
}
/*
* Fast path to check if the reserve is full, may be carefully used outside of
* locks.
*/
static inline bool btrfs_block_rsv_full(const struct btrfs_block_rsv *rsv)
{
return data_race(rsv->full);
}
#endif /* BTRFS_BLOCK_RSV_H */

View File

@ -65,6 +65,8 @@ enum {
* on the same file.
*/
BTRFS_INODE_VERITY_IN_PROGRESS,
/* Set when this inode is a free space inode. */
BTRFS_INODE_FREE_SPACE_INODE,
};
/* in memory btrfs inode */
@ -94,7 +96,8 @@ struct btrfs_inode {
/* special utility tree used to record which mirrors have already been
* tried when checksums fail for a given block
*/
struct extent_io_tree io_failure_tree;
struct rb_root io_failure_tree;
spinlock_t io_failure_lock;
/*
* Keep track of where the inode has extent items mapped in order to
@ -250,11 +253,6 @@ struct btrfs_inode {
struct inode vfs_inode;
};
static inline u32 btrfs_inode_sectorsize(const struct btrfs_inode *inode)
{
return inode->root->fs_info->sectorsize;
}
static inline struct btrfs_inode *BTRFS_I(const struct inode *inode)
{
return container_of(inode, struct btrfs_inode, vfs_inode);
@ -272,13 +270,6 @@ static inline unsigned long btrfs_inode_hash(u64 objectid,
return (unsigned long)h;
}
static inline void btrfs_insert_inode_hash(struct inode *inode)
{
unsigned long h = btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root);
__insert_inode_hash(inode, h);
}
#if BITS_PER_LONG == 32
/*
@ -312,13 +303,7 @@ static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
{
struct btrfs_root *root = inode->root;
if (root == root->fs_info->tree_root &&
btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID)
return true;
return false;
return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags);
}
static inline bool is_data_inode(struct inode *inode)

View File

@ -152,9 +152,7 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
}
/* Do io completion on the original bio */
if (cb->status != BLK_STS_OK)
cb->orig_bio->bi_status = cb->status;
bio_endio(cb->orig_bio);
btrfs_bio_end_io(btrfs_bio(cb->orig_bio), cb->status);
/* Finally free the cb struct */
kfree(cb->compressed_pages);
@ -166,16 +164,15 @@ static void finish_compressed_bio_read(struct compressed_bio *cb)
* before decompressing it into the original bio and freeing the uncompressed
* pages.
*/
static void end_compressed_bio_read(struct bio *bio)
static void end_compressed_bio_read(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = bio->bi_private;
struct compressed_bio *cb = bbio->private;
struct inode *inode = cb->inode;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *bi = BTRFS_I(inode);
bool csum = !(bi->flags & BTRFS_INODE_NODATASUM) &&
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
blk_status_t status = bio->bi_status;
struct btrfs_bio *bbio = btrfs_bio(bio);
blk_status_t status = bbio->bio.bi_status;
struct bvec_iter iter;
struct bio_vec bv;
u32 offset;
@ -186,9 +183,8 @@ static void end_compressed_bio_read(struct bio *bio)
if (!status &&
(!csum || !btrfs_check_data_csum(inode, bbio, offset,
bv.bv_page, bv.bv_offset))) {
clean_io_failure(fs_info, &bi->io_failure_tree,
&bi->io_tree, start, bv.bv_page,
btrfs_ino(bi), bv.bv_offset);
btrfs_clean_io_failure(bi, start, bv.bv_page,
bv.bv_offset);
} else {
int ret;
@ -209,7 +205,7 @@ static void end_compressed_bio_read(struct bio *bio)
if (refcount_dec_and_test(&cb->pending_ios))
finish_compressed_bio_read(cb);
btrfs_bio_free_csum(bbio);
bio_put(bio);
bio_put(&bbio->bio);
}
/*
@ -301,20 +297,20 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
* This also calls the writeback end hooks for the file pages so that metadata
* and checksums can be updated in the file.
*/
static void end_compressed_bio_write(struct bio *bio)
static void end_compressed_bio_write(struct btrfs_bio *bbio)
{
struct compressed_bio *cb = bio->bi_private;
struct compressed_bio *cb = bbio->private;
if (bio->bi_status)
cb->status = bio->bi_status;
if (bbio->bio.bi_status)
cb->status = bbio->bio.bi_status;
if (refcount_dec_and_test(&cb->pending_ios)) {
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
btrfs_record_physical_zoned(cb->inode, cb->start, bio);
btrfs_record_physical_zoned(cb->inode, cb->start, &bbio->bio);
queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
}
bio_put(bio);
bio_put(&bbio->bio);
}
/*
@ -335,7 +331,8 @@ static void end_compressed_bio_write(struct bio *bio)
static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_bytenr,
blk_opf_t opf, bio_end_io_t endio_func,
blk_opf_t opf,
btrfs_bio_end_io_t endio_func,
u64 *next_stripe_start)
{
struct btrfs_fs_info *fs_info = btrfs_sb(cb->inode->i_sb);
@ -344,12 +341,8 @@ static struct bio *alloc_compressed_bio(struct compressed_bio *cb, u64 disk_byte
struct bio *bio;
int ret;
bio = btrfs_bio_alloc(BIO_MAX_VECS);
bio = btrfs_bio_alloc(BIO_MAX_VECS, opf, endio_func, cb);
bio->bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
bio->bi_opf = opf;
bio->bi_private = cb;
bio->bi_end_io = endio_func;
em = btrfs_get_chunk_map(fs_info, disk_bytenr, fs_info->sectorsize);
if (IS_ERR(em)) {
@ -478,8 +471,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
if (!skip_sum) {
ret = btrfs_csum_one_bio(inode, bio, start, true);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
btrfs_bio_end_io(btrfs_bio(bio), ret);
break;
}
}
@ -596,7 +588,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
page_end = (pg_index << PAGE_SHIFT) + PAGE_SIZE - 1;
lock_extent(tree, cur, page_end);
lock_extent(tree, cur, page_end, NULL);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, cur, page_end + 1 - cur);
read_unlock(&em_tree->lock);
@ -610,7 +602,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
(cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, cur, page_end);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
put_page(page);
break;
@ -630,7 +622,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
add_size = min(em->start + em->len, page_end + 1) - cur;
ret = bio_add_page(cb->orig_bio, page, add_size, offset_in_page(cur));
if (ret != add_size) {
unlock_extent(tree, cur, page_end);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
put_page(page);
break;
@ -799,8 +791,7 @@ void btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_lookup_bio_sums(inode, comp_bio, NULL);
if (ret) {
comp_bio->bi_status = ret;
bio_endio(comp_bio);
btrfs_bio_end_io(btrfs_bio(comp_bio), ret);
break;
}
@ -826,8 +817,7 @@ fail:
kfree(cb);
out:
free_extent_map(em);
bio->bi_status = ret;
bio_endio(bio);
btrfs_bio_end_io(btrfs_bio(bio), ret);
return;
}

View File

@ -1447,6 +1447,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
return 0;
}
if (p->nowait) {
free_extent_buffer(tmp);
return -EAGAIN;
}
if (unlock_up)
btrfs_unlock_up_safe(p, level + 1);
@ -1467,6 +1472,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
ret = -EAGAIN;
goto out;
} else if (p->nowait) {
return -EAGAIN;
}
if (unlock_up) {
@ -1634,7 +1641,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
* We don't know the level of the root node until we actually
* have it read locked
*/
b = btrfs_read_lock_root_node(root);
if (p->nowait) {
b = btrfs_try_read_lock_root_node(root);
if (IS_ERR(b))
return b;
} else {
b = btrfs_read_lock_root_node(root);
}
level = btrfs_header_level(b);
if (level > write_lock_level)
goto out;
@ -1910,6 +1923,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
WARN_ON(p->nodes[0] != NULL);
BUG_ON(!cow && ins_len);
/*
* For now only allow nowait for read only operations. There's no
* strict reason why we can't, we just only need it for reads so it's
* only implemented for reads.
*/
ASSERT(!p->nowait || !cow);
if (ins_len < 0) {
lowest_unlock = 2;
@ -1936,7 +1956,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (p->need_commit_sem) {
ASSERT(p->search_commit_root);
down_read(&fs_info->commit_root_sem);
if (p->nowait) {
if (!down_read_trylock(&fs_info->commit_root_sem))
return -EAGAIN;
} else {
down_read(&fs_info->commit_root_sem);
}
}
again:
@ -2082,7 +2107,15 @@ cow_done:
btrfs_tree_lock(b);
p->locks[level] = BTRFS_WRITE_LOCK;
} else {
btrfs_tree_read_lock(b);
if (p->nowait) {
if (!btrfs_try_tree_read_lock(b)) {
free_extent_buffer(b);
ret = -EAGAIN;
goto done;
}
} else {
btrfs_tree_read_lock(b);
}
p->locks[level] = BTRFS_READ_LOCK;
}
p->nodes[level] = b;
@ -2131,6 +2164,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
ASSERT(!p->nowait);
if (p->search_commit_root) {
BUG_ON(time_seq);
@ -4432,6 +4466,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
int ret = 1;
int keep_locks = path->keep_locks;
ASSERT(!path->nowait);
path->keep_locks = 1;
again:
cur = btrfs_read_lock_root_node(root);
@ -4612,6 +4647,8 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int ret;
int i;
ASSERT(!path->nowait);
nritems = btrfs_header_nritems(path->nodes[0]);
if (nritems == 0)
return 1;

View File

@ -42,7 +42,6 @@ struct btrfs_delayed_ref_root;
struct btrfs_space_info;
struct btrfs_block_group;
extern struct kmem_cache *btrfs_trans_handle_cachep;
extern struct kmem_cache *btrfs_bit_radix_cachep;
extern struct kmem_cache *btrfs_path_cachep;
extern struct kmem_cache *btrfs_free_space_cachep;
extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
@ -50,6 +49,11 @@ struct btrfs_ordered_sum;
struct btrfs_ref;
struct btrfs_bio;
struct btrfs_ioctl_encoded_io_args;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
struct reloc_control;
#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@ -280,14 +284,9 @@ struct btrfs_super_block {
/* the UUID written into btree blocks */
u8 metadata_uuid[BTRFS_FSID_SIZE];
/* Extent tree v2 */
__le64 block_group_root;
__le64 block_group_root_generation;
u8 block_group_root_level;
/* future expansion */
u8 reserved8[7];
__le64 reserved[25];
u8 reserved8[8];
__le64 reserved[27];
u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
@ -307,7 +306,8 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
#define BTRFS_FEATURE_COMPAT_RO_SUPP \
(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE | \
BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
BTRFS_FEATURE_COMPAT_RO_VERITY)
BTRFS_FEATURE_COMPAT_RO_VERITY | \
BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)
#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
@ -443,9 +443,10 @@ struct btrfs_path {
* header (ie. sizeof(struct btrfs_item) is not included).
*/
unsigned int search_for_extension:1;
/* Stop search if any locks need to be taken (for read) */
unsigned int nowait:1;
};
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
struct btrfs_dev_replace {
u64 replace_state; /* see #define above */
time64_t time_started; /* seconds since 1-Jan-1970 */
@ -502,21 +503,6 @@ struct btrfs_free_cluster {
struct list_head block_group_list;
};
enum btrfs_caching_type {
BTRFS_CACHE_NO,
BTRFS_CACHE_STARTED,
BTRFS_CACHE_FINISHED,
BTRFS_CACHE_ERROR,
};
/*
* Tree to record all locked full stripes of a RAID5/6 block group
*/
struct btrfs_full_stripe_locks_tree {
struct rb_root root;
struct mutex lock;
};
/* Discard control. */
/*
* Async discard uses multiple lists to differentiate the discard filter
@ -548,42 +534,6 @@ struct btrfs_discard_ctl {
atomic64_t discard_bytes_saved;
};
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
/* fs_info */
struct reloc_control;
struct btrfs_device;
struct btrfs_fs_devices;
struct btrfs_balance_control;
struct btrfs_delayed_root;
/*
* Block group or device which contains an active swapfile. Used for preventing
* unsafe operations while a swapfile is active.
*
* These are sorted on (ptr, inode) (note that a block group or device can
* contain more than one swapfile). We compare the pointer values because we
* don't actually care what the object is, we just need a quick check whether
* the object exists in the rbtree.
*/
struct btrfs_swapfile_pin {
struct rb_node node;
void *ptr;
struct inode *inode;
/*
* If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
* points to a struct btrfs_device.
*/
bool is_block_group;
/*
* Only used when 'is_block_group' is true and it is the number of
* extents used by a swapfile for this block group ('ptr' field).
*/
int bg_extent_count;
};
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
enum {
BTRFS_FS_CLOSING_START,
BTRFS_FS_CLOSING_DONE,
@ -890,6 +840,7 @@ struct btrfs_fs_info {
struct kobject *space_info_kobj;
struct kobject *qgroups_kobj;
struct kobject *discard_kobj;
/* used to keep from writing metadata until there is a nice batch */
struct percpu_counter dirty_metadata_bytes;
@ -1005,6 +956,7 @@ struct btrfs_fs_info {
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
u8 qgroup_drop_subtree_thres;
/* filesystem state */
unsigned long fs_state;
@ -1092,6 +1044,23 @@ struct btrfs_fs_info {
/* Updates are not protected by any lock */
struct btrfs_commit_stats commit_stats;
/*
* Last generation where we dropped a non-relocation root.
* Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
* to change it and to read it, respectively.
*/
u64 last_root_drop_gen;
/*
* Annotations for transaction events (structures are empty when
* compiled without lockdep).
*/
struct lockdep_map btrfs_trans_num_writers_map;
struct lockdep_map btrfs_trans_num_extwriters_map;
struct lockdep_map btrfs_state_change_map[4];
struct lockdep_map btrfs_trans_pending_ordered_map;
struct lockdep_map btrfs_ordered_extent_map;
#ifdef CONFIG_BTRFS_FS_REF_VERIFY
spinlock_t ref_verify_lock;
struct rb_root block_tree;
@ -1099,7 +1068,6 @@ struct btrfs_fs_info {
#ifdef CONFIG_BTRFS_DEBUG
struct kobject *debug_kobj;
struct kobject *discard_debug_kobj;
struct list_head allocated_roots;
spinlock_t eb_leak_lock;
@ -1107,11 +1075,84 @@ struct btrfs_fs_info {
#endif
};
static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
u64 gen)
{
WRITE_ONCE(fs_info->last_root_drop_gen, gen);
}
static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
{
return READ_ONCE(fs_info->last_root_drop_gen);
}
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
}
/*
* Take the number of bytes to be checksummed and figure out how many leaves
* it would require to store the csums for that many bytes.
*/
static inline u64 btrfs_csum_bytes_to_leaves(
const struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
}
/*
* Use this if we would be adding new items, as we could split nodes as we cow
* down the tree.
*/
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
* Doing a truncate or a modification won't result in new nodes or leaves, just
* what we need for COW.
*/
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
sizeof(struct btrfs_item))
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
return fs_info->zone_size > 0;
}
/*
* Count how many fs_info->max_extent_size cover the @size
*/
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (!fs_info)
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
#endif
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
}
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation op);
/*
* The state of btrfs root
*/
@ -1174,6 +1215,82 @@ enum {
BTRFS_ROOT_RESET_LOCKDEP_CLASS,
};
enum btrfs_lockdep_trans_states {
BTRFS_LOCKDEP_TRANS_COMMIT_START,
BTRFS_LOCKDEP_TRANS_UNBLOCKED,
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
BTRFS_LOCKDEP_TRANS_COMPLETED,
};
/*
* Lockdep annotation for wait events.
*
* @owner: The struct where the lockdep map is defined
* @lock: The lockdep map corresponding to a wait event
*
* This macro is used to annotate a wait event. In this case a thread acquires
* the lockdep map as writer (exclusive lock) because it has to block until all
* the threads that hold the lock as readers signal the condition for the wait
* event and release their locks.
*/
#define btrfs_might_wait_for_event(owner, lock) \
do { \
rwsem_acquire(&owner->lock##_map, 0, 0, _THIS_IP_); \
rwsem_release(&owner->lock##_map, _THIS_IP_); \
} while (0)
/*
* Protection for the resource/condition of a wait event.
*
* @owner: The struct where the lockdep map is defined
* @lock: The lockdep map corresponding to a wait event
*
* Many threads can modify the condition for the wait event at the same time
* and signal the threads that block on the wait event. The threads that modify
* the condition and do the signaling acquire the lock as readers (shared
* lock).
*/
#define btrfs_lockdep_acquire(owner, lock) \
rwsem_acquire_read(&owner->lock##_map, 0, 0, _THIS_IP_)
/*
* Used after signaling the condition for a wait event to release the lockdep
* map held by a reader thread.
*/
#define btrfs_lockdep_release(owner, lock) \
rwsem_release(&owner->lock##_map, _THIS_IP_)
/*
* Macros for the transaction states wait events, similar to the generic wait
* event macros.
*/
#define btrfs_might_wait_for_state(owner, i) \
do { \
rwsem_acquire(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_); \
rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_); \
} while (0)
#define btrfs_trans_state_lockdep_acquire(owner, i) \
rwsem_acquire_read(&owner->btrfs_state_change_map[i], 0, 0, _THIS_IP_)
#define btrfs_trans_state_lockdep_release(owner, i) \
rwsem_release(&owner->btrfs_state_change_map[i], _THIS_IP_)
/* Initialization of the lockdep map */
#define btrfs_lockdep_init_map(owner, lock) \
do { \
static struct lock_class_key lock##_key; \
lockdep_init_map(&owner->lock##_map, #lock, &lock##_key, 0); \
} while (0)
/* Initialization of the transaction states lockdep maps. */
#define btrfs_state_lockdep_init_map(owner, lock, state) \
do { \
static struct lock_class_key lock##_key; \
lockdep_init_map(&owner->btrfs_state_change_map[state], #lock, \
&lock##_key, 0); \
} while (0)
static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
{
clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
@ -2391,17 +2508,6 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
num_devices, 64);
/*
* For extent tree v2 we overload the extent root with the block group root, as
* we will have multiple extent roots.
*/
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
extent_root, 64);
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
extent_root_gen, 64);
BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
struct btrfs_root_backup, extent_root_level, 8);
/* struct btrfs_balance_item */
BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
@ -2534,13 +2640,6 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
uuid_tree_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
block_group_root, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
struct btrfs_super_block,
block_group_root_generation, 64);
BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
block_group_root_level, 8);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
@ -2761,45 +2860,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
static inline u8 *btrfs_csum_ptr(const struct btrfs_fs_info *fs_info, u8 *csums,
u64 offset)
{
u64 offset_in_sectors = offset >> fs_info->sectorsize_bits;
return csums + offset_in_sectors * fs_info->csum_size;
}
/*
* Take the number of bytes to be checksummed and figure out how many leaves
* it would require to store the csums for that many bytes.
*/
static inline u64 btrfs_csum_bytes_to_leaves(
const struct btrfs_fs_info *fs_info, u64 csum_bytes)
{
const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits;
return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf);
}
/*
* Use this if we would be adding new items, as we could split nodes as we cow
* down the tree.
*/
static inline u64 btrfs_calc_insert_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
* Doing a truncate or a modification won't result in new nodes or leaves, just
* what we need for COW.
*/
static inline u64 btrfs_calc_metadata_size(struct btrfs_fs_info *fs_info,
unsigned num_items)
{
return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items;
}
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes);
@ -3257,12 +3317,9 @@ int btrfs_find_orphan_item(struct btrfs_root *root, u64 offset);
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
u64 disk_offset, u64 disk_num_bytes,
u64 num_bytes, u64 offset, u64 ram_bytes,
u8 compression, u8 encryption, u16 other_encoding);
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 objectid, u64 pos,
u64 num_bytes);
int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 objectid,
@ -3273,7 +3330,8 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
u64 offset, bool one_ordered);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit);
struct list_head *list, int search_commit,
bool nowait);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
@ -3299,11 +3357,9 @@ unsigned int btrfs_verify_data_csum(struct btrfs_bio *bbio,
u64 start, u64 end);
int btrfs_check_data_csum(struct inode *inode, struct btrfs_bio *bbio,
u32 bio_offset, struct page *page, u32 pgoff);
struct extent_map *btrfs_get_extent_fiemap(struct btrfs_inode *inode,
u64 start, u64 len);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
u64 *orig_start, u64 *orig_block_len,
u64 *ram_bytes, bool strict);
u64 *ram_bytes, bool nowait, bool strict);
void __btrfs_del_delalloc_inode(struct btrfs_root *root,
struct btrfs_inode *inode);
@ -3358,7 +3414,6 @@ void btrfs_split_delalloc_extent(struct inode *inode,
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf);
void btrfs_evict_inode(struct inode *inode);
int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc);
struct inode *btrfs_alloc_inode(struct super_block *sb);
void btrfs_destroy_inode(struct inode *inode);
void btrfs_free_inode(struct inode *inode);
@ -3439,15 +3494,6 @@ void btrfs_get_block_group_info(struct list_head *groups_list,
struct btrfs_ioctl_space_info *space);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_balance_args *bargs);
bool btrfs_exclop_start(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation type);
void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info);
void btrfs_exclop_finish(struct btrfs_fs_info *fs_info);
void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
enum btrfs_exclusive_operation op);
/* file.c */
int __init btrfs_auto_defrag_init(void);
@ -3457,8 +3503,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void btrfs_drop_extent_cache(struct btrfs_inode *inode, u64 start, u64 end,
int skip_pinned);
extern const struct file_operations btrfs_file_operations;
int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_inode *inode,
@ -3478,8 +3522,10 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
struct extent_state **cached, bool noreserve);
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
size_t *write_bytes);
size_t *write_bytes, bool nowait);
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
u64 *delalloc_start_ret, u64 *delalloc_end_ret);
/* tree-defrag.c */
int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
@ -3745,7 +3791,7 @@ const char * __attribute_const__ btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
const char *function,
unsigned int line, int errno);
unsigned int line, int errno, bool first_hit);
/*
* Call btrfs_abort_transaction as early as possible when an error condition is
@ -3753,9 +3799,11 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
*/
#define btrfs_abort_transaction(trans, errno) \
do { \
bool first = false; \
/* Report first abort since mount */ \
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
&((trans)->fs_info->fs_state))) { \
first = true; \
if ((errno) != -EIO && (errno) != -EROFS) { \
WARN(1, KERN_DEBUG \
"BTRFS: Transaction aborted (error %d)\n", \
@ -3767,7 +3815,7 @@ do { \
} \
} \
__btrfs_abort_transaction((trans), __func__, \
__LINE__, (errno)); \
__LINE__, (errno), first); \
} while (0)
#ifdef CONFIG_PRINTK_INDEX
@ -3984,16 +4032,9 @@ int btrfs_scrub_cancel(struct btrfs_fs_info *info);
int btrfs_scrub_cancel_dev(struct btrfs_device *dev);
int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid,
struct btrfs_scrub_progress *progress);
static inline void btrfs_init_full_stripe_locks_tree(
struct btrfs_full_stripe_locks_tree *locks_root)
{
locks_root->root = RB_ROOT;
mutex_init(&locks_root->lock);
}
/* dev-replace.c */
void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info);
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info);
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount);
static inline void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
@ -4020,6 +4061,7 @@ static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
extern const struct fsverity_operations btrfs_verityops;
int btrfs_drop_verity_items(struct btrfs_inode *inode);
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size);
BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item,
encryption, 8);
@ -4037,6 +4079,12 @@ static inline int btrfs_drop_verity_items(struct btrfs_inode *inode)
return 0;
}
static inline int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
size_t buf_size)
{
return -EPERM;
}
#endif
/* Sanity test specific functions */
@ -4053,24 +4101,6 @@ static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
}
#endif
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
return fs_info->zone_size > 0;
}
/*
* Count how many fs_info->max_extent_size cover the @size
*/
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
{
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
if (!fs_info)
return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
#endif
return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
}
static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
{
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;

View File

@ -127,9 +127,11 @@ int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
}
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len)
struct extent_changeset **reserved, u64 start,
u64 len, bool noflush)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_DATA;
int ret;
/* align the range */
@ -137,7 +139,12 @@ int btrfs_check_data_free_space(struct btrfs_inode *inode,
round_down(start, fs_info->sectorsize);
start = round_down(start, fs_info->sectorsize);
ret = btrfs_alloc_data_chunk_ondemand(inode, len);
if (noflush)
flush = BTRFS_RESERVE_NO_FLUSH;
else if (btrfs_is_free_space_inode(inode))
flush = BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE;
ret = btrfs_reserve_data_bytes(fs_info, len, flush);
if (ret < 0)
return ret;
@ -454,7 +461,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
{
int ret;
ret = btrfs_check_data_free_space(inode, reserved, start, len);
ret = btrfs_check_data_free_space(inode, reserved, start, len, false);
if (ret < 0)
return ret;
ret = btrfs_delalloc_reserve_metadata(inode, len, len, false);

View File

@ -7,7 +7,8 @@ struct extent_changeset;
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
int btrfs_check_data_free_space(struct btrfs_inode *inode,
struct extent_changeset **reserved, u64 start, u64 len);
struct extent_changeset **reserved, u64 start, u64 len,
bool noflush);
void btrfs_free_reserved_data_space(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start, u64 len);
void btrfs_delalloc_release_space(struct btrfs_inode *inode,

View File

@ -302,15 +302,21 @@ static inline void btrfs_release_prepared_delayed_node(
__btrfs_release_delayed_node(node, 1);
}
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u16 data_len,
struct btrfs_delayed_node *node,
enum btrfs_delayed_item_type type)
{
struct btrfs_delayed_item *item;
item = kmalloc(sizeof(*item) + data_len, GFP_NOFS);
if (item) {
item->data_len = data_len;
item->ins_or_del = 0;
item->type = type;
item->bytes_reserved = 0;
item->delayed_node = NULL;
item->delayed_node = node;
RB_CLEAR_NODE(&item->rb_node);
INIT_LIST_HEAD(&item->log_list);
item->logged = false;
refcount_set(&item->refs, 1);
}
return item;
@ -319,72 +325,32 @@ static struct btrfs_delayed_item *btrfs_alloc_delayed_item(u32 data_len)
/*
* __btrfs_lookup_delayed_item - look up the delayed item by key
* @delayed_node: pointer to the delayed node
* @key: the key to look up
* @prev: used to store the prev item if the right item isn't found
* @next: used to store the next item if the right item isn't found
* @index: the dir index value to lookup (offset of a dir index key)
*
* Note: if we don't find the right item, we will return the prev item and
* the next item.
*/
static struct btrfs_delayed_item *__btrfs_lookup_delayed_item(
struct rb_root *root,
struct btrfs_key *key,
struct btrfs_delayed_item **prev,
struct btrfs_delayed_item **next)
u64 index)
{
struct rb_node *node, *prev_node = NULL;
struct rb_node *node = root->rb_node;
struct btrfs_delayed_item *delayed_item = NULL;
int ret = 0;
node = root->rb_node;
while (node) {
delayed_item = rb_entry(node, struct btrfs_delayed_item,
rb_node);
prev_node = node;
ret = btrfs_comp_cpu_keys(&delayed_item->key, key);
if (ret < 0)
if (delayed_item->index < index)
node = node->rb_right;
else if (ret > 0)
else if (delayed_item->index > index)
node = node->rb_left;
else
return delayed_item;
}
if (prev) {
if (!prev_node)
*prev = NULL;
else if (ret < 0)
*prev = delayed_item;
else if ((node = rb_prev(prev_node)) != NULL) {
*prev = rb_entry(node, struct btrfs_delayed_item,
rb_node);
} else
*prev = NULL;
}
if (next) {
if (!prev_node)
*next = NULL;
else if (ret > 0)
*next = delayed_item;
else if ((node = rb_next(prev_node)) != NULL) {
*next = rb_entry(node, struct btrfs_delayed_item,
rb_node);
} else
*next = NULL;
}
return NULL;
}
static struct btrfs_delayed_item *__btrfs_lookup_delayed_insertion_item(
struct btrfs_delayed_node *delayed_node,
struct btrfs_key *key)
{
return __btrfs_lookup_delayed_item(&delayed_node->ins_root.rb_root, key,
NULL, NULL);
}
static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
struct btrfs_delayed_item *ins)
{
@ -392,15 +358,13 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
struct rb_node *parent_node = NULL;
struct rb_root_cached *root;
struct btrfs_delayed_item *item;
int cmp;
bool leftmost = true;
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_node->ins_root;
else if (ins->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
root = &delayed_node->del_root;
else
BUG();
root = &delayed_node->del_root;
p = &root->rb_root.rb_node;
node = &ins->rb_node;
@ -409,11 +373,10 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
item = rb_entry(parent_node, struct btrfs_delayed_item,
rb_node);
cmp = btrfs_comp_cpu_keys(&item->key, &ins->key);
if (cmp < 0) {
if (item->index < ins->index) {
p = &(*p)->rb_right;
leftmost = false;
} else if (cmp > 0) {
} else if (item->index > ins->index) {
p = &(*p)->rb_left;
} else {
return -EEXIST;
@ -422,14 +385,10 @@ static int __btrfs_add_delayed_item(struct btrfs_delayed_node *delayed_node,
rb_link_node(node, parent_node, p);
rb_insert_color_cached(node, root, leftmost);
ins->delayed_node = delayed_node;
/* Delayed items are always for dir index items. */
ASSERT(ins->key.type == BTRFS_DIR_INDEX_KEY);
if (ins->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM &&
ins->key.offset >= delayed_node->index_cnt)
delayed_node->index_cnt = ins->key.offset + 1;
if (ins->type == BTRFS_DELAYED_INSERTION_ITEM &&
ins->index >= delayed_node->index_cnt)
delayed_node->index_cnt = ins->index + 1;
delayed_node->count++;
atomic_inc(&delayed_node->root->fs_info->delayed_root->items);
@ -451,21 +410,21 @@ static void __btrfs_remove_delayed_item(struct btrfs_delayed_item *delayed_item)
struct rb_root_cached *root;
struct btrfs_delayed_root *delayed_root;
/* Not associated with any delayed_node */
if (!delayed_item->delayed_node)
/* Not inserted, ignore it. */
if (RB_EMPTY_NODE(&delayed_item->rb_node))
return;
delayed_root = delayed_item->delayed_node->root->fs_info->delayed_root;
BUG_ON(!delayed_root);
BUG_ON(delayed_item->ins_or_del != BTRFS_DELAYED_DELETION_ITEM &&
delayed_item->ins_or_del != BTRFS_DELAYED_INSERTION_ITEM);
if (delayed_item->ins_or_del == BTRFS_DELAYED_INSERTION_ITEM)
if (delayed_item->type == BTRFS_DELAYED_INSERTION_ITEM)
root = &delayed_item->delayed_node->ins_root;
else
root = &delayed_item->delayed_node->del_root;
rb_erase_cached(&delayed_item->rb_node, root);
RB_CLEAR_NODE(&delayed_item->rb_node);
delayed_item->delayed_node->count--;
finish_one_item(delayed_root);
@ -520,12 +479,11 @@ static struct btrfs_delayed_item *__btrfs_next_delayed_item(
}
static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_item *item)
{
struct btrfs_block_rsv *src_rsv;
struct btrfs_block_rsv *dst_rsv;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_fs_info *fs_info = trans->fs_info;
u64 num_bytes;
int ret;
@ -545,14 +503,14 @@ static int btrfs_delayed_item_reserve_metadata(struct btrfs_trans_handle *trans,
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, true);
if (!ret) {
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid,
item->delayed_node->inode_id,
num_bytes, 1);
/*
* For insertions we track reserved metadata space by accounting
* for the number of leaves that will be used, based on the delayed
* node's index_items_size field.
*/
if (item->ins_or_del == BTRFS_DELAYED_DELETION_ITEM)
if (item->type == BTRFS_DELAYED_DELETION_ITEM)
item->bytes_reserved = num_bytes;
}
@ -574,8 +532,8 @@ static void btrfs_delayed_item_release_metadata(struct btrfs_root *root,
* to release/reserve qgroup space.
*/
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, item->bytes_reserved,
0);
item->delayed_node->inode_id,
item->bytes_reserved, 0);
btrfs_block_rsv_release(fs_info, rsv, item->bytes_reserved, NULL);
}
@ -688,6 +646,8 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
struct btrfs_delayed_item *next;
const int max_size = BTRFS_LEAF_DATA_SIZE(fs_info);
struct btrfs_item_batch batch;
struct btrfs_key first_key;
const u32 first_data_size = first_item->data_len;
int total_size;
char *ins_data = NULL;
int ret;
@ -716,9 +676,9 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
ASSERT(first_item->bytes_reserved == 0);
list_add_tail(&first_item->tree_list, &item_list);
batch.total_data_size = first_item->data_len;
batch.total_data_size = first_data_size;
batch.nr = 1;
total_size = first_item->data_len + sizeof(struct btrfs_item);
total_size = first_data_size + sizeof(struct btrfs_item);
curr = first_item;
while (true) {
@ -732,8 +692,7 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
* We cannot allow gaps in the key space if we're doing log
* replay.
*/
if (continuous_keys_only &&
(next->key.offset != curr->key.offset + 1))
if (continuous_keys_only && (next->index != curr->index + 1))
break;
ASSERT(next->bytes_reserved == 0);
@ -750,8 +709,11 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
}
if (batch.nr == 1) {
batch.keys = &first_item->key;
batch.data_sizes = &first_item->data_len;
first_key.objectid = node->inode_id;
first_key.type = BTRFS_DIR_INDEX_KEY;
first_key.offset = first_item->index;
batch.keys = &first_key;
batch.data_sizes = &first_data_size;
} else {
struct btrfs_key *ins_keys;
u32 *ins_sizes;
@ -768,7 +730,9 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
batch.keys = ins_keys;
batch.data_sizes = ins_sizes;
list_for_each_entry(curr, &item_list, tree_list) {
ins_keys[i] = curr->key;
ins_keys[i].objectid = node->inode_id;
ins_keys[i].type = BTRFS_DIR_INDEX_KEY;
ins_keys[i].offset = curr->index;
ins_sizes[i] = curr->data_len;
i++;
}
@ -864,6 +828,7 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_delayed_item *item)
{
const u64 ino = item->delayed_node->inode_id;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_delayed_item *curr, *next;
struct extent_buffer *leaf = path->nodes[0];
@ -902,7 +867,9 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
slot++;
btrfs_item_key_to_cpu(leaf, &key, slot);
if (btrfs_comp_cpu_keys(&next->key, &key) != 0)
if (key.objectid != ino ||
key.type != BTRFS_DIR_INDEX_KEY ||
key.offset != next->index)
break;
nitems++;
curr = next;
@ -920,9 +887,8 @@ static int btrfs_batch_delete_items(struct btrfs_trans_handle *trans,
* Check btrfs_delayed_item_reserve_metadata() to see why we
* don't need to release/reserve qgroup space.
*/
trace_btrfs_space_reservation(fs_info, "delayed_item",
item->key.objectid, total_reserved_size,
0);
trace_btrfs_space_reservation(fs_info, "delayed_item", ino,
total_reserved_size, 0);
btrfs_block_rsv_release(fs_info, &fs_info->delayed_block_rsv,
total_reserved_size, NULL);
}
@ -940,8 +906,12 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_delayed_node *node)
{
struct btrfs_key key;
int ret = 0;
key.objectid = node->inode_id;
key.type = BTRFS_DIR_INDEX_KEY;
while (ret == 0) {
struct btrfs_delayed_item *item;
@ -952,7 +922,8 @@ static int btrfs_delete_delayed_items(struct btrfs_trans_handle *trans,
break;
}
ret = btrfs_search_slot(trans, root, &item->key, path, -1, 1);
key.offset = item->index;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0) {
/*
* There's no matching item in the leaf. This means we
@ -1457,16 +1428,15 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len);
delayed_item = btrfs_alloc_delayed_item(sizeof(*dir_item) + name_len,
delayed_node,
BTRFS_DELAYED_INSERTION_ITEM);
if (!delayed_item) {
ret = -ENOMEM;
goto release_node;
}
delayed_item->key.objectid = btrfs_ino(dir);
delayed_item->key.type = BTRFS_DIR_INDEX_KEY;
delayed_item->key.offset = index;
delayed_item->ins_or_del = BTRFS_DELAYED_INSERTION_ITEM;
delayed_item->index = index;
dir_item = (struct btrfs_dir_item *)delayed_item->data;
dir_item->location = *disk_key;
@ -1490,8 +1460,7 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
}
if (reserve_leaf_space) {
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root,
delayed_item);
ret = btrfs_delayed_item_reserve_metadata(trans, delayed_item);
/*
* Space was reserved for a dir index item insertion when we
* started the transaction, so getting a failure here should be
@ -1538,12 +1507,12 @@ release_node:
static int btrfs_delete_delayed_insertion_item(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_node *node,
struct btrfs_key *key)
u64 index)
{
struct btrfs_delayed_item *item;
mutex_lock(&node->mutex);
item = __btrfs_lookup_delayed_insertion_item(node, key);
item = __btrfs_lookup_delayed_item(&node->ins_root.rb_root, index);
if (!item) {
mutex_unlock(&node->mutex);
return 1;
@ -1589,32 +1558,25 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
struct btrfs_key item_key;
int ret;
node = btrfs_get_or_create_delayed_node(dir);
if (IS_ERR(node))
return PTR_ERR(node);
item_key.objectid = btrfs_ino(dir);
item_key.type = BTRFS_DIR_INDEX_KEY;
item_key.offset = index;
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node,
&item_key);
ret = btrfs_delete_delayed_insertion_item(trans->fs_info, node, index);
if (!ret)
goto end;
item = btrfs_alloc_delayed_item(0);
item = btrfs_alloc_delayed_item(0, node, BTRFS_DELAYED_DELETION_ITEM);
if (!item) {
ret = -ENOMEM;
goto end;
}
item->key = item_key;
item->ins_or_del = BTRFS_DELAYED_DELETION_ITEM;
item->index = index;
ret = btrfs_delayed_item_reserve_metadata(trans, dir->root, item);
ret = btrfs_delayed_item_reserve_metadata(trans, item);
/*
* we have reserved enough space when we start a new transaction,
* so reserving metadata failure is impossible.
@ -1743,9 +1705,9 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
int ret = 0;
list_for_each_entry(curr, del_list, readdir_list) {
if (curr->key.offset > index)
if (curr->index > index)
break;
if (curr->key.offset == index) {
if (curr->index == index) {
ret = 1;
break;
}
@ -1779,13 +1741,13 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
list_for_each_entry_safe(curr, next, ins_list, readdir_list) {
list_del(&curr->readdir_list);
if (curr->key.offset < ctx->pos) {
if (curr->index < ctx->pos) {
if (refcount_dec_and_test(&curr->refs))
kfree(curr);
continue;
}
ctx->pos = curr->key.offset;
ctx->pos = curr->index;
di = (struct btrfs_dir_item *)curr->data;
name = (char *)(di + 1);
@ -2085,3 +2047,113 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info)
}
}
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list,
struct list_head *del_list)
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
node = btrfs_get_delayed_node(inode);
if (!node)
return;
mutex_lock(&node->mutex);
item = __btrfs_first_delayed_insertion_item(node);
while (item) {
/*
* It's possible that the item is already in a log list. This
* can happen in case two tasks are trying to log the same
* directory. For example if we have tasks A and task B:
*
* Task A collected the delayed items into a log list while
* under the inode's log_mutex (at btrfs_log_inode()), but it
* only releases the items after logging the inodes they point
* to (if they are new inodes), which happens after unlocking
* the log mutex;
*
* Task B enters btrfs_log_inode() and acquires the log_mutex
* of the same directory inode, before task B releases the
* delayed items. This can happen for example when logging some
* inode we need to trigger logging of its parent directory, so
* logging two files that have the same parent directory can
* lead to this.
*
* If this happens, just ignore delayed items already in a log
* list. All the tasks logging the directory are under a log
* transaction and whichever finishes first can not sync the log
* before the other completes and leaves the log transaction.
*/
if (!item->logged && list_empty(&item->log_list)) {
refcount_inc(&item->refs);
list_add_tail(&item->log_list, ins_list);
}
item = __btrfs_next_delayed_item(item);
}
item = __btrfs_first_delayed_deletion_item(node);
while (item) {
/* It may be non-empty, for the same reason mentioned above. */
if (!item->logged && list_empty(&item->log_list)) {
refcount_inc(&item->refs);
list_add_tail(&item->log_list, del_list);
}
item = __btrfs_next_delayed_item(item);
}
mutex_unlock(&node->mutex);
/*
* We are called during inode logging, which means the inode is in use
* and can not be evicted before we finish logging the inode. So we never
* have the last reference on the delayed inode.
* Also, we don't use btrfs_release_delayed_node() because that would
* requeue the delayed inode (change its order in the list of prepared
* nodes) and we don't want to do such change because we don't create or
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
refcount_dec(&node->refs);
}
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list,
struct list_head *del_list)
{
struct btrfs_delayed_node *node;
struct btrfs_delayed_item *item;
struct btrfs_delayed_item *next;
node = btrfs_get_delayed_node(inode);
if (!node)
return;
mutex_lock(&node->mutex);
list_for_each_entry_safe(item, next, ins_list, log_list) {
item->logged = true;
list_del_init(&item->log_list);
if (refcount_dec_and_test(&item->refs))
kfree(item);
}
list_for_each_entry_safe(item, next, del_list, log_list) {
item->logged = true;
list_del_init(&item->log_list);
if (refcount_dec_and_test(&item->refs))
kfree(item);
}
mutex_unlock(&node->mutex);
/*
* We are called during inode logging, which means the inode is in use
* and can not be evicted before we finish logging the inode. So we never
* have the last reference on the delayed inode.
* Also, we don't use btrfs_release_delayed_node() because that would
* requeue the delayed inode (change its order in the list of prepared
* nodes) and we don't want to do such change because we don't create or
* delete delayed items.
*/
ASSERT(refcount_read(&node->refs) > 1);
refcount_dec(&node->refs);
}

View File

@ -16,9 +16,10 @@
#include <linux/refcount.h>
#include "ctree.h"
/* types of the delayed item */
#define BTRFS_DELAYED_INSERTION_ITEM 1
#define BTRFS_DELAYED_DELETION_ITEM 2
enum btrfs_delayed_item_type {
BTRFS_DELAYED_INSERTION_ITEM,
BTRFS_DELAYED_DELETION_ITEM
};
struct btrfs_delayed_root {
spinlock_t lock;
@ -73,14 +74,27 @@ struct btrfs_delayed_node {
struct btrfs_delayed_item {
struct rb_node rb_node;
struct btrfs_key key;
/* Offset value of the corresponding dir index key. */
u64 index;
struct list_head tree_list; /* used for batch insert/delete items */
struct list_head readdir_list; /* used for readdir items */
/*
* Used when logging a directory.
* Insertions and deletions to this list are protected by the parent
* delayed node's mutex.
*/
struct list_head log_list;
u64 bytes_reserved;
struct btrfs_delayed_node *delayed_node;
refcount_t refs;
int ins_or_del;
u32 data_len;
enum btrfs_delayed_item_type type:8;
/*
* Track if this delayed item was already logged.
* Protected by the mutex of the parent delayed inode.
*/
bool logged;
/* The maximum leaf size is 64K, so u16 is more than enough. */
u16 data_len;
char data[];
};
@ -144,6 +158,14 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
struct list_head *ins_list);
/* Used during directory logging. */
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list,
struct list_head *del_list);
void btrfs_log_put_delayed_items(struct btrfs_inode *inode,
struct list_head *ins_list,
struct list_head *del_list);
/* for init */
int __init btrfs_delayed_inode_init(void);
void __cold btrfs_delayed_inode_exit(void);

View File

@ -545,10 +545,7 @@ static int mark_block_group_to_copy(struct btrfs_fs_info *fs_info,
if (!cache)
continue;
spin_lock(&cache->lock);
cache->to_copy = 1;
spin_unlock(&cache->lock);
set_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
btrfs_put_block_group(cache);
}
if (iter_ret < 0)
@ -577,7 +574,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
return true;
spin_lock(&cache->lock);
if (cache->removed) {
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &cache->runtime_flags)) {
spin_unlock(&cache->lock);
return true;
}
@ -610,9 +607,7 @@ bool btrfs_finish_block_group_to_copy(struct btrfs_device *srcdev,
}
/* Last stripe on this device */
spin_lock(&cache->lock);
cache->to_copy = 0;
spin_unlock(&cache->lock);
clear_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
return true;
}
@ -1288,11 +1283,6 @@ int __pure btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
return 1;
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
{
percpu_counter_inc(&fs_info->dev_replace.bio_counter);
}
void btrfs_bio_counter_sub(struct btrfs_fs_info *fs_info, s64 amount)
{
percpu_counter_sub(&fs_info->dev_replace.bio_counter, amount);

View File

@ -7,6 +7,10 @@
#define BTRFS_DEV_REPLACE_H
struct btrfs_ioctl_dev_replace_args;
struct btrfs_fs_info;
struct btrfs_trans_handle;
struct btrfs_dev_replace;
struct btrfs_block_group;
int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info);
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans);

View File

@ -131,8 +131,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
if (atomic)
return -EAGAIN;
lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state);
lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state);
if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
@ -145,8 +144,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
ret = 1;
clear_extent_buffer_uptodate(eb);
out:
unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state);
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state);
return ret;
}
@ -647,16 +646,14 @@ static void run_one_async_start(struct btrfs_work *work)
*/
static void run_one_async_done(struct btrfs_work *work)
{
struct async_submit_bio *async;
struct inode *inode;
async = container_of(work, struct async_submit_bio, work);
inode = async->inode;
struct async_submit_bio *async =
container_of(work, struct async_submit_bio, work);
struct inode *inode = async->inode;
struct btrfs_bio *bbio = btrfs_bio(async->bio);
/* If an error occurred we just want to clean up the bio and move on */
if (async->status) {
async->bio->bi_status = async->status;
bio_endio(async->bio);
btrfs_bio_end_io(bbio, async->status);
return;
}
@ -757,6 +754,7 @@ static bool should_async_write(struct btrfs_fs_info *fs_info,
void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_bio *bbio = btrfs_bio(bio);
blk_status_t ret;
bio->bi_opf |= REQ_META;
@ -776,8 +774,7 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_
ret = btree_csum_one_bio(bio);
if (ret) {
bio->bi_status = ret;
bio_endio(bio);
btrfs_bio_end_io(bbio, ret);
return;
}
@ -1524,6 +1521,9 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
return btrfs_grab_root(fs_info->block_group_root) ?
fs_info->block_group_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
struct btrfs_root *root = btrfs_global_root(fs_info, &key);
@ -1980,14 +1980,7 @@ static void backup_super_roots(struct btrfs_fs_info *info)
btrfs_set_backup_chunk_root_level(root_backup,
btrfs_header_level(info->chunk_root->node));
if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
btrfs_set_backup_block_group_root(root_backup,
info->block_group_root->node->start);
btrfs_set_backup_block_group_root_gen(root_backup,
btrfs_header_generation(info->block_group_root->node));
btrfs_set_backup_block_group_root_level(root_backup,
btrfs_header_level(info->block_group_root->node));
} else {
if (!btrfs_fs_compat_ro(info, BLOCK_GROUP_TREE)) {
struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
@ -2225,6 +2218,8 @@ static void btrfs_init_balance(struct btrfs_fs_info *fs_info)
static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
{
struct inode *inode = fs_info->btree_inode;
unsigned long hash = btrfs_inode_hash(BTRFS_BTREE_INODE_OBJECTID,
fs_info->tree_root);
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
set_nlink(inode, 1);
@ -2238,8 +2233,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
IO_TREE_BTREE_INODE_IO, inode);
BTRFS_I(inode)->io_tree.track_uptodate = false;
IO_TREE_BTREE_INODE_IO, NULL);
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
@ -2247,7 +2241,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info)
BTRFS_I(inode)->location.type = 0;
BTRFS_I(inode)->location.offset = 0;
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
btrfs_insert_inode_hash(inode);
__insert_inode_hash(inode, hash);
}
static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
@ -2266,6 +2260,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->qgroup_seq = 1;
fs_info->qgroup_ulist = NULL;
fs_info->qgroup_rescan_running = false;
fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@ -2529,10 +2524,24 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
if (ret)
return ret;
location.objectid = BTRFS_DEV_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
location.offset = 0;
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
location.objectid = BTRFS_BLOCK_GROUP_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
ret = PTR_ERR(root);
goto out;
}
} else {
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
fs_info->block_group_root = root;
}
}
location.objectid = BTRFS_DEV_TREE_OBJECTID;
root = btrfs_read_tree_root(tree_root, &location);
if (IS_ERR(root)) {
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS)) {
@ -2600,8 +2609,8 @@ out:
* 1, 2 2nd and 3rd backup copy
* -1 skip bytenr check
*/
static int validate_super(struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num)
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num)
{
u64 nodesize = btrfs_super_nodesize(sb);
u64 sectorsize = btrfs_super_sectorsize(sb);
@ -2703,6 +2712,18 @@ static int validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
/*
* Artificial requirement for block-group-tree to force newer features
* (free-space-tree, no-holes) so the test matrix is smaller.
*/
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
!btrfs_fs_incompat(fs_info, NO_HOLES))) {
btrfs_err(fs_info,
"block-group-tree feature requires fres-space-tree and no-holes");
ret = -EINVAL;
}
if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
@ -2785,7 +2806,7 @@ static int validate_super(struct btrfs_fs_info *fs_info,
*/
static int btrfs_validate_mount_super(struct btrfs_fs_info *fs_info)
{
return validate_super(fs_info, fs_info->super_copy, 0);
return btrfs_validate_super(fs_info, fs_info->super_copy, 0);
}
/*
@ -2799,7 +2820,7 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
{
int ret;
ret = validate_super(fs_info, sb, -1);
ret = btrfs_validate_super(fs_info, sb, -1);
if (ret < 0)
goto out;
if (!btrfs_supported_super_csum(btrfs_super_csum_type(sb))) {
@ -2860,17 +2881,7 @@ static int load_important_roots(struct btrfs_fs_info *fs_info)
btrfs_warn(fs_info, "couldn't read tree root");
return ret;
}
if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
bytenr = btrfs_super_block_group_root(sb);
gen = btrfs_super_block_group_root_generation(sb);
level = btrfs_super_block_group_root_level(sb);
ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
if (ret)
btrfs_warn(fs_info, "couldn't read block group root");
return ret;
return 0;
}
static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
@ -2882,16 +2893,6 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
int ret = 0;
int i;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
struct btrfs_root *root;
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
GFP_KERNEL);
if (!root)
return -ENOMEM;
fs_info->block_group_root = root;
}
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
if (handle_error) {
if (!IS_ERR(tree_root->node))
@ -2990,6 +2991,19 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
mutex_init(&fs_info->zoned_data_reloc_io_lock);
seqlock_init(&fs_info->profiles_lock);
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_writers);
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
BTRFS_LOCKDEP_TRANS_COMMIT_START);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
BTRFS_LOCKDEP_TRANS_UNBLOCKED);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_completed,
BTRFS_LOCKDEP_TRANS_COMPLETED);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
INIT_LIST_HEAD(&fs_info->space_info);
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
@ -3279,6 +3293,112 @@ out:
return ret;
}
/*
* Do various sanity and dependency checks of different features.
*
* This is the place for less strict checks (like for subpage or artificial
* feature dependencies).
*
* For strict checks or possible corruption detection, see
* btrfs_validate_super().
*
* This should be called after btrfs_parse_options(), as some mount options
* (space cache related) can modify on-disk format like free space tree and
* screw up certain feature dependencies.
*/
int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb)
{
struct btrfs_super_block *disk_super = fs_info->super_copy;
u64 incompat = btrfs_super_incompat_flags(disk_super);
const u64 compat_ro = btrfs_super_compat_ro_flags(disk_super);
const u64 compat_ro_unsupp = (compat_ro & ~BTRFS_FEATURE_COMPAT_RO_SUPP);
if (incompat & ~BTRFS_FEATURE_INCOMPAT_SUPP) {
btrfs_err(fs_info,
"cannot mount because of unknown incompat features (0x%llx)",
incompat);
return -EINVAL;
}
/* Runtime limitation for mixed block groups. */
if ((incompat & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(fs_info->sectorsize != fs_info->nodesize)) {
btrfs_err(fs_info,
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
fs_info->nodesize, fs_info->sectorsize);
return -EINVAL;
}
/* Mixed backref is an always-enabled feature. */
incompat |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
/* Set compression related flags just in case. */
if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
incompat |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
/*
* An ancient flag, which should really be marked deprecated.
* Such runtime limitation doesn't really need a incompat flag.
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
incompat |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
if (compat_ro_unsupp && !sb_rdonly(sb)) {
btrfs_err(fs_info,
"cannot mount read-write because of unknown compat_ro features (0x%llx)",
compat_ro);
return -EINVAL;
}
/*
* We have unsupported RO compat features, although RO mounted, we
* should not cause any metadata writes, including log replay.
* Or we could screw up whatever the new feature requires.
*/
if (compat_ro_unsupp && btrfs_super_log_root(disk_super) &&
!btrfs_test_opt(fs_info, NOLOGREPLAY)) {
btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
compat_ro);
return -EINVAL;
}
/*
* Artificial limitations for block group tree, to force
* block-group-tree to rely on no-holes and free-space-tree.
*/
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE) &&
(!btrfs_fs_incompat(fs_info, NO_HOLES) ||
!btrfs_test_opt(fs_info, FREE_SPACE_TREE))) {
btrfs_err(fs_info,
"block-group-tree feature requires no-holes and free-space-tree features");
return -EINVAL;
}
/*
* Subpage runtime limitation on v1 cache.
*
* V1 space cache still has some hard codeed PAGE_SIZE usage, while
* we're already defaulting to v2 cache, no need to bother v1 as it's
* going to be deprecated anyway.
*/
if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
btrfs_warn(fs_info,
"v1 space cache is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
return -EINVAL;
}
/* This can be called by remount, we need to protect the super block. */
spin_lock(&fs_info->super_lock);
btrfs_set_super_incompat_flags(disk_super, incompat);
spin_unlock(&fs_info->super_lock);
return 0;
}
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
char *options)
{
@ -3428,72 +3548,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_alloc;
}
features = btrfs_super_incompat_flags(disk_super) &
~BTRFS_FEATURE_INCOMPAT_SUPP;
if (features) {
btrfs_err(fs_info,
"cannot mount because of unsupported optional features (0x%llx)",
features);
err = -EINVAL;
ret = btrfs_check_features(fs_info, sb);
if (ret < 0) {
err = ret;
goto fail_alloc;
}
features = btrfs_super_incompat_flags(disk_super);
features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;
/*
* Flag our filesystem as having big metadata blocks if they are bigger
* than the page size.
*/
if (btrfs_super_nodesize(disk_super) > PAGE_SIZE)
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
/*
* mixed block groups end up with duplicate but slightly offset
* extent buffers for the same range. It leads to corruptions
*/
if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) &&
(sectorsize != nodesize)) {
btrfs_err(fs_info,
"unequal nodesize/sectorsize (%u != %u) are not allowed for mixed block groups",
nodesize, sectorsize);
goto fail_alloc;
}
/*
* Needn't use the lock because there is no other task which will
* update the flag.
*/
btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
~BTRFS_FEATURE_COMPAT_RO_SUPP;
if (!sb_rdonly(sb) && features) {
btrfs_err(fs_info,
"cannot mount read-write because of unsupported optional features (0x%llx)",
features);
err = -EINVAL;
goto fail_alloc;
}
/*
* We have unsupported RO compat features, although RO mounted, we
* should not cause any metadata write, including log replay.
* Or we could screw up whatever the new feature requires.
*/
if (unlikely(features && btrfs_super_log_root(disk_super) &&
!btrfs_test_opt(fs_info, NOLOGREPLAY))) {
btrfs_err(fs_info,
"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
features);
err = -EINVAL;
goto fail_alloc;
}
if (sectorsize < PAGE_SIZE) {
struct btrfs_subpage_info *subpage_info;
@ -3833,7 +3893,7 @@ static void btrfs_end_super_write(struct bio *bio)
}
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num)
int copy_num, bool drop_cache)
{
struct btrfs_super_block *super;
struct page *page;
@ -3851,6 +3911,19 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
if (bytenr + BTRFS_SUPER_INFO_SIZE >= bdev_nr_bytes(bdev))
return ERR_PTR(-EINVAL);
if (drop_cache) {
/* This should only be called with the primary sb. */
ASSERT(copy_num == 0);
/*
* Drop the page of the primary superblock, so later read will
* always read from the device.
*/
invalidate_inode_pages2_range(mapping,
bytenr >> PAGE_SHIFT,
(bytenr + BTRFS_SUPER_INFO_SIZE) >> PAGE_SHIFT);
}
page = read_cache_page_gfp(mapping, bytenr >> PAGE_SHIFT, GFP_NOFS);
if (IS_ERR(page))
return ERR_CAST(page);
@ -3882,7 +3955,7 @@ struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev)
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
*/
for (i = 0; i < 1; i++) {
super = btrfs_read_dev_one_super(bdev, i);
super = btrfs_read_dev_one_super(bdev, i, false);
if (IS_ERR(super))
continue;

View File

@ -46,10 +46,13 @@ int __cold open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
char *options);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
struct btrfs_super_block *sb, int mirror_num);
int btrfs_check_features(struct btrfs_fs_info *fs_info, struct super_block *sb);
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
int copy_num);
int copy_num, bool drop_cache);
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
struct btrfs_key *key);
@ -103,7 +106,7 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
{
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
return fs_info->block_group_root;
return btrfs_extent_root(fs_info, 0);
}

1673
fs/btrfs/extent-io-tree.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -17,7 +17,6 @@ struct io_failure_record;
#define EXTENT_NODATASUM (1U << 7)
#define EXTENT_CLEAR_META_RESV (1U << 8)
#define EXTENT_NEED_WAIT (1U << 9)
#define EXTENT_DAMAGED (1U << 10)
#define EXTENT_NORESERVE (1U << 11)
#define EXTENT_QGROUP_RESERVED (1U << 12)
#define EXTENT_CLEAR_DATA_RESV (1U << 13)
@ -35,10 +34,18 @@ struct io_failure_record;
* delalloc bytes decremented, in an atomic way to prevent races with stat(2).
*/
#define EXTENT_ADD_INODE_BYTES (1U << 15)
/*
* Set during truncate when we're clearing an entire range and we just want the
* extent states to go away.
*/
#define EXTENT_CLEAR_ALL_BITS (1U << 16)
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
EXTENT_CLEAR_DATA_RESV)
#define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | \
EXTENT_ADD_INODE_BYTES)
EXTENT_ADD_INODE_BYTES | \
EXTENT_CLEAR_ALL_BITS)
/*
* Redefined bits above which are used only in the device allocation tree,
@ -56,7 +63,6 @@ enum {
IO_TREE_FS_EXCLUDED_EXTENTS,
IO_TREE_BTREE_INODE_IO,
IO_TREE_INODE_IO,
IO_TREE_INODE_IO_FAILURE,
IO_TREE_RELOC_BLOCKS,
IO_TREE_TRANS_DIRTY_PAGES,
IO_TREE_ROOT_DIRTY_LOG_PAGES,
@ -70,8 +76,6 @@ struct extent_io_tree {
struct rb_root state;
struct btrfs_fs_info *fs_info;
void *private_data;
u64 dirty_bytes;
bool track_uptodate;
/* Who owns this io tree, should be one of IO_TREE_* */
u8 owner;
@ -89,33 +93,23 @@ struct extent_state {
refcount_t refs;
u32 state;
struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
};
int __init extent_state_cache_init(void);
void __cold extent_state_cache_exit(void);
void extent_io_tree_init(struct btrfs_fs_info *fs_info,
struct extent_io_tree *tree, unsigned int owner,
void *private_data);
void extent_io_tree_release(struct extent_io_tree *tree);
int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
{
return lock_extent_bits(tree, start, end, NULL);
}
int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached);
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
int __init extent_io_init(void);
void __cold extent_io_exit(void);
int __init extent_state_init_cachep(void);
void __cold extent_state_free_cachep(void);
u64 count_range_bits(struct extent_io_tree *tree,
u64 *start, u64 search_end,
@ -126,72 +120,66 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, int filled, struct extent_state *cached_state);
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, int wake, int delete,
struct extent_state **cached);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, int wake, int delete,
struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
u32 bits, struct extent_state **cached, gfp_t mask,
struct extent_changeset *changeset);
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end)
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits,
struct extent_state **cached)
{
return clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, NULL);
return __clear_extent_bit(tree, start, end, bits, cached,
GFP_NOFS, NULL);
}
static inline int unlock_extent_cached(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
GFP_NOFS, NULL);
}
static inline int unlock_extent_cached_atomic(struct extent_io_tree *tree,
u64 start, u64 end, struct extent_state **cached)
static inline int unlock_extent_atomic(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, 1, 0, cached,
GFP_ATOMIC, NULL);
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
GFP_ATOMIC, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
int wake = 0;
if (bits & EXTENT_LOCKED)
wake = 1;
return clear_extent_bit(tree, start, end, bits, wake, 0, NULL);
return clear_extent_bit(tree, start, end, bits, NULL);
}
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, unsigned exclusive_bits, u64 *failed_start,
struct extent_state **cached_state, gfp_t mask,
struct extent_changeset *changeset);
int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits);
u32 bits, struct extent_state **cached_state, gfp_t mask);
static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOWAIT);
}
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, 0, NULL, NULL, GFP_NOFS,
NULL);
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOFS);
}
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
cached_state, GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, 0, NULL, NULL,
mask, NULL);
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, mask);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@ -199,7 +187,7 @@ static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
{
return clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING, 0, 0, cached);
EXTENT_DO_ACCOUNTING, cached);
}
int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
@ -211,30 +199,29 @@ static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | extra_bits,
0, NULL, cached_state, GFP_NOFS, NULL);
EXTENT_DELALLOC | extra_bits,
cached_state, GFP_NOFS);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
0, NULL, cached_state, GFP_NOFS, NULL);
EXTENT_DELALLOC | EXTENT_DEFRAG,
cached_state, GFP_NOFS);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, 0, NULL, NULL,
GFP_NOFS, NULL);
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
}
static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, NULL,
cached_state, mask, NULL);
return set_extent_bit(tree, start, end, EXTENT_UPTODATE,
cached_state, mask);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
@ -244,24 +231,9 @@ void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
int extent_invalidate_folio(struct extent_io_tree *tree,
struct folio *folio, size_t offset);
bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
u64 *end, u64 max_bytes,
struct extent_state **cached_state);
/* This should be reworked in the future and put elsewhere. */
struct io_failure_record *get_state_failrec(struct extent_io_tree *tree, u64 start);
int set_state_failrec(struct extent_io_tree *tree, u64 start,
struct io_failure_record *failrec);
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start,
u64 end);
int free_io_failure(struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree,
struct io_failure_record *rec);
int clean_io_failure(struct btrfs_fs_info *fs_info,
struct extent_io_tree *failure_tree,
struct extent_io_tree *io_tree, u64 start,
struct page *page, u64 ino, unsigned int pg_offset);
void wait_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, u32 bits);
#endif /* BTRFS_EXTENT_IO_TREE_H */

View File

@ -2220,6 +2220,12 @@ static noinline int check_delayed_ref(struct btrfs_root *root,
}
if (!mutex_trylock(&head->mutex)) {
if (path->nowait) {
spin_unlock(&delayed_refs->lock);
btrfs_put_transaction(cur_trans);
return -EAGAIN;
}
refcount_inc(&head->refs);
spin_unlock(&delayed_refs->lock);
@ -2686,13 +2692,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
len = cache->start + cache->length - start;
len = min(len, end + 1 - start);
down_read(&fs_info->commit_root_sem);
if (start < cache->last_byte_to_unpin && return_free_space) {
u64 add_len = min(len, cache->last_byte_to_unpin - start);
btrfs_add_free_space(cache, start, add_len);
}
up_read(&fs_info->commit_root_sem);
if (return_free_space)
btrfs_add_free_space(cache, start, len);
start += len;
total_unpinned += len;
@ -3804,7 +3805,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
block_group->start == fs_info->data_reloc_bg ||
fs_info->data_reloc_bg == 0);
if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
if (block_group->ro ||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
ret = 1;
goto out;
}
@ -3881,7 +3883,7 @@ out:
* regular extents) at the same time to the same zone, which
* easily break the write pointer.
*/
block_group->zoned_data_reloc_ongoing = 1;
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
fs_info->data_reloc_bg = 0;
}
spin_unlock(&fs_info->relocation_bg_lock);
@ -4888,6 +4890,9 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
!test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
/* btrfs_clean_tree_block() accesses generation field. */
btrfs_set_header_generation(buf, trans->transid);
/*
* This needs to stay, because we could allocate a freed block from an
* old tree into a new tree, so we need to make sure this new block is
@ -5639,6 +5644,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
*/
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
{
const bool is_reloc_root = (root->root_key.objectid ==
BTRFS_TREE_RELOC_OBJECTID);
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@ -5798,6 +5805,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
if (!is_reloc_root)
btrfs_set_last_root_drop_gen(fs_info, trans->transid);
btrfs_end_transaction_throttle(trans);
if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
btrfs_debug(fs_info,
@ -5832,7 +5842,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
if (!is_reloc_root) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
@ -5864,6 +5874,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
btrfs_put_root(root);
root_dropped = true;
out_end_trans:
if (!is_reloc_root)
btrfs_set_last_root_drop_gen(fs_info, trans->transid);
btrfs_end_transaction_throttle(trans);
out_free:
kfree(wc);

File diff suppressed because it is too large Load Diff

View File

@ -60,11 +60,13 @@ enum {
struct btrfs_bio;
struct btrfs_root;
struct btrfs_inode;
struct btrfs_io_bio;
struct btrfs_fs_info;
struct io_failure_record;
struct extent_io_tree;
int __init extent_buffer_init_cachep(void);
void __cold extent_buffer_free_cachep(void);
typedef void (submit_bio_hook_t)(struct inode *inode, struct bio *bio,
int mirror_num,
enum btrfs_compression_type compress_type);
@ -240,10 +242,10 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
struct folio *folio, size_t offset);
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
struct bio *btrfs_bio_alloc(unsigned int nr_iovecs);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size);
void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
@ -257,8 +259,12 @@ int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num);
* bio end_io callback is called to indicate things have failed.
*/
struct io_failure_record {
/* Use rb_simple_node for search/insert */
struct {
struct rb_node rb_node;
u64 bytenr;
};
struct page *page;
u64 start;
u64 len;
u64 logical;
int this_mirror;
@ -269,6 +275,9 @@ struct io_failure_record {
int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
u32 bio_offset, struct page *page, unsigned int pgoff,
submit_bio_hook_t *submit_bio_hook);
void btrfs_free_io_failure_record(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
struct page *page, unsigned int pg_offset);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,

View File

@ -7,6 +7,7 @@
#include "volumes.h"
#include "extent_map.h"
#include "compression.h"
#include "btrfs_inode.h"
static struct kmem_cache *extent_map_cache;
@ -54,9 +55,7 @@ struct extent_map *alloc_extent_map(void)
if (!em)
return NULL;
RB_CLEAR_NODE(&em->rb_node);
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
em->generation = 0;
refcount_set(&em->refs, 1);
INIT_LIST_HEAD(&em->list);
return em;
@ -73,7 +72,6 @@ void free_extent_map(struct extent_map *em)
{
if (!em)
return;
WARN_ON(refcount_read(&em->refs) == 0);
if (refcount_dec_and_test(&em->refs)) {
WARN_ON(extent_map_in_tree(em));
WARN_ON(!list_empty(&em->list));
@ -143,8 +141,7 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
* it can't be found, try to find some neighboring extents
*/
static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
struct rb_node **prev_ret,
struct rb_node **next_ret)
struct rb_node **prev_or_next_ret)
{
struct rb_node *n = root->rb_node;
struct rb_node *prev = NULL;
@ -152,6 +149,8 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
struct extent_map *entry;
struct extent_map *prev_entry = NULL;
ASSERT(prev_or_next_ret);
while (n) {
entry = rb_entry(n, struct extent_map, rb_node);
prev = n;
@ -165,24 +164,29 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
return n;
}
if (prev_ret) {
orig_prev = prev;
while (prev && offset >= extent_map_end(prev_entry)) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*prev_ret = prev;
prev = orig_prev;
orig_prev = prev;
while (prev && offset >= extent_map_end(prev_entry)) {
prev = rb_next(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
if (next_ret) {
prev_entry = rb_entry(prev, struct extent_map, rb_node);
while (prev && offset < prev_entry->start) {
prev = rb_prev(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*next_ret = prev;
/*
* Previous extent map found, return as in this case the caller does not
* care about the next one.
*/
if (prev) {
*prev_or_next_ret = prev;
return NULL;
}
prev = orig_prev;
prev_entry = rb_entry(prev, struct extent_map, rb_node);
while (prev && offset < prev_entry->start) {
prev = rb_prev(prev);
prev_entry = rb_entry(prev, struct extent_map, rb_node);
}
*prev_or_next_ret = prev;
return NULL;
}
@ -336,6 +340,8 @@ out:
void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
{
lockdep_assert_held_write(&tree->lock);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
if (extent_map_in_tree(em))
try_merge_map(tree, em);
@ -382,7 +388,7 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
__clear_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits,
0, 0, NULL, GFP_NOWAIT, NULL);
NULL, GFP_NOWAIT, NULL);
}
}
@ -425,16 +431,13 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
{
struct extent_map *em;
struct rb_node *rb_node;
struct rb_node *prev = NULL;
struct rb_node *next = NULL;
struct rb_node *prev_or_next = NULL;
u64 end = range_end(start, len);
rb_node = __tree_search(&tree->map.rb_root, start, &prev, &next);
rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
if (!rb_node) {
if (prev)
rb_node = prev;
else if (next)
rb_node = next;
if (prev_or_next)
rb_node = prev_or_next;
else
return NULL;
}
@ -658,3 +661,293 @@ int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
ASSERT(ret == 0 || ret == -EEXIST);
return ret;
}
/*
* Drop all extent maps from a tree in the fastest possible way, rescheduling
* if needed. This avoids searching the tree, from the root down to the first
* extent map, before each deletion.
*/
static void drop_all_extent_maps_fast(struct extent_map_tree *tree)
{
write_lock(&tree->lock);
while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
struct extent_map *em;
struct rb_node *node;
node = rb_first_cached(&tree->map);
em = rb_entry(node, struct extent_map, rb_node);
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
remove_extent_mapping(tree, em);
free_extent_map(em);
cond_resched_rwlock_write(&tree->lock);
}
write_unlock(&tree->lock);
}
/*
* Drop all extent maps in a given range.
*
* @inode: The target inode.
* @start: Start offset of the range.
* @end: End offset of the range (inclusive value).
* @skip_pinned: Indicate if pinned extent maps should be ignored or not.
*
* This drops all the extent maps that intersect the given range [@start, @end].
* Extent maps that partially overlap the range and extend behind or beyond it,
* are split.
* The caller should have locked an appropriate file range in the inode's io
* tree before calling this function.
*/
void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
bool skip_pinned)
{
struct extent_map *split;
struct extent_map *split2;
struct extent_map *em;
struct extent_map_tree *em_tree = &inode->extent_tree;
u64 len = end - start + 1;
WARN_ON(end < start);
if (end == (u64)-1) {
if (start == 0 && !skip_pinned) {
drop_all_extent_maps_fast(em_tree);
return;
}
len = (u64)-1;
} else {
/* Make end offset exclusive for use in the loop below. */
end++;
}
/*
* It's ok if we fail to allocate the extent maps, see the comment near
* the bottom of the loop below. We only need two spare extent maps in
* the worst case, where the first extent map that intersects our range
* starts before the range and the last extent map that intersects our
* range ends after our range (and they might be the same extent map),
* because we need to split those two extent maps at the boundaries.
*/
split = alloc_extent_map();
split2 = alloc_extent_map();
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
while (em) {
/* extent_map_end() returns exclusive value (last byte + 1). */
const u64 em_end = extent_map_end(em);
struct extent_map *next_em = NULL;
u64 gen;
unsigned long flags;
bool modified;
bool compressed;
if (em_end < end) {
next_em = next_extent_map(em);
if (next_em) {
if (next_em->start < end)
refcount_inc(&next_em->refs);
else
next_em = NULL;
}
}
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
start = em_end;
if (end != (u64)-1)
len = start + len - em_end;
goto next;
}
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
clear_bit(EXTENT_FLAG_LOGGING, &flags);
modified = !list_empty(&em->list);
/*
* The extent map does not cross our target range, so no need to
* split it, we can remove it directly.
*/
if (em->start >= start && em_end <= end)
goto remove_em;
flags = em->flags;
gen = em->generation;
compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
if (em->start < start) {
if (!split) {
split = split2;
split2 = NULL;
if (!split)
goto remove_em;
}
split->start = em->start;
split->len = start - em->start;
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
split->orig_start = em->orig_start;
split->block_start = em->block_start;
if (compressed)
split->block_len = em->block_len;
else
split->block_len = split->len;
split->orig_block_len = max(split->block_len,
em->orig_block_len);
split->ram_bytes = em->ram_bytes;
} else {
split->orig_start = split->start;
split->block_len = 0;
split->block_start = em->block_start;
split->orig_block_len = 0;
split->ram_bytes = split->len;
}
split->generation = gen;
split->flags = flags;
split->compress_type = em->compress_type;
replace_extent_mapping(em_tree, em, split, modified);
free_extent_map(split);
split = split2;
split2 = NULL;
}
if (em_end > end) {
if (!split) {
split = split2;
split2 = NULL;
if (!split)
goto remove_em;
}
split->start = start + len;
split->len = em_end - (start + len);
split->block_start = em->block_start;
split->flags = flags;
split->compress_type = em->compress_type;
split->generation = gen;
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
split->orig_block_len = max(em->block_len,
em->orig_block_len);
split->ram_bytes = em->ram_bytes;
if (compressed) {
split->block_len = em->block_len;
split->orig_start = em->orig_start;
} else {
const u64 diff = start + len - em->start;
split->block_len = split->len;
split->block_start += diff;
split->orig_start = em->orig_start;
}
} else {
split->ram_bytes = split->len;
split->orig_start = split->start;
split->block_len = 0;
split->orig_block_len = 0;
}
if (extent_map_in_tree(em)) {
replace_extent_mapping(em_tree, em, split,
modified);
} else {
int ret;
ret = add_extent_mapping(em_tree, split,
modified);
/* Logic error, shouldn't happen. */
ASSERT(ret == 0);
if (WARN_ON(ret != 0) && modified)
btrfs_set_inode_full_sync(inode);
}
free_extent_map(split);
split = NULL;
}
remove_em:
if (extent_map_in_tree(em)) {
/*
* If the extent map is still in the tree it means that
* either of the following is true:
*
* 1) It fits entirely in our range (doesn't end beyond
* it or starts before it);
*
* 2) It starts before our range and/or ends after our
* range, and we were not able to allocate the extent
* maps for split operations, @split and @split2.
*
* If we are at case 2) then we just remove the entire
* extent map - this is fine since if anyone needs it to
* access the subranges outside our range, will just
* load it again from the subvolume tree's file extent
* item. However if the extent map was in the list of
* modified extents, then we must mark the inode for a
* full fsync, otherwise a fast fsync will miss this
* extent if it's new and needs to be logged.
*/
if ((em->start < start || em_end > end) && modified) {
ASSERT(!split);
btrfs_set_inode_full_sync(inode);
}
remove_extent_mapping(em_tree, em);
}
/*
* Once for the tree reference (we replaced or removed the
* extent map from the tree).
*/
free_extent_map(em);
next:
/* Once for us (for our lookup reference). */
free_extent_map(em);
em = next_em;
}
write_unlock(&em_tree->lock);
free_extent_map(split);
free_extent_map(split2);
}
/*
* Replace a range in the inode's extent map tree with a new extent map.
*
* @inode: The target inode.
* @new_em: The new extent map to add to the inode's extent map tree.
* @modified: Indicate if the new extent map should be added to the list of
* modified extents (for fast fsync tracking).
*
* Drops all the extent maps in the inode's extent map tree that intersect the
* range of the new extent map and adds the new extent map to the tree.
* The caller should have locked an appropriate file range in the inode's io
* tree before calling this function.
*/
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
struct extent_map *new_em,
bool modified)
{
const u64 end = new_em->start + new_em->len - 1;
struct extent_map_tree *tree = &inode->extent_tree;
int ret;
ASSERT(!extent_map_in_tree(new_em));
/*
* The caller has locked an appropriate file range in the inode's io
* tree, but getting -EEXIST when adding the new extent map can still
* happen in case there are extents that partially cover the range, and
* this is due to two tasks operating on different parts of the extent.
* See commit 18e83ac75bfe67 ("Btrfs: fix unexpected EEXIST from
* btrfs_get_extent") for an example and details.
*/
do {
btrfs_drop_extent_map_range(inode, new_em->start, end, false);
write_lock(&tree->lock);
ret = add_extent_mapping(tree, new_em, modified);
write_unlock(&tree->lock);
} while (ret == -EEXIST);
return ret;
}

View File

@ -63,6 +63,8 @@ struct extent_map_tree {
rwlock_t lock;
};
struct btrfs_inode;
static inline int extent_map_in_tree(const struct extent_map *em)
{
return !RB_EMPTY_NODE(&em->rb_node);
@ -104,5 +106,11 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
int btrfs_add_extent_mapping(struct btrfs_fs_info *fs_info,
struct extent_map_tree *em_tree,
struct extent_map **em_in, u64 start, u64 len);
void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
u64 start, u64 end,
bool skip_pinned);
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
struct extent_map *new_em,
bool modified);
#endif

View File

@ -118,7 +118,7 @@ int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return clear_extent_bit(&inode->file_extent_tree, start,
start + len - 1, EXTENT_DIRTY, 0, 0, NULL);
start + len - 1, EXTENT_DIRTY, NULL);
}
static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
@ -129,12 +129,20 @@ static inline u32 max_ordered_sum_bytes(struct btrfs_fs_info *fs_info,
return ncsums * fs_info->sectorsize;
}
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
/*
* Calculate the total size needed to allocate for an ordered sum structure
* spanning @bytes in the file.
*/
static int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info, unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
}
int btrfs_insert_hole_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,
u64 disk_offset, u64 disk_num_bytes,
u64 num_bytes, u64 offset, u64 ram_bytes,
u8 compression, u8 encryption, u16 other_encoding)
u64 objectid, u64 pos, u64 num_bytes)
{
int ret = 0;
struct btrfs_file_extent_item *item;
@ -157,16 +165,16 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
btrfs_set_file_extent_disk_bytenr(leaf, item, disk_offset);
btrfs_set_file_extent_disk_num_bytes(leaf, item, disk_num_bytes);
btrfs_set_file_extent_offset(leaf, item, offset);
btrfs_set_file_extent_disk_bytenr(leaf, item, 0);
btrfs_set_file_extent_disk_num_bytes(leaf, item, 0);
btrfs_set_file_extent_offset(leaf, item, 0);
btrfs_set_file_extent_num_bytes(leaf, item, num_bytes);
btrfs_set_file_extent_ram_bytes(leaf, item, ram_bytes);
btrfs_set_file_extent_ram_bytes(leaf, item, num_bytes);
btrfs_set_file_extent_generation(leaf, item, trans->transid);
btrfs_set_file_extent_type(leaf, item, BTRFS_FILE_EXTENT_REG);
btrfs_set_file_extent_compression(leaf, item, compression);
btrfs_set_file_extent_encryption(leaf, item, encryption);
btrfs_set_file_extent_other_encoding(leaf, item, other_encoding);
btrfs_set_file_extent_compression(leaf, item, 0);
btrfs_set_file_extent_encryption(leaf, item, 0);
btrfs_set_file_extent_other_encoding(leaf, item, 0);
btrfs_mark_buffer_dirty(leaf);
out:
@ -503,7 +511,8 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
}
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit)
struct list_head *list, int search_commit,
bool nowait)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
@ -525,6 +534,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
if (!path)
return -ENOMEM;
path->nowait = nowait;
if (search_commit) {
path->skip_locking = 1;
path->reada = READA_FORWARD;

File diff suppressed because it is too large Load Diff

View File

@ -48,6 +48,24 @@ static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info, u64 offset,
u64 bytes, bool update_stats);
static void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *info;
struct rb_node *node;
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
if (!info->bitmap) {
unlink_free_space(ctl, info, true);
kmem_cache_free(btrfs_free_space_cachep, info);
} else {
free_bitmap(ctl, info);
}
cond_resched_lock(&ctl->tree_lock);
}
}
static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
struct btrfs_path *path,
u64 offset)
@ -126,10 +144,8 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
block_group->disk_cache_state = BTRFS_DC_CLEAR;
}
if (!block_group->iref) {
if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
block_group->inode = igrab(inode);
block_group->iref = 1;
}
spin_unlock(&block_group->lock);
return inode;
@ -241,8 +257,7 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
clear_nlink(inode);
/* One for the block groups ref */
spin_lock(&block_group->lock);
if (block_group->iref) {
block_group->iref = 0;
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags)) {
block_group->inode = NULL;
spin_unlock(&block_group->lock);
iput(inode);
@ -333,8 +348,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
btrfs_i_size_write(inode, 0);
truncate_pagecache(vfs_inode, 0);
lock_extent_bits(&inode->io_tree, 0, (u64)-1, &cached_state);
btrfs_drop_extent_cache(inode, 0, (u64)-1, 0);
lock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
btrfs_drop_extent_map_range(inode, 0, (u64)-1, false);
/*
* We skip the throttling logic for free space cache inodes, so we don't
@ -345,7 +360,7 @@ int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
inode_sub_bytes(&inode->vfs_inode, control.sub_bytes);
btrfs_inode_safe_disk_i_size_write(inode, control.last_size);
unlock_extent_cached(&inode->io_tree, 0, (u64)-1, &cached_state);
unlock_extent(&inode->io_tree, 0, (u64)-1, &cached_state);
if (ret)
goto fail;
@ -693,6 +708,12 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
max_bitmaps = max_t(u64, max_bitmaps, 1);
if (ctl->total_bitmaps > max_bitmaps)
btrfs_err(block_group->fs_info,
"invalid free space control: bg start=%llu len=%llu total_bitmaps=%u unit=%u max_bitmaps=%llu bytes_per_bg=%llu",
block_group->start, block_group->length,
ctl->total_bitmaps, ctl->unit, max_bitmaps,
bytes_per_bg);
ASSERT(ctl->total_bitmaps <= max_bitmaps);
/*
@ -875,7 +896,10 @@ out:
return ret;
free_cache:
io_ctl_drop_pages(&io_ctl);
spin_lock(&ctl->tree_lock);
__btrfs_remove_free_space_cache(ctl);
spin_unlock(&ctl->tree_lock);
goto out;
}
@ -914,6 +938,8 @@ static int copy_free_space_cache(struct btrfs_block_group *block_group,
return ret;
}
static struct lock_class_key btrfs_free_space_inode_key;
int load_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
@ -983,6 +1009,14 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
}
spin_unlock(&block_group->lock);
/*
* Reinitialize the class of struct inode's mapping->invalidate_lock for
* free space inodes to prevent false positives related to locks for normal
* inodes.
*/
lockdep_set_class(&(&inode->i_data)->invalidate_lock,
&btrfs_free_space_inode_key);
ret = __load_free_space_cache(fs_info->tree_root, inode, &tmp_ctl,
path, block_group->start);
btrfs_free_path(path);
@ -1001,7 +1035,13 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
if (ret == 0)
ret = 1;
} else {
/*
* We need to call the _locked variant so we don't try to update
* the discard counters.
*/
spin_lock(&tmp_ctl.tree_lock);
__btrfs_remove_free_space_cache(&tmp_ctl);
spin_unlock(&tmp_ctl.tree_lock);
btrfs_warn(fs_info,
"block group %llu has wrong amount of free space",
block_group->start);
@ -1123,7 +1163,7 @@ update_cache_item(struct btrfs_trans_handle *trans,
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DELALLOC, 0, 0, NULL);
EXTENT_DELALLOC, NULL);
goto fail;
}
leaf = path->nodes[0];
@ -1135,8 +1175,8 @@ update_cache_item(struct btrfs_trans_handle *trans,
if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
found_key.offset != offset) {
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0,
inode->i_size - 1, EXTENT_DELALLOC, 0,
0, NULL);
inode->i_size - 1, EXTENT_DELALLOC,
NULL);
btrfs_release_path(path);
goto fail;
}
@ -1232,7 +1272,7 @@ static int flush_dirty_cache(struct inode *inode)
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
EXTENT_DELALLOC, 0, 0, NULL);
EXTENT_DELALLOC, NULL);
return ret;
}
@ -1252,8 +1292,8 @@ cleanup_write_cache_enospc(struct inode *inode,
struct extent_state **cached_state)
{
io_ctl_drop_pages(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, cached_state);
unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
cached_state);
}
static int __btrfs_wait_cache_io(struct btrfs_root *root,
@ -1378,8 +1418,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
if (ret)
goto out_unlock;
lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
&cached_state);
lock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
&cached_state);
io_ctl_set_generation(io_ctl, trans->transid);
@ -1434,8 +1474,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
io_ctl_drop_pages(io_ctl);
io_ctl_free(io_ctl);
unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
i_size_read(inode) - 1, &cached_state);
unlock_extent(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
&cached_state);
/*
* at this point the pages are under IO and we're happy,
@ -2860,7 +2900,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
if (btrfs_is_zoned(fs_info)) {
btrfs_info(fs_info, "free space %llu active %d",
block_group->zone_capacity - block_group->alloc_offset,
block_group->zone_is_active);
test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&block_group->runtime_flags));
return;
}
@ -2964,34 +3005,6 @@ static void __btrfs_return_cluster_to_free_space(
btrfs_put_block_group(block_group);
}
static void __btrfs_remove_free_space_cache_locked(
struct btrfs_free_space_ctl *ctl)
{
struct btrfs_free_space *info;
struct rb_node *node;
while ((node = rb_last(&ctl->free_space_offset)) != NULL) {
info = rb_entry(node, struct btrfs_free_space, offset_index);
if (!info->bitmap) {
unlink_free_space(ctl, info, true);
kmem_cache_free(btrfs_free_space_cachep, info);
} else {
free_bitmap(ctl, info);
}
cond_resched_lock(&ctl->tree_lock);
}
}
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl)
{
spin_lock(&ctl->tree_lock);
__btrfs_remove_free_space_cache_locked(ctl);
if (ctl->block_group)
btrfs_discard_update_discardable(ctl->block_group);
spin_unlock(&ctl->tree_lock);
}
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
@ -3009,7 +3022,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group)
cond_resched_lock(&ctl->tree_lock);
}
__btrfs_remove_free_space_cache_locked(ctl);
__btrfs_remove_free_space_cache(ctl);
btrfs_discard_update_discardable(block_group);
spin_unlock(&ctl->tree_lock);
@ -3992,7 +4005,7 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group,
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
return 0;
}
@ -4022,7 +4035,7 @@ int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
return 0;
}
@ -4044,7 +4057,7 @@ int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
if (test_bit(BLOCK_GROUP_FLAG_REMOVED, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
return 0;
}

View File

@ -113,7 +113,6 @@ int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,

View File

@ -1453,8 +1453,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
caching_ctl->progress = key.objectid;
offset = key.objectid;
while (offset < key.objectid + key.offset) {
bit = free_space_test_bit(block_group, path, offset);
@ -1490,8 +1488,6 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
goto out;
}
caching_ctl->progress = (u64)-1;
ret = 0;
out:
return ret;
@ -1531,8 +1527,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
caching_ctl->progress = key.objectid;
total_found += add_new_free_space(block_group, key.objectid,
key.objectid + key.offset);
if (total_found > CACHING_CTL_WAKE_UP) {
@ -1552,8 +1546,6 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
goto out;
}
caching_ctl->progress = (u64)-1;
ret = 0;
out:
return ret;

File diff suppressed because it is too large Load Diff

View File

@ -1218,10 +1218,10 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
/* get the big lock and read metadata off disk */
if (!locked)
lock_extent_bits(io_tree, start, end, &cached);
lock_extent(io_tree, start, end, &cached);
em = defrag_get_extent(BTRFS_I(inode), start, newer_than);
if (!locked)
unlock_extent_cached(io_tree, start, end, &cached);
unlock_extent(io_tree, start, end, &cached);
if (IS_ERR(em))
return NULL;
@ -1333,10 +1333,10 @@ again:
while (1) {
struct btrfs_ordered_extent *ordered;
lock_extent_bits(&inode->io_tree, page_start, page_end, &cached_state);
lock_extent(&inode->io_tree, page_start, page_end, &cached_state);
ordered = btrfs_lookup_ordered_range(inode, page_start, PAGE_SIZE);
unlock_extent_cached(&inode->io_tree, page_start, page_end,
&cached_state);
unlock_extent(&inode->io_tree, page_start, page_end,
&cached_state);
if (!ordered)
break;
@ -1616,7 +1616,7 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
return ret;
clear_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 0, 0, cached_state);
EXTENT_DEFRAG, cached_state);
set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state);
/* Update the page status */
@ -1666,9 +1666,9 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
wait_on_page_writeback(pages[i]);
/* Lock the pages range */
lock_extent_bits(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
lock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
/*
* Now we have a consistent view about the extent map, re-check
* which range really needs to be defragged.
@ -1694,9 +1694,9 @@ static int defrag_one_range(struct btrfs_inode *inode, u64 start, u32 len,
kfree(entry);
}
unlock_extent:
unlock_extent_cached(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
unlock_extent(&inode->io_tree, start_index << PAGE_SHIFT,
(last_index << PAGE_SHIFT) + PAGE_SIZE - 1,
&cached_state);
free_pages:
for (i = 0; i < nr_pages; i++) {
if (pages[i]) {

View File

@ -285,6 +285,31 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
return eb;
}
/*
* Loop around taking references on and locking the root node of the tree in
* nowait mode until we end up with a lock on the root node or returning to
* avoid blocking.
*
* Return: root extent buffer with read lock held or -EAGAIN.
*/
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
while (1) {
eb = btrfs_root_node(root);
if (!btrfs_try_tree_read_lock(eb)) {
free_extent_buffer(eb);
return ERR_PTR(-EAGAIN);
}
if (eb == root->node)
break;
btrfs_tree_read_unlock(eb);
free_extent_buffer(eb);
}
return eb;
}
/*
* DREW locks
* ==========

View File

@ -94,6 +94,7 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb);
int btrfs_try_tree_write_lock(struct extent_buffer *eb);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root);
#ifdef CONFIG_BTRFS_DEBUG
static inline void btrfs_assert_tree_write_locked(struct extent_buffer *eb)

View File

@ -88,6 +88,41 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
return NULL;
}
/*
* Search @root from an entry that starts or comes after @bytenr.
*
* @root: the root to search.
* @bytenr: bytenr to search from.
*
* Return the rb_node that start at or after @bytenr. If there is no entry at
* or after @bytner return NULL.
*/
static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
u64 bytenr)
{
struct rb_node *node = root->rb_node, *ret = NULL;
struct rb_simple_node *entry, *ret_entry = NULL;
while (node) {
entry = rb_entry(node, struct rb_simple_node, rb_node);
if (bytenr < entry->bytenr) {
if (!ret || entry->bytenr < ret_entry->bytenr) {
ret = node;
ret_entry = entry;
}
node = node->rb_left;
} else if (bytenr > entry->bytenr) {
node = node->rb_right;
} else {
return node;
}
}
return ret;
}
static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
struct rb_node *node)
{

View File

@ -524,7 +524,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *node;
bool pending;
bool freespace_inode;
/*
* If this is a free space inode the thread has not acquired the ordered
* extents lockdep map.
*/
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
/* This is paired with btrfs_add_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
@ -580,6 +588,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
}
}
btrfs_lockdep_release(fs_info, btrfs_trans_pending_ordered);
spin_lock(&root->ordered_extent_lock);
list_del_init(&entry->root_extent_list);
root->nr_ordered_extents--;
@ -594,6 +604,8 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
}
spin_unlock(&root->ordered_extent_lock);
wake_up(&entry->wait);
if (!freespace_inode)
btrfs_lockdep_release(fs_info, btrfs_ordered_extent);
}
static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
@ -712,9 +724,16 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
u64 start = entry->file_offset;
u64 end = start + entry->num_bytes - 1;
struct btrfs_inode *inode = BTRFS_I(entry->inode);
bool freespace_inode;
trace_btrfs_ordered_extent_start(inode, entry);
/*
* If this is a free space inode do not take the ordered extents lockdep
* map.
*/
freespace_inode = btrfs_is_free_space_inode(inode);
/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
@ -723,6 +742,8 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry, int wait)
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->vfs_inode.i_mapping, start, end);
if (wait) {
if (!freespace_inode)
btrfs_might_wait_for_event(inode->root->fs_info, btrfs_ordered_extent);
wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
&entry->flags));
}
@ -1022,7 +1043,7 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
cachedp = cached_state;
while (1) {
lock_extent_bits(&inode->io_tree, start, end, cachedp);
lock_extent(&inode->io_tree, start, end, cachedp);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
@ -1035,12 +1056,37 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
refcount_dec(&cache->refs);
break;
}
unlock_extent_cached(&inode->io_tree, start, end, cachedp);
unlock_extent(&inode->io_tree, start, end, cachedp);
btrfs_start_ordered_extent(ordered, 1);
btrfs_put_ordered_extent(ordered);
}
}
/*
* Lock the passed range and ensure all pending ordered extents in it are run
* to completion in nowait mode.
*
* Return true if btrfs_lock_ordered_range does not return any extents,
* otherwise false.
*/
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end)
{
struct btrfs_ordered_extent *ordered;
if (!try_lock_extent(&inode->io_tree, start, end))
return false;
ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1);
if (!ordered)
return true;
btrfs_put_ordered_extent(ordered);
unlock_extent(&inode->io_tree, start, end, NULL);
return false;
}
static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
u64 len)
{

View File

@ -160,18 +160,6 @@ struct btrfs_ordered_extent {
struct block_device *bdev;
};
/*
* calculates the total size you need to allocate for an ordered sum
* structure spanning 'bytes' in the file
*/
static inline int btrfs_ordered_sum_size(struct btrfs_fs_info *fs_info,
unsigned long bytes)
{
int num_sectors = (int)DIV_ROUND_UP(bytes, fs_info->sectorsize);
return sizeof(struct btrfs_ordered_sum) + num_sectors * fs_info->csum_size;
}
static inline void
btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
{
@ -218,6 +206,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
u64 end,
struct extent_state **cached_state);
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end);
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
u64 post);
int __init ordered_data_init(void);

View File

@ -270,11 +270,8 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 ino = btrfs_ino(BTRFS_I(inode));
int ret;
ret = iterate_object_props(root, path, ino, inode_prop_iterator, inode);
return ret;
return iterate_object_props(root, path, ino, inode_prop_iterator, inode);
}
static int prop_compression_validate(const struct btrfs_inode *inode,

View File

@ -275,7 +275,7 @@ static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *p
}
/*
* Add relation specified by two qgoup ids.
* Add relation specified by two qgroup ids.
*
* Must be called with qgroup_lock held.
*
@ -333,6 +333,13 @@ int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
}
#endif
static void qgroup_mark_inconsistent(struct btrfs_fs_info *fs_info)
{
fs_info->qgroup_flags |= (BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT |
BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
}
/*
* The full config is read in one go, only called from open_ctree()
* It doesn't use any locking, as at this point we're still single-threaded
@ -401,7 +408,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
}
if (btrfs_qgroup_status_generation(l, ptr) !=
fs_info->generation) {
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_err(fs_info,
"qgroup generation mismatch, marked as inconsistent");
}
@ -419,7 +426,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
if ((qgroup && found_key.type == BTRFS_QGROUP_INFO_KEY) ||
(!qgroup && found_key.type == BTRFS_QGROUP_LIMIT_KEY)) {
btrfs_err(fs_info, "inconsistent qgroup config");
flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
}
if (!qgroup) {
qgroup = add_qgroup_rb(fs_info, found_key.offset);
@ -878,7 +885,8 @@ static int update_qgroup_status_item(struct btrfs_trans_handle *trans)
l = path->nodes[0];
slot = path->slots[0];
ptr = btrfs_item_ptr(l, slot, struct btrfs_qgroup_status_item);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_flags(l, ptr, fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAGS_MASK);
btrfs_set_qgroup_status_generation(l, ptr, trans->transid);
btrfs_set_qgroup_status_rescan(l, ptr,
fs_info->qgroup_rescan_progress.objectid);
@ -1052,7 +1060,8 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
btrfs_set_qgroup_status_version(leaf, ptr, BTRFS_QGROUP_STATUS_VERSION);
fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON |
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags);
btrfs_set_qgroup_status_flags(leaf, ptr, fs_info->qgroup_flags &
BTRFS_QGROUP_STATUS_FLAGS_MASK);
btrfs_set_qgroup_status_rescan(leaf, ptr, 0);
btrfs_mark_buffer_dirty(leaf);
@ -1174,6 +1183,21 @@ out_add_root:
fs_info->qgroup_rescan_running = true;
btrfs_queue_work(fs_info->qgroup_rescan_workers,
&fs_info->qgroup_rescan_work);
} else {
/*
* We have set both BTRFS_FS_QUOTA_ENABLED and
* BTRFS_QGROUP_STATUS_FLAG_ON, so we can only fail with
* -EINPROGRESS. That can happen because someone started the
* rescan worker by calling quota rescan ioctl before we
* attempted to initialize the rescan worker. Failure due to
* quotas disabled in the meanwhile is not possible, because
* we are holding a write lock on fs_info->subvol_sem, which
* is also acquired when disabling quotas.
* Ignore such error, and any other error would need to undo
* everything we did in the transaction we just committed.
*/
ASSERT(ret == -EINPROGRESS);
ret = 0;
}
out_free_path:
@ -1255,6 +1279,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON;
fs_info->qgroup_drop_subtree_thres = BTRFS_MAX_LEVEL;
spin_unlock(&fs_info->qgroup_lock);
btrfs_free_qgroup_config(fs_info);
@ -1717,7 +1742,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
ret = update_qgroup_limit_item(trans, qgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_info(fs_info, "unable to update quota limit for %llu",
qgroupid);
}
@ -1790,10 +1815,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
*/
ASSERT(trans != NULL);
if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
return 0;
ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root,
true);
if (ret < 0) {
trans->fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(trans->fs_info);
btrfs_warn(trans->fs_info,
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
ret);
@ -2269,7 +2297,7 @@ static int qgroup_trace_subtree_swap(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(dst_path);
if (ret < 0)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}
@ -2280,6 +2308,7 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = trans->fs_info;
int ret = 0;
int level;
u8 drop_subptree_thres;
struct extent_buffer *eb = root_eb;
struct btrfs_path *path = NULL;
@ -2289,6 +2318,23 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
return 0;
spin_lock(&fs_info->qgroup_lock);
drop_subptree_thres = fs_info->qgroup_drop_subtree_thres;
spin_unlock(&fs_info->qgroup_lock);
/*
* This function only gets called for snapshot drop, if we hit a high
* node here, it means we are going to change ownership for quite a lot
* of extents, which will greatly slow down btrfs_commit_transaction().
*
* So here if we find a high tree here, we just skip the accounting and
* mark qgroup inconsistent.
*/
if (root_level >= drop_subptree_thres) {
qgroup_mark_inconsistent(fs_info);
return 0;
}
if (!extent_buffer_uptodate(root_eb)) {
ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL);
if (ret)
@ -2604,7 +2650,8 @@ int btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans, u64 bytenr,
* If quotas get disabled meanwhile, the resources need to be freed and
* we can't just exit here.
*/
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
goto out_free;
if (new_roots) {
@ -2700,7 +2747,8 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
num_dirty_extents++;
trace_btrfs_qgroup_account_extents(fs_info, record);
if (!ret) {
if (!ret && !(fs_info->qgroup_flags &
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
/*
* Old roots should be searched when inserting qgroup
* extent record
@ -2773,12 +2821,10 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
spin_unlock(&fs_info->qgroup_lock);
ret = update_qgroup_info_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
ret = update_qgroup_limit_item(trans, qgroup);
if (ret)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
spin_lock(&fs_info->qgroup_lock);
}
if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
@ -2789,7 +2835,7 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
ret = update_qgroup_status_item(trans);
if (ret)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}
@ -2907,7 +2953,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
ret = update_qgroup_limit_item(trans, dstgroup);
if (ret) {
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
btrfs_info(fs_info,
"unable to update quota limit for %llu",
dstgroup->qgroupid);
@ -3013,7 +3059,7 @@ out:
if (!committing)
mutex_unlock(&fs_info->qgroup_ioctl_lock);
if (need_rescan)
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}
@ -3286,7 +3332,8 @@ static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
{
return btrfs_fs_closing(fs_info) ||
test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags) ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
}
static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@ -3351,7 +3398,8 @@ out:
}
mutex_lock(&fs_info->qgroup_rescan_lock);
if (!stopped)
if (!stopped ||
fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN)
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
if (trans) {
ret = update_qgroup_status_item(trans);
@ -3362,6 +3410,7 @@ out:
}
}
fs_info->qgroup_rescan_running = false;
fs_info->qgroup_flags &= ~BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN;
complete_all(&fs_info->qgroup_rescan_completion);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@ -3372,6 +3421,8 @@ out:
if (stopped) {
btrfs_info(fs_info, "qgroup scan paused");
} else if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN) {
btrfs_info(fs_info, "qgroup scan cancelled");
} else if (err >= 0) {
btrfs_info(fs_info, "qgroup scan completed%s",
err > 0 ? " (inconsistency flag cleared)" : "");
@ -3434,6 +3485,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
memset(&fs_info->qgroup_rescan_progress, 0,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_flags &= ~(BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN |
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING);
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@ -4231,8 +4284,7 @@ out_unlock:
spin_unlock(&blocks->lock);
out:
if (ret < 0)
fs_info->qgroup_flags |=
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
return ret;
}
@ -4319,7 +4371,7 @@ out:
btrfs_err_rl(fs_info,
"failed to account subtree at bytenr %llu: %d",
subvol_eb->start, ret);
fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
qgroup_mark_inconsistent(fs_info);
}
return ret;
}

View File

@ -100,6 +100,9 @@
* subtree rescan for them.
*/
#define BTRFS_QGROUP_RUNTIME_FLAG_CANCEL_RESCAN (1UL << 3)
#define BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING (1UL << 4)
/*
* Record a dirty extent, and info qgroup to update quota on it
* TODO: Use kmem cache to alloc it.

View File

@ -275,7 +275,6 @@ static void merge_rbio(struct btrfs_raid_bio *dest,
/* Also inherit the bitmaps from @victim. */
bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
dest->stripe_nsectors);
dest->generic_bio_cnt += victim->generic_bio_cnt;
bio_list_init(&victim->bio_list);
}
@ -814,8 +813,6 @@ static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
struct bio *cur = bio_list_get(&rbio->bio_list);
struct bio *extra;
if (rbio->generic_bio_cnt)
btrfs_bio_counter_sub(rbio->bioc->fs_info, rbio->generic_bio_cnt);
/*
* Clear the data bitmap, as the rbio may be cached for later usage.
* do this before before unlock_stripe() so there will be no new bio
@ -946,6 +943,7 @@ static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
spin_lock_init(&rbio->bio_list_lock);
INIT_LIST_HEAD(&rbio->stripe_cache);
INIT_LIST_HEAD(&rbio->hash_list);
btrfs_get_bioc(bioc);
rbio->bioc = bioc;
rbio->nr_pages = num_pages;
rbio->nr_sectors = num_sectors;
@ -1813,15 +1811,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
btrfs_put_bioc(bioc);
ret = PTR_ERR(rbio);
goto out_dec_counter;
goto fail;
}
rbio->operation = BTRFS_RBIO_WRITE;
rbio_add_bio(rbio, bio);
rbio->generic_bio_cnt = 1;
/*
* don't plug on full rbios, just get them out the door
* as quickly as we can
@ -1829,7 +1824,7 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
if (rbio_is_full(rbio)) {
ret = full_stripe_write(rbio);
if (ret)
goto out_dec_counter;
goto fail;
return;
}
@ -1844,13 +1839,12 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
} else {
ret = __raid56_parity_write(rbio);
if (ret)
goto out_dec_counter;
goto fail;
}
return;
out_dec_counter:
btrfs_bio_counter_dec(fs_info);
fail:
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
}
@ -2198,18 +2192,11 @@ cleanup:
* of the drive.
*/
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
int mirror_num, bool generic_io)
int mirror_num)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_raid_bio *rbio;
if (generic_io) {
ASSERT(bioc->mirror_num == mirror_num);
btrfs_bio(bio)->mirror_num = mirror_num;
} else {
btrfs_get_bioc(bioc);
}
rbio = alloc_rbio(fs_info, bioc);
if (IS_ERR(rbio)) {
bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
@ -2225,14 +2212,11 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
"%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bioc has map_type %llu)",
__func__, bio->bi_iter.bi_sector << 9,
(u64)bio->bi_iter.bi_size, bioc->map_type);
kfree(rbio);
__free_raid_bio(rbio);
bio->bi_status = BLK_STS_IOERR;
goto out_end_bio;
}
if (generic_io)
rbio->generic_bio_cnt = 1;
/*
* Loop retry:
* for 'mirror == 2', reconstruct from all other stripes.
@ -2261,8 +2245,6 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
return;
out_end_bio:
btrfs_bio_counter_dec(fs_info);
btrfs_put_bioc(bioc);
bio_endio(bio);
}
@ -2326,13 +2308,6 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
ASSERT(i < rbio->real_stripes);
bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
/*
* We have already increased bio_counter when getting bioc, record it
* so we can free it at rbio_orig_end_io().
*/
rbio->generic_bio_cnt = 1;
return rbio;
}
@ -2772,12 +2747,6 @@ raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc)
return NULL;
}
/*
* When we get bioc, we have already increased bio_counter, record it
* so we can free it at rbio_orig_end_io()
*/
rbio->generic_bio_cnt = 1;
return rbio;
}

View File

@ -89,8 +89,6 @@ struct btrfs_raid_bio {
*/
int bio_list_bytes;
int generic_bio_cnt;
refcount_t refs;
atomic_t stripes_pending;
@ -166,7 +164,7 @@ static inline int nr_data_stripes(const struct map_lookup *map)
struct btrfs_device;
void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
int mirror_num, bool generic_io);
int mirror_num);
void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,

View File

@ -92,7 +92,7 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
clear_extent_bit(&inode->io_tree, file_offset, range_end,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, NULL);
NULL);
ret = btrfs_set_extent_delalloc(inode, file_offset, range_end, 0, NULL);
if (ret)
goto out_unlock;
@ -615,8 +615,8 @@ out:
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
struct inode *inode2, u64 loff2, u64 len)
{
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
}
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
@ -634,8 +634,8 @@ static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
swap(range1_end, range2_end);
}
lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end);
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end);
lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);
btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);

View File

@ -1124,10 +1124,10 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
if (!ret)
continue;
btrfs_drop_extent_cache(BTRFS_I(inode),
key.offset, end, 1);
btrfs_drop_extent_map_range(BTRFS_I(inode),
key.offset, end, true);
unlock_extent(&BTRFS_I(inode)->io_tree,
key.offset, end);
key.offset, end, NULL);
}
}
@ -1566,9 +1566,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,
}
/* the lock_extent waits for read_folio to complete */
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 1);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, true);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
}
return 0;
}
@ -2869,13 +2869,13 @@ static noinline_for_stack int prealloc_file_extent_cluster(
else
end = cluster->end - offset;
lock_extent(&inode->io_tree, start, end);
lock_extent(&inode->io_tree, start, end, NULL);
num_bytes = end + 1 - start;
ret = btrfs_prealloc_file_range(&inode->vfs_inode, 0, start,
num_bytes, num_bytes,
end + 1, &alloc_hint);
cur_offset = end + 1;
unlock_extent(&inode->io_tree, start, end);
unlock_extent(&inode->io_tree, start, end, NULL);
if (ret)
break;
}
@ -2890,7 +2890,6 @@ static noinline_for_stack int prealloc_file_extent_cluster(
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
u64 start, u64 end, u64 block_start)
{
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_map *em;
int ret = 0;
@ -2904,18 +2903,11 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
em->block_start = block_start;
set_bit(EXTENT_FLAG_PINNED, &em->flags);
lock_extent(&BTRFS_I(inode)->io_tree, start, end);
while (1) {
write_lock(&em_tree->lock);
ret = add_extent_mapping(em_tree, em, 0);
write_unlock(&em_tree->lock);
if (ret != -EEXIST) {
free_extent_map(em);
break;
}
btrfs_drop_extent_cache(BTRFS_I(inode), start, end, 0);
}
unlock_extent(&BTRFS_I(inode)->io_tree, start, end);
lock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false);
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, NULL);
free_extent_map(em);
return ret;
}
@ -3006,7 +2998,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
goto release_page;
/* Mark the range delalloc and dirty for later writeback */
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
lock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
ret = btrfs_set_extent_delalloc(BTRFS_I(inode), clamped_start,
clamped_end, 0, NULL);
if (ret) {
@ -3039,7 +3031,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
boundary_start, boundary_end,
EXTENT_BOUNDARY);
}
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end);
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end, NULL);
btrfs_delalloc_release_extents(BTRFS_I(inode), clamped_len);
cur += clamped_len;
@ -4339,7 +4331,7 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
disk_bytenr = file_pos + inode->index_cnt;
csum_root = btrfs_csum_root(fs_info, disk_bytenr);
ret = btrfs_lookup_csums_range(csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0);
disk_bytenr + len - 1, &list, 0, false);
if (ret)
goto out;

View File

@ -337,7 +337,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
struct extent_buffer *leaf;
struct btrfs_key key;
unsigned long ptr;
int err = 0;
int ret;
path = btrfs_alloc_path();
@ -350,7 +349,6 @@ int btrfs_del_root_ref(struct btrfs_trans_handle *trans, u64 root_id,
again:
ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
if (ret < 0) {
err = ret;
goto out;
} else if (ret == 0) {
leaf = path->nodes[0];
@ -360,18 +358,18 @@ again:
if ((btrfs_root_ref_dirid(leaf, ref) != dirid) ||
(btrfs_root_ref_name_len(leaf, ref) != name_len) ||
memcmp_extent_buffer(leaf, name, ptr, name_len)) {
err = -ENOENT;
ret = -ENOENT;
goto out;
}
*sequence = btrfs_root_ref_sequence(leaf, ref);
ret = btrfs_del_item(trans, tree_root, path);
if (ret) {
err = ret;
if (ret)
goto out;
}
} else
err = -ENOENT;
} else {
ret = -ENOENT;
goto out;
}
if (key.type == BTRFS_ROOT_BACKREF_KEY) {
btrfs_release_path(path);
@ -383,7 +381,7 @@ again:
out:
btrfs_free_path(path);
return err;
return ret;
}
/*

File diff suppressed because it is too large Load Diff

View File

@ -15,6 +15,7 @@
#include <linux/string.h>
#include <linux/compat.h>
#include <linux/crc32c.h>
#include <linux/fsverity.h>
#include "send.h"
#include "ctree.h"
@ -127,6 +128,8 @@ struct send_ctx {
bool cur_inode_new_gen;
bool cur_inode_deleted;
bool ignore_cur_inode;
bool cur_inode_needs_verity;
void *verity_descriptor;
u64 send_progress;
@ -624,6 +627,7 @@ static int tlv_put(struct send_ctx *sctx, u16 attr, const void *data, int len)
return tlv_put(sctx, attr, &__tmp, sizeof(__tmp)); \
}
TLV_PUT_DEFINE_INT(8)
TLV_PUT_DEFINE_INT(32)
TLV_PUT_DEFINE_INT(64)
@ -842,17 +846,32 @@ out:
return ret;
}
struct btrfs_inode_info {
u64 size;
u64 gen;
u64 mode;
u64 uid;
u64 gid;
u64 rdev;
u64 fileattr;
u64 nlink;
};
/*
* Helper function to retrieve some fields from an inode item.
*/
static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
u64 *gid, u64 *rdev, u64 *fileattr)
static int get_inode_info(struct btrfs_root *root, u64 ino,
struct btrfs_inode_info *info)
{
int ret;
struct btrfs_path *path;
struct btrfs_inode_item *ii;
struct btrfs_key key;
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
key.objectid = ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
@ -860,47 +879,43 @@ static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
if (ret) {
if (ret > 0)
ret = -ENOENT;
return ret;
goto out;
}
if (!info)
goto out;
ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
if (size)
*size = btrfs_inode_size(path->nodes[0], ii);
if (gen)
*gen = btrfs_inode_generation(path->nodes[0], ii);
if (mode)
*mode = btrfs_inode_mode(path->nodes[0], ii);
if (uid)
*uid = btrfs_inode_uid(path->nodes[0], ii);
if (gid)
*gid = btrfs_inode_gid(path->nodes[0], ii);
if (rdev)
*rdev = btrfs_inode_rdev(path->nodes[0], ii);
info->size = btrfs_inode_size(path->nodes[0], ii);
info->gen = btrfs_inode_generation(path->nodes[0], ii);
info->mode = btrfs_inode_mode(path->nodes[0], ii);
info->uid = btrfs_inode_uid(path->nodes[0], ii);
info->gid = btrfs_inode_gid(path->nodes[0], ii);
info->rdev = btrfs_inode_rdev(path->nodes[0], ii);
info->nlink = btrfs_inode_nlink(path->nodes[0], ii);
/*
* Transfer the unchanged u64 value of btrfs_inode_item::flags, that's
* otherwise logically split to 32/32 parts.
*/
if (fileattr)
*fileattr = btrfs_inode_flags(path->nodes[0], ii);
info->fileattr = btrfs_inode_flags(path->nodes[0], ii);
out:
btrfs_free_path(path);
return ret;
}
static int get_inode_info(struct btrfs_root *root,
u64 ino, u64 *size, u64 *gen,
u64 *mode, u64 *uid, u64 *gid,
u64 *rdev, u64 *fileattr)
static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
{
struct btrfs_path *path;
int ret;
struct btrfs_inode_info info;
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
rdev, fileattr);
btrfs_free_path(path);
if (!gen)
return -EPERM;
ret = get_inode_info(root, ino, &info);
if (!ret)
*gen = info.gen;
return ret;
}
@ -1643,21 +1658,22 @@ static int get_cur_inode_state(struct send_ctx *sctx, u64 ino, u64 gen)
int right_ret;
u64 left_gen;
u64 right_gen;
struct btrfs_inode_info info;
ret = get_inode_info(sctx->send_root, ino, NULL, &left_gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_info(sctx->send_root, ino, &info);
if (ret < 0 && ret != -ENOENT)
goto out;
left_ret = ret;
left_ret = (info.nlink == 0) ? -ENOENT : ret;
left_gen = info.gen;
if (!sctx->parent_root) {
right_ret = -ENOENT;
} else {
ret = get_inode_info(sctx->parent_root, ino, NULL, &right_gen,
NULL, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->parent_root, ino, &info);
if (ret < 0 && ret != -ENOENT)
goto out;
right_ret = ret;
right_ret = (info.nlink == 0) ? -ENOENT : ret;
right_gen = info.gen;
}
if (!left_ret && !right_ret) {
@ -1816,8 +1832,7 @@ static int get_first_ref(struct btrfs_root *root, u64 ino,
btrfs_release_path(path);
if (dir_gen) {
ret = get_inode_info(root, parent_dir, NULL, dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(root, parent_dir, dir_gen);
if (ret < 0)
goto out;
}
@ -1874,6 +1889,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
int ret = 0;
u64 gen;
u64 other_inode = 0;
struct btrfs_inode_info info;
if (!sctx->parent_root)
goto out;
@ -1888,8 +1904,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
* and we can just unlink this entry.
*/
if (sctx->parent_root && dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &gen);
if (ret < 0 && ret != -ENOENT)
goto out;
if (ret) {
@ -1916,13 +1931,14 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
*/
if (other_inode > sctx->send_progress ||
is_waiting_for_move(sctx, other_inode)) {
ret = get_inode_info(sctx->parent_root, other_inode, NULL,
who_gen, who_mode, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->parent_root, other_inode, &info);
if (ret < 0)
goto out;
ret = 1;
*who_ino = other_inode;
*who_gen = info.gen;
*who_mode = info.mode;
} else {
ret = 0;
}
@ -1955,8 +1971,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
goto out;
if (dir != BTRFS_FIRST_FREE_OBJECTID) {
ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &gen);
if (ret < 0 && ret != -ENOENT)
goto out;
if (ret) {
@ -1978,8 +1993,7 @@ static int did_overwrite_ref(struct send_ctx *sctx,
goto out;
}
ret = get_inode_info(sctx->send_root, ow_inode, NULL, &gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, ow_inode, &gen);
if (ret < 0)
goto out;
@ -2645,6 +2659,7 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
int ret = 0;
struct fs_path *p;
int cmd;
struct btrfs_inode_info info;
u64 gen;
u64 mode;
u64 rdev;
@ -2656,10 +2671,12 @@ static int send_create_inode(struct send_ctx *sctx, u64 ino)
return -ENOMEM;
if (ino != sctx->cur_ino) {
ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode,
NULL, NULL, &rdev, NULL);
ret = get_inode_info(sctx->send_root, ino, &info);
if (ret < 0)
goto out;
gen = info.gen;
mode = info.mode;
rdev = info.rdev;
} else {
gen = sctx->cur_inode_gen;
mode = sctx->cur_inode_mode;
@ -3359,8 +3376,7 @@ finish:
/*
* The parent inode might have been deleted in the send snapshot
*/
ret = get_inode_info(sctx->send_root, cur->dir, NULL,
NULL, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_info(sctx->send_root, cur->dir, NULL);
if (ret == -ENOENT) {
ret = 0;
continue;
@ -3534,12 +3550,10 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
goto out;
}
ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
&left_gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, di_key.objectid, &left_gen);
if (ret < 0)
goto out;
ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
&right_gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, di_key.objectid, &right_gen);
if (ret < 0) {
if (ret == -ENOENT)
ret = 0;
@ -3669,8 +3683,7 @@ static int is_ancestor(struct btrfs_root *root,
cur_offset = item_size;
}
ret = get_inode_info(root, parent, NULL, &parent_gen,
NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(root, parent, &parent_gen);
if (ret < 0)
goto out;
ret = check_ino_in_path(root, ino1, ino1_gen,
@ -3760,9 +3773,7 @@ static int wait_for_parent_move(struct send_ctx *sctx,
memcmp(path_before->start, path_after->start, len1))) {
u64 parent_ino_gen;
ret = get_inode_info(sctx->parent_root, ino, NULL,
&parent_ino_gen, NULL, NULL, NULL,
NULL, NULL);
ret = get_inode_gen(sctx->parent_root, ino, &parent_ino_gen);
if (ret < 0)
goto out;
if (ino_gen == parent_ino_gen) {
@ -4441,8 +4452,7 @@ static int record_new_ref_if_needed(int num, u64 dir, int index,
struct recorded_ref *ref;
u64 dir_gen;
ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &dir_gen);
if (ret < 0)
goto out;
@ -4472,8 +4482,7 @@ static int record_deleted_ref_if_needed(int num, u64 dir, int index,
struct recorded_ref *ref;
u64 dir_gen;
ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &dir_gen);
if (ret < 0)
goto out;
@ -4886,6 +4895,84 @@ static int process_all_new_xattrs(struct send_ctx *sctx)
return ret;
}
static int send_verity(struct send_ctx *sctx, struct fs_path *path,
struct fsverity_descriptor *desc)
{
int ret;
ret = begin_cmd(sctx, BTRFS_SEND_C_ENABLE_VERITY);
if (ret < 0)
goto out;
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, path);
TLV_PUT_U8(sctx, BTRFS_SEND_A_VERITY_ALGORITHM,
le8_to_cpu(desc->hash_algorithm));
TLV_PUT_U32(sctx, BTRFS_SEND_A_VERITY_BLOCK_SIZE,
1U << le8_to_cpu(desc->log_blocksize));
TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SALT_DATA, desc->salt,
le8_to_cpu(desc->salt_size));
TLV_PUT(sctx, BTRFS_SEND_A_VERITY_SIG_DATA, desc->signature,
le32_to_cpu(desc->sig_size));
ret = send_cmd(sctx);
tlv_put_failure:
out:
return ret;
}
static int process_verity(struct send_ctx *sctx)
{
int ret = 0;
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
struct inode *inode;
struct fs_path *p;
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
if (IS_ERR(inode))
return PTR_ERR(inode);
ret = btrfs_get_verity_descriptor(inode, NULL, 0);
if (ret < 0)
goto iput;
if (ret > FS_VERITY_MAX_DESCRIPTOR_SIZE) {
ret = -EMSGSIZE;
goto iput;
}
if (!sctx->verity_descriptor) {
sctx->verity_descriptor = kvmalloc(FS_VERITY_MAX_DESCRIPTOR_SIZE,
GFP_KERNEL);
if (!sctx->verity_descriptor) {
ret = -ENOMEM;
goto iput;
}
}
ret = btrfs_get_verity_descriptor(inode, sctx->verity_descriptor, ret);
if (ret < 0)
goto iput;
p = fs_path_alloc();
if (!p) {
ret = -ENOMEM;
goto iput;
}
ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
if (ret < 0)
goto free_path;
ret = send_verity(sctx, p, sctx->verity_descriptor);
if (ret < 0)
goto free_path;
free_path:
fs_path_free(p);
iput:
iput(inode);
return ret;
}
static inline u64 max_send_read_size(const struct send_ctx *sctx)
{
return sctx->send_max_size - SZ_16K;
@ -5056,8 +5143,7 @@ static int send_clone(struct send_ctx *sctx,
TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
if (clone_root->root == sctx->send_root) {
ret = get_inode_info(sctx->send_root, clone_root->ino, NULL,
&gen, NULL, NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, clone_root->ino, &gen);
if (ret < 0)
goto out;
ret = get_cur_path(sctx, clone_root->ino, gen, p);
@ -5536,6 +5622,7 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
struct btrfs_path *path;
struct btrfs_key key;
int ret;
struct btrfs_inode_info info;
u64 clone_src_i_size = 0;
/*
@ -5565,12 +5652,11 @@ static int clone_range(struct send_ctx *sctx, struct btrfs_path *dst_path,
* There are inodes that have extents that lie behind its i_size. Don't
* accept clones from these extents.
*/
ret = __get_inode_info(clone_root->root, path, clone_root->ino,
&clone_src_i_size, NULL, NULL, NULL, NULL, NULL,
NULL);
ret = get_inode_info(clone_root->root, clone_root->ino, &info);
btrfs_release_path(path);
if (ret < 0)
goto out;
clone_src_i_size = info.size;
/*
* We can't send a clone operation for the entire range if we find
@ -6259,6 +6345,7 @@ out:
static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
{
int ret = 0;
struct btrfs_inode_info info;
u64 left_mode;
u64 left_uid;
u64 left_gid;
@ -6301,11 +6388,13 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
goto out;
if (!at_end && sctx->cmp_key->objectid == sctx->cur_ino)
goto out;
ret = get_inode_info(sctx->send_root, sctx->cur_ino, NULL, NULL,
&left_mode, &left_uid, &left_gid, NULL, &left_fileattr);
ret = get_inode_info(sctx->send_root, sctx->cur_ino, &info);
if (ret < 0)
goto out;
left_mode = info.mode;
left_uid = info.uid;
left_gid = info.gid;
left_fileattr = info.fileattr;
if (!sctx->parent_root || sctx->cur_inode_new) {
need_chown = 1;
@ -6316,11 +6405,14 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
} else {
u64 old_size;
ret = get_inode_info(sctx->parent_root, sctx->cur_ino,
&old_size, NULL, &right_mode, &right_uid,
&right_gid, NULL, &right_fileattr);
ret = get_inode_info(sctx->parent_root, sctx->cur_ino, &info);
if (ret < 0)
goto out;
old_size = info.size;
right_mode = info.mode;
right_uid = info.uid;
right_gid = info.gid;
right_fileattr = info.fileattr;
if (left_uid != right_uid || left_gid != right_gid)
need_chown = 1;
@ -6377,6 +6469,11 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
if (ret < 0)
goto out;
}
if (sctx->cur_inode_needs_verity) {
ret = process_verity(sctx);
if (ret < 0)
goto out;
}
ret = send_capabilities(sctx);
if (ret < 0)
@ -6407,86 +6504,6 @@ out:
return ret;
}
struct parent_paths_ctx {
struct list_head *refs;
struct send_ctx *sctx;
};
static int record_parent_ref(int num, u64 dir, int index, struct fs_path *name,
void *ctx)
{
struct parent_paths_ctx *ppctx = ctx;
/*
* Pass 0 as the generation for the directory, we don't care about it
* here as we have no new references to add, we just want to delete all
* references for an inode.
*/
return record_ref_in_tree(&ppctx->sctx->rbtree_deleted_refs, ppctx->refs,
name, dir, 0, ppctx->sctx);
}
/*
* Issue unlink operations for all paths of the current inode found in the
* parent snapshot.
*/
static int btrfs_unlink_all_paths(struct send_ctx *sctx)
{
LIST_HEAD(deleted_refs);
struct btrfs_path *path;
struct btrfs_root *root = sctx->parent_root;
struct btrfs_key key;
struct btrfs_key found_key;
struct parent_paths_ctx ctx;
int iter_ret = 0;
int ret;
path = alloc_path_for_send();
if (!path)
return -ENOMEM;
key.objectid = sctx->cur_ino;
key.type = BTRFS_INODE_REF_KEY;
key.offset = 0;
ctx.refs = &deleted_refs;
ctx.sctx = sctx;
btrfs_for_each_slot(root, &key, &found_key, path, iter_ret) {
if (found_key.objectid != key.objectid)
break;
if (found_key.type != key.type &&
found_key.type != BTRFS_INODE_EXTREF_KEY)
break;
ret = iterate_inode_ref(root, path, &found_key, 1,
record_parent_ref, &ctx);
if (ret < 0)
goto out;
}
/* Catch error found during iteration */
if (iter_ret < 0) {
ret = iter_ret;
goto out;
}
while (!list_empty(&deleted_refs)) {
struct recorded_ref *ref;
ref = list_first_entry(&deleted_refs, struct recorded_ref, list);
ret = send_unlink(sctx, ref->full_path);
if (ret < 0)
goto out;
recorded_ref_free(ref);
}
ret = 0;
out:
btrfs_free_path(path);
if (ret)
__free_recorded_refs(&deleted_refs);
return ret;
}
static void close_current_inode(struct send_ctx *sctx)
{
u64 i_size;
@ -6577,25 +6594,36 @@ static int changed_inode(struct send_ctx *sctx,
* file descriptor against it or turning a RO snapshot into RW mode,
* keep an open file descriptor against a file, delete it and then
* turn the snapshot back to RO mode before using it for a send
* operation. So if we find such cases, ignore the inode and all its
* items completely if it's a new inode, or if it's a changed inode
* make sure all its previous paths (from the parent snapshot) are all
* unlinked and all other the inode items are ignored.
* operation. The former is what the receiver operation does.
* Therefore, if we want to send these snapshots soon after they're
* received, we need to handle orphan inodes as well. Moreover, orphans
* can appear not only in the send snapshot but also in the parent
* snapshot. Here are several cases:
*
* Case 1: BTRFS_COMPARE_TREE_NEW
* | send snapshot | action
* --------------------------------
* nlink | 0 | ignore
*
* Case 2: BTRFS_COMPARE_TREE_DELETED
* | parent snapshot | action
* ----------------------------------
* nlink | 0 | as usual
* Note: No unlinks will be sent because there're no paths for it.
*
* Case 3: BTRFS_COMPARE_TREE_CHANGED
* | | parent snapshot | send snapshot | action
* -----------------------------------------------------------------------
* subcase 1 | nlink | 0 | 0 | ignore
* subcase 2 | nlink | >0 | 0 | new_gen(deletion)
* subcase 3 | nlink | 0 | >0 | new_gen(creation)
*
*/
if (result == BTRFS_COMPARE_TREE_NEW ||
result == BTRFS_COMPARE_TREE_CHANGED) {
u32 nlinks;
nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
if (nlinks == 0) {
if (result == BTRFS_COMPARE_TREE_NEW) {
if (btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii) == 0) {
sctx->ignore_cur_inode = true;
if (result == BTRFS_COMPARE_TREE_CHANGED)
ret = btrfs_unlink_all_paths(sctx);
goto out;
}
}
if (result == BTRFS_COMPARE_TREE_NEW) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
@ -6616,6 +6644,16 @@ static int changed_inode(struct send_ctx *sctx,
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->right_path->nodes[0], right_ii);
} else if (result == BTRFS_COMPARE_TREE_CHANGED) {
u32 new_nlinks, old_nlinks;
new_nlinks = btrfs_inode_nlink(sctx->left_path->nodes[0], left_ii);
old_nlinks = btrfs_inode_nlink(sctx->right_path->nodes[0], right_ii);
if (new_nlinks == 0 && old_nlinks == 0) {
sctx->ignore_cur_inode = true;
goto out;
} else if (new_nlinks == 0 || old_nlinks == 0) {
sctx->cur_inode_new_gen = 1;
}
/*
* We need to do some special handling in case the inode was
* reported as changed with a changed generation number. This
@ -6642,38 +6680,44 @@ static int changed_inode(struct send_ctx *sctx,
/*
* Now process the inode as if it was new.
*/
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
sctx->cur_inode_size = btrfs_inode_size(
sctx->left_path->nodes[0], left_ii);
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0], left_ii);
sctx->cur_inode_rdev = btrfs_inode_rdev(
sctx->left_path->nodes[0], left_ii);
ret = send_create_inode_if_needed(sctx);
if (ret < 0)
goto out;
if (new_nlinks > 0) {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = true;
sctx->cur_inode_deleted = false;
sctx->cur_inode_size = btrfs_inode_size(
sctx->left_path->nodes[0],
left_ii);
sctx->cur_inode_mode = btrfs_inode_mode(
sctx->left_path->nodes[0],
left_ii);
sctx->cur_inode_rdev = btrfs_inode_rdev(
sctx->left_path->nodes[0],
left_ii);
ret = send_create_inode_if_needed(sctx);
if (ret < 0)
goto out;
ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
if (ret < 0)
goto out;
/*
* Advance send_progress now as we did not get into
* process_recorded_refs_if_needed in the new_gen case.
*/
sctx->send_progress = sctx->cur_ino + 1;
ret = process_all_refs(sctx, BTRFS_COMPARE_TREE_NEW);
if (ret < 0)
goto out;
/*
* Advance send_progress now as we did not get
* into process_recorded_refs_if_needed in the
* new_gen case.
*/
sctx->send_progress = sctx->cur_ino + 1;
/*
* Now process all extents and xattrs of the inode as if
* they were all new.
*/
ret = process_all_extents(sctx);
if (ret < 0)
goto out;
ret = process_all_new_xattrs(sctx);
if (ret < 0)
goto out;
/*
* Now process all extents and xattrs of the
* inode as if they were all new.
*/
ret = process_all_extents(sctx);
if (ret < 0)
goto out;
ret = process_all_new_xattrs(sctx);
if (ret < 0)
goto out;
}
} else {
sctx->cur_inode_gen = left_gen;
sctx->cur_inode_new = false;
@ -6785,18 +6829,27 @@ static int changed_extent(struct send_ctx *sctx,
return ret;
}
static int changed_verity(struct send_ctx *sctx, enum btrfs_compare_tree_result result)
{
int ret = 0;
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result == BTRFS_COMPARE_TREE_NEW)
sctx->cur_inode_needs_verity = true;
}
return ret;
}
static int dir_changed(struct send_ctx *sctx, u64 dir)
{
u64 orig_gen, new_gen;
int ret;
ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
NULL, NULL, NULL);
ret = get_inode_gen(sctx->send_root, dir, &new_gen);
if (ret)
return ret;
ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
NULL, NULL, NULL, NULL);
ret = get_inode_gen(sctx->parent_root, dir, &orig_gen);
if (ret)
return ret;
@ -6939,6 +6992,9 @@ static int changed_cb(struct btrfs_path *left_path,
ret = changed_xattr(sctx, result);
else if (key->type == BTRFS_EXTENT_DATA_KEY)
ret = changed_extent(sctx, result);
else if (key->type == BTRFS_VERITY_DESC_ITEM_KEY &&
key->offset == 0)
ret = changed_verity(sctx, result);
}
out:
@ -8036,6 +8092,7 @@ out:
kvfree(sctx->clone_roots);
kfree(sctx->send_buf_pages);
kvfree(sctx->send_buf);
kvfree(sctx->verity_descriptor);
name_cache_free(sctx);

View File

@ -92,8 +92,11 @@ enum btrfs_send_cmd {
BTRFS_SEND_C_ENCODED_WRITE = 25,
BTRFS_SEND_C_MAX_V2 = 25,
/* Version 3 */
BTRFS_SEND_C_ENABLE_VERITY = 26,
BTRFS_SEND_C_MAX_V3 = 26,
/* End */
BTRFS_SEND_C_MAX = 25,
BTRFS_SEND_C_MAX = 26,
};
/* attributes in send stream */
@ -160,8 +163,14 @@ enum {
BTRFS_SEND_A_ENCRYPTION = 31,
BTRFS_SEND_A_MAX_V2 = 31,
/* End */
BTRFS_SEND_A_MAX = 31,
/* Version 3 */
BTRFS_SEND_A_VERITY_ALGORITHM = 32,
BTRFS_SEND_A_VERITY_BLOCK_SIZE = 33,
BTRFS_SEND_A_VERITY_SALT_DATA = 34,
BTRFS_SEND_A_VERITY_SIG_DATA = 35,
BTRFS_SEND_A_MAX_V3 = 35,
__BTRFS_SEND_A_MAX = 35,
};
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);

View File

@ -293,32 +293,36 @@ out:
return ret;
}
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
u64 bytes_readonly, u64 bytes_zone_unusable,
bool active, struct btrfs_space_info **space_info)
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group)
{
struct btrfs_space_info *found;
int factor;
int factor, index;
factor = btrfs_bg_type_to_factor(flags);
factor = btrfs_bg_type_to_factor(block_group->flags);
found = btrfs_find_space_info(info, flags);
found = btrfs_find_space_info(info, block_group->flags);
ASSERT(found);
spin_lock(&found->lock);
found->total_bytes += total_bytes;
if (active)
found->active_total_bytes += total_bytes;
found->disk_total += total_bytes * factor;
found->bytes_used += bytes_used;
found->disk_used += bytes_used * factor;
found->bytes_readonly += bytes_readonly;
found->bytes_zone_unusable += bytes_zone_unusable;
if (total_bytes > 0)
found->total_bytes += block_group->length;
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
found->active_total_bytes += block_group->length;
found->disk_total += block_group->length * factor;
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
found->bytes_readonly += block_group->bytes_super;
found->bytes_zone_unusable += block_group->zone_unusable;
if (block_group->length > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
spin_unlock(&found->lock);
*space_info = found;
block_group->space_info = found;
index = btrfs_bg_flags_to_raid_index(block_group->flags);
down_write(&found->groups_sem);
list_add_tail(&block_group->list, &found->block_groups[index]);
up_write(&found->groups_sem);
}
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@ -472,28 +476,47 @@ do { \
spin_unlock(&__rsv->lock); \
} while (0)
static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info)
static const char *space_info_flag_to_str(const struct btrfs_space_info *space_info)
{
lockdep_assert_held(&info->lock);
/* The free space could be negative in case of overcommit */
btrfs_info(fs_info, "space_info %llu has %lld free, is %sfull",
info->flags,
(s64)(info->total_bytes - btrfs_space_info_used(info, true)),
info->full ? "" : "not ");
btrfs_info(fs_info,
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
info->bytes_reserved, info->bytes_may_use,
info->bytes_readonly, info->bytes_zone_unusable);
switch (space_info->flags) {
case BTRFS_BLOCK_GROUP_SYSTEM:
return "SYSTEM";
case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA:
return "DATA+METADATA";
case BTRFS_BLOCK_GROUP_DATA:
return "DATA";
case BTRFS_BLOCK_GROUP_METADATA:
return "METADATA";
default:
return "UNKNOWN";
}
}
static void dump_global_block_rsv(struct btrfs_fs_info *fs_info)
{
DUMP_BLOCK_RSV(fs_info, global_block_rsv);
DUMP_BLOCK_RSV(fs_info, trans_block_rsv);
DUMP_BLOCK_RSV(fs_info, chunk_block_rsv);
DUMP_BLOCK_RSV(fs_info, delayed_block_rsv);
DUMP_BLOCK_RSV(fs_info, delayed_refs_rsv);
}
static void __btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *info)
{
const char *flag_str = space_info_flag_to_str(info);
lockdep_assert_held(&info->lock);
/* The free space could be negative in case of overcommit */
btrfs_info(fs_info, "space_info %s has %lld free, is %sfull",
flag_str,
(s64)(info->total_bytes - btrfs_space_info_used(info, true)),
info->full ? "" : "not ");
btrfs_info(fs_info,
"space_info total=%llu, used=%llu, pinned=%llu, reserved=%llu, may_use=%llu, readonly=%llu zone_unusable=%llu",
info->total_bytes, info->bytes_used, info->bytes_pinned,
info->bytes_reserved, info->bytes_may_use,
info->bytes_readonly, info->bytes_zone_unusable);
}
void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
@ -505,6 +528,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
spin_lock(&info->lock);
__btrfs_dump_space_info(fs_info, info);
dump_global_block_rsv(fs_info);
spin_unlock(&info->lock);
if (!dump_block_groups)
@ -1662,7 +1686,6 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
&space_info->priority_tickets);
}
} else if (!ret && space_info->flags & BTRFS_BLOCK_GROUP_METADATA) {
used += orig_bytes;
/*
* We will do the space reservation dance during log replay,
* which means we won't have fs_info->fs_root set, so don't do
@ -1737,7 +1760,8 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
int ret;
ASSERT(flush == BTRFS_RESERVE_FLUSH_DATA ||
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE);
flush == BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE ||
flush == BTRFS_RESERVE_NO_FLUSH);
ASSERT(!current->journal_info || flush != BTRFS_RESERVE_FLUSH_DATA);
ret = __reserve_bytes(fs_info, data_sinfo, bytes, flush);
@ -1749,3 +1773,17 @@ int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
}
return ret;
}
/* Dump all the space infos when we abort a transaction due to ENOSPC. */
__cold void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info)
{
struct btrfs_space_info *space_info;
btrfs_info(fs_info, "dumping space info:");
list_for_each_entry(space_info, &fs_info->space_info, list) {
spin_lock(&space_info->lock);
__btrfs_dump_space_info(fs_info, space_info);
spin_unlock(&space_info->lock);
}
dump_global_block_rsv(fs_info);
}

View File

@ -123,10 +123,8 @@ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
u64 total_bytes, u64 bytes_used,
u64 bytes_readonly, u64 bytes_zone_unusable,
bool active, struct btrfs_space_info **space_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
struct btrfs_block_group *block_group);
void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
u64 chunk_size);
struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
@ -159,4 +157,7 @@ static inline void btrfs_space_info_free_bytes_may_use(
}
int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes,
enum btrfs_reserve_flush_enum flush);
void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
#endif /* BTRFS_SPACE_INFO_H */

View File

@ -346,12 +346,14 @@ void __cold btrfs_err_32bit_limit(struct btrfs_fs_info *fs_info)
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
const char *function,
unsigned int line, int errno)
unsigned int line, int errno, bool first_hit)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
WRITE_ONCE(trans->aborted, errno);
WRITE_ONCE(trans->transaction->aborted, errno);
if (first_hit && errno == -ENOSPC)
btrfs_dump_space_info_for_trans_abort(fs_info);
/* Wake up anybody who may be waiting on this transaction */
wake_up(&fs_info->transaction_wait);
wake_up(&fs_info->transaction_blocked_wait);
@ -626,6 +628,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
int saved_compress_level;
bool saved_compress_force;
int no_compress = 0;
const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state);
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE))
btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
@ -1137,10 +1140,12 @@ out:
}
if (!ret)
ret = btrfs_check_mountopts_zoned(info);
if (!ret && btrfs_test_opt(info, SPACE_CACHE))
btrfs_info(info, "disk space caching is enabled");
if (!ret && btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(info, "using free space tree");
if (!ret && !remounting) {
if (btrfs_test_opt(info, SPACE_CACHE))
btrfs_info(info, "disk space caching is enabled");
if (btrfs_test_opt(info, FREE_SPACE_TREE))
btrfs_info(info, "using free space tree");
}
return ret;
}
@ -2009,14 +2014,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
if (ret)
goto restore;
/* V1 cache is not supported for subpage mount. */
if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
btrfs_warn(fs_info,
"v1 space cache is not supported for page size %lu with sectorsize %u",
PAGE_SIZE, fs_info->sectorsize);
ret = -EINVAL;
ret = btrfs_check_features(fs_info, sb);
if (ret < 0)
goto restore;
}
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
@ -2550,11 +2551,71 @@ static int btrfs_freeze(struct super_block *sb)
return btrfs_commit_transaction(trans);
}
static int check_dev_super(struct btrfs_device *dev)
{
struct btrfs_fs_info *fs_info = dev->fs_info;
struct btrfs_super_block *sb;
int ret = 0;
/* This should be called with fs still frozen. */
ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags));
/* Missing dev, no need to check. */
if (!dev->bdev)
return 0;
/* Only need to check the primary super block. */
sb = btrfs_read_dev_one_super(dev->bdev, 0, true);
if (IS_ERR(sb))
return PTR_ERR(sb);
/* Btrfs_validate_super() includes fsid check against super->fsid. */
ret = btrfs_validate_super(fs_info, sb, 0);
if (ret < 0)
goto out;
if (btrfs_super_generation(sb) != fs_info->last_trans_committed) {
btrfs_err(fs_info, "transid mismatch, has %llu expect %llu",
btrfs_super_generation(sb),
fs_info->last_trans_committed);
ret = -EUCLEAN;
goto out;
}
out:
btrfs_release_disk_super(sb);
return ret;
}
static int btrfs_unfreeze(struct super_block *sb)
{
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
struct btrfs_device *device;
int ret = 0;
/*
* Make sure the fs is not changed by accident (like hibernation then
* modified by other OS).
* If we found anything wrong, we mark the fs error immediately.
*
* And since the fs is frozen, no one can modify the fs yet, thus
* we don't need to hold device_list_mutex.
*/
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
ret = check_dev_super(device);
if (ret < 0) {
btrfs_handle_fs_error(fs_info, ret,
"super block on devid %llu got modified unexpectedly",
device->devid);
break;
}
}
clear_bit(BTRFS_FS_FROZEN, &fs_info->flags);
/*
* We still return 0, to allow VFS layer to unfreeze the fs even the
* above checks failed. Since the fs is either fine or read-only, we're
* safe to continue, without causing further damage.
*/
return 0;
}
@ -2662,17 +2723,21 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_compress;
err = extent_io_init();
err = extent_state_init_cachep();
if (err)
goto free_cachep;
err = extent_state_cache_init();
err = extent_buffer_init_cachep();
if (err)
goto free_extent_io;
goto free_extent_cachep;
err = btrfs_bioset_init();
if (err)
goto free_eb_cachep;
err = extent_map_init();
if (err)
goto free_extent_state_cache;
goto free_bioset;
err = ordered_data_init();
if (err)
@ -2724,10 +2789,12 @@ free_ordered_data:
ordered_data_exit();
free_extent_map:
extent_map_exit();
free_extent_state_cache:
extent_state_cache_exit();
free_extent_io:
extent_io_exit();
free_bioset:
btrfs_bioset_exit();
free_eb_cachep:
extent_buffer_free_cachep();
free_extent_cachep:
extent_state_free_cachep();
free_cachep:
btrfs_destroy_cachep();
free_compress:
@ -2746,8 +2813,9 @@ static void __exit exit_btrfs_fs(void)
btrfs_prelim_ref_exit();
ordered_data_exit();
extent_map_exit();
extent_state_cache_exit();
extent_io_exit();
btrfs_bioset_exit();
extent_state_free_cachep();
extent_buffer_free_cachep();
btrfs_interface_exit();
unregister_filesystem(&btrfs_fs_type);
btrfs_exit_sysfs();

View File

@ -35,12 +35,12 @@
* qgroup_attrs /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid>
* space_info_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>
* raid_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile>
* discard_attrs /sys/fs/btrfs/<uuid>/discard
*
* When built with BTRFS_CONFIG_DEBUG:
*
* btrfs_debug_feature_attrs /sys/fs/btrfs/debug
* btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug
* discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard
*/
struct btrfs_feature_attr {
@ -286,6 +286,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA);
BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
BTRFS_FEAT_ATTR_COMPAT_RO(block_group_tree, BLOCK_GROUP_TREE);
BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
#ifdef CONFIG_BLK_DEV_ZONED
BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
@ -317,6 +318,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
BTRFS_FEAT_ATTR_PTR(metadata_uuid),
BTRFS_FEAT_ATTR_PTR(free_space_tree),
BTRFS_FEAT_ATTR_PTR(raid1c34),
BTRFS_FEAT_ATTR_PTR(block_group_tree),
#ifdef CONFIG_BLK_DEV_ZONED
BTRFS_FEAT_ATTR_PTR(zoned),
#endif
@ -429,12 +431,10 @@ static const struct attribute_group btrfs_static_feature_attr_group = {
.attrs = btrfs_supported_static_feature_attrs,
};
#ifdef CONFIG_BTRFS_DEBUG
/*
* Discard statistics and tunables
*/
#define discard_to_fs_info(_kobj) to_fs_info((_kobj)->parent->parent)
#define discard_to_fs_info(_kobj) to_fs_info(get_btrfs_kobj(_kobj))
static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
struct kobj_attribute *a,
@ -583,11 +583,11 @@ BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
btrfs_discard_max_discard_size_store);
/*
* Per-filesystem debugging of discard (when mounted with discard=async).
* Per-filesystem stats for discard (when mounted with discard=async).
*
* Path: /sys/fs/btrfs/<uuid>/debug/discard/
* Path: /sys/fs/btrfs/<uuid>/discard/
*/
static const struct attribute *discard_debug_attrs[] = {
static const struct attribute *discard_attrs[] = {
BTRFS_ATTR_PTR(discard, discardable_bytes),
BTRFS_ATTR_PTR(discard, discardable_extents),
BTRFS_ATTR_PTR(discard, discard_bitmap_bytes),
@ -599,6 +599,8 @@ static const struct attribute *discard_debug_attrs[] = {
NULL,
};
#ifdef CONFIG_BTRFS_DEBUG
/*
* Per-filesystem runtime debugging exported via sysfs.
*
@ -837,11 +839,8 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
char *buf)
{
struct btrfs_space_info *space_info = to_space_info(kobj);
ssize_t ret;
ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
return ret;
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
}
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
@ -1150,25 +1149,6 @@ static ssize_t btrfs_generation_show(struct kobject *kobj,
}
BTRFS_ATTR(, generation, btrfs_generation_show);
/*
* Look for an exact string @string in @buffer with possible leading or
* trailing whitespace
*/
static bool strmatch(const char *buffer, const char *string)
{
const size_t len = strlen(string);
/* Skip leading whitespace */
buffer = skip_spaces(buffer);
/* Match entire string, check if the rest is whitespace or empty */
if (strncmp(string, buffer, len) == 0 &&
strlen(skip_spaces(buffer + len)) == 0)
return true;
return false;
}
static const char * const btrfs_read_policy_name[] = { "pid" };
static ssize_t btrfs_read_policy_show(struct kobject *kobj,
@ -1202,7 +1182,7 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
int i;
for (i = 0; i < BTRFS_NR_READ_POLICY; i++) {
if (strmatch(buf, btrfs_read_policy_name[i])) {
if (sysfs_streq(buf, btrfs_read_policy_name[i])) {
if (i != fs_devices->read_policy) {
fs_devices->read_policy = i;
btrfs_info(fs_devices->fs_info,
@ -1222,11 +1202,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
ssize_t ret;
ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
return ret;
return sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold));
}
static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
@ -1427,13 +1404,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
kobject_del(fs_info->space_info_kobj);
kobject_put(fs_info->space_info_kobj);
}
#ifdef CONFIG_BTRFS_DEBUG
if (fs_info->discard_debug_kobj) {
sysfs_remove_files(fs_info->discard_debug_kobj,
discard_debug_attrs);
kobject_del(fs_info->discard_debug_kobj);
kobject_put(fs_info->discard_debug_kobj);
if (fs_info->discard_kobj) {
sysfs_remove_files(fs_info->discard_kobj, discard_attrs);
kobject_del(fs_info->discard_kobj);
kobject_put(fs_info->discard_kobj);
}
#ifdef CONFIG_BTRFS_DEBUG
if (fs_info->debug_kobj) {
sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
kobject_del(fs_info->debug_kobj);
@ -2001,20 +1977,18 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
if (error)
goto failure;
#endif
/* Discard directory */
fs_info->discard_debug_kobj = kobject_create_and_add("discard",
fs_info->debug_kobj);
if (!fs_info->discard_debug_kobj) {
fs_info->discard_kobj = kobject_create_and_add("discard", fsid_kobj);
if (!fs_info->discard_kobj) {
error = -ENOMEM;
goto failure;
}
error = sysfs_create_files(fs_info->discard_debug_kobj,
discard_debug_attrs);
error = sysfs_create_files(fs_info->discard_kobj, discard_attrs);
if (error)
goto failure;
#endif
error = addrm_unknown_feature_attrs(fs_info, true);
if (error)
@ -2041,6 +2015,98 @@ failure:
return error;
}
static ssize_t qgroup_enabled_show(struct kobject *qgroups_kobj,
struct kobj_attribute *a,
char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
bool enabled;
spin_lock(&fs_info->qgroup_lock);
enabled = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON;
spin_unlock(&fs_info->qgroup_lock);
return sysfs_emit(buf, "%d\n", enabled);
}
BTRFS_ATTR(qgroups, enabled, qgroup_enabled_show);
static ssize_t qgroup_inconsistent_show(struct kobject *qgroups_kobj,
struct kobj_attribute *a,
char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
bool inconsistent;
spin_lock(&fs_info->qgroup_lock);
inconsistent = (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT);
spin_unlock(&fs_info->qgroup_lock);
return sysfs_emit(buf, "%d\n", inconsistent);
}
BTRFS_ATTR(qgroups, inconsistent, qgroup_inconsistent_show);
static ssize_t qgroup_drop_subtree_thres_show(struct kobject *qgroups_kobj,
struct kobj_attribute *a,
char *buf)
{
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
u8 result;
spin_lock(&fs_info->qgroup_lock);
result = fs_info->qgroup_drop_subtree_thres;
spin_unlock(&fs_info->qgroup_lock);
return sysfs_emit(buf, "%d\n", result);
}
static ssize_t qgroup_drop_subtree_thres_store(struct kobject *qgroups_kobj,
struct kobj_attribute *a,
const char *buf, size_t len)
{
struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent);
u8 new_thres;
int ret;
ret = kstrtou8(buf, 10, &new_thres);
if (ret)
return -EINVAL;
if (new_thres > BTRFS_MAX_LEVEL)
return -EINVAL;
spin_lock(&fs_info->qgroup_lock);
fs_info->qgroup_drop_subtree_thres = new_thres;
spin_unlock(&fs_info->qgroup_lock);
return len;
}
BTRFS_ATTR_RW(qgroups, drop_subtree_threshold, qgroup_drop_subtree_thres_show,
qgroup_drop_subtree_thres_store);
/*
* Qgroups global info
*
* Path: /sys/fs/btrfs/<uuid>/qgroups/
*/
static struct attribute *qgroups_attrs[] = {
BTRFS_ATTR_PTR(qgroups, enabled),
BTRFS_ATTR_PTR(qgroups, inconsistent),
BTRFS_ATTR_PTR(qgroups, drop_subtree_threshold),
NULL
};
ATTRIBUTE_GROUPS(qgroups);
static void qgroups_release(struct kobject *kobj)
{
kfree(kobj);
}
static struct kobj_type qgroups_ktype = {
.sysfs_ops = &kobj_sysfs_ops,
.default_groups = qgroups_groups,
.release = qgroups_release,
};
static inline struct btrfs_fs_info *qgroup_kobj_to_fs_info(struct kobject *kobj)
{
return to_fs_info(kobj->parent->parent);
@ -2166,11 +2232,15 @@ int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info)
if (fs_info->qgroups_kobj)
return 0;
fs_info->qgroups_kobj = kobject_create_and_add("qgroups", fsid_kobj);
if (!fs_info->qgroups_kobj) {
ret = -ENOMEM;
fs_info->qgroups_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL);
if (!fs_info->qgroups_kobj)
return -ENOMEM;
ret = kobject_init_and_add(fs_info->qgroups_kobj, &qgroups_ktype,
fsid_kobj, "qgroups");
if (ret < 0)
goto out;
}
rbtree_postorder_for_each_entry_safe(qgroup, next,
&fs_info->qgroup_tree, node) {
ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup);

View File

@ -243,7 +243,7 @@ void btrfs_free_dummy_block_group(struct btrfs_block_group *cache)
{
if (!cache)
return;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
kfree(cache->free_space_ctl);
kfree(cache);
}

View File

@ -80,7 +80,6 @@ static void extent_flag_to_str(const struct extent_state *state, char *dest)
PRINT_ONE_FLAG(state, dest, cur, NODATASUM);
PRINT_ONE_FLAG(state, dest, cur, CLEAR_META_RESV);
PRINT_ONE_FLAG(state, dest, cur, NEED_WAIT);
PRINT_ONE_FLAG(state, dest, cur, DAMAGED);
PRINT_ONE_FLAG(state, dest, cur, NORESERVE);
PRINT_ONE_FLAG(state, dest, cur, QGROUP_RESERVED);
PRINT_ONE_FLAG(state, dest, cur, CLEAR_DATA_RESV);
@ -172,7 +171,7 @@ static int test_find_delalloc(u32 sectorsize)
sectorsize - 1, start, end);
goto out_bits;
}
unlock_extent(tmp, start, end);
unlock_extent(tmp, start, end, NULL);
unlock_page(locked_page);
put_page(locked_page);
@ -208,7 +207,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("there were unlocked pages in the range");
goto out_bits;
}
unlock_extent(tmp, start, end);
unlock_extent(tmp, start, end, NULL);
/* locked_page was unlocked above */
put_page(locked_page);
@ -263,7 +262,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("pages in range were not all locked");
goto out_bits;
}
unlock_extent(tmp, start, end);
unlock_extent(tmp, start, end, NULL);
/*
* Now to test where we run into a page that is no longer dirty in the

View File

@ -82,7 +82,7 @@ static int test_extents(struct btrfs_block_group *cache)
}
/* Cleanup */
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
return 0;
}
@ -149,7 +149,7 @@ static int test_bitmaps(struct btrfs_block_group *cache, u32 sectorsize)
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
return 0;
}
@ -230,7 +230,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
/* Now with the extent entry offset into the bitmap */
ret = test_add_free_space_entry(cache, SZ_4M, SZ_4M, 1);
@ -266,7 +266,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
* [ bitmap ]
* [ del ]
*/
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
ret = test_add_free_space_entry(cache, bitmap_offset + SZ_4M, SZ_4M, 1);
if (ret) {
test_err("couldn't add bitmap %d", ret);
@ -291,7 +291,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
return -1;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
/*
* This blew up before, we have part of the free space in a bitmap and
@ -317,7 +317,7 @@ static int test_bitmaps_and_extents(struct btrfs_block_group *cache,
return ret;
}
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
return 0;
}
@ -629,7 +629,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
if (ret)
return ret;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
/*
* Now test a similar scenario, but where our extent entry is located
@ -819,7 +819,7 @@ test_steal_space_from_bitmap_to_extent(struct btrfs_block_group *cache,
return ret;
cache->free_space_ctl->op = orig_free_space_ops;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
return 0;
}
@ -868,7 +868,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
}
/* Now validate bitmaps do the correct thing. */
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
for (i = 0; i < 2; i++) {
offset = i * BITS_PER_BITMAP * sectorsize;
bytes = (i + 1) * SZ_1M;
@ -891,7 +891,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
}
/* Now validate bitmaps with different ->max_extent_size. */
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
orig_free_space_ops = cache->free_space_ctl->op;
cache->free_space_ctl->op = &test_free_space_ops;
@ -998,7 +998,7 @@ static int test_bytes_index(struct btrfs_block_group *cache, u32 sectorsize)
}
cache->free_space_ctl->op = orig_free_space_ops;
__btrfs_remove_free_space_cache(cache->free_space_ctl);
btrfs_remove_free_space_cache(cache);
return 0;
}

View File

@ -267,7 +267,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
goto out;
}
free_extent_map(em);
btrfs_drop_extent_cache(BTRFS_I(inode), 0, (u64)-1, 0);
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (u64)-1, false);
/*
* All of the magic numbers are based on the mapping setup in
@ -975,7 +975,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE >> 1,
(BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_UPTODATE, 0, 0, NULL);
EXTENT_UPTODATE, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@ -1043,7 +1043,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
BTRFS_MAX_EXTENT_SIZE + sectorsize,
BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1,
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_UPTODATE, 0, 0, NULL);
EXTENT_UPTODATE, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@ -1076,7 +1076,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize)
/* Empty */
ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_UPTODATE, 0, 0, NULL);
EXTENT_UPTODATE, NULL);
if (ret) {
test_err("clear_extent_bit returned %d", ret);
goto out;
@ -1092,7 +1092,7 @@ out:
if (ret)
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
EXTENT_UPTODATE, 0, 0, NULL);
EXTENT_UPTODATE, NULL);
iput(inode);
btrfs_free_dummy_root(root);
btrfs_free_dummy_fs_info(fs_info);

View File

@ -161,7 +161,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
struct btrfs_transaction *cur_trans = trans->transaction;
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_root *root, *tmp;
struct btrfs_caching_control *caching_ctl, *next;
/*
* At this point no one can be using this transaction to modify any tree
@ -196,46 +195,6 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
}
spin_unlock(&cur_trans->dropped_roots_lock);
/*
* We have to update the last_byte_to_unpin under the commit_root_sem,
* at the same time we swap out the commit roots.
*
* This is because we must have a real view of the last spot the caching
* kthreads were while caching. Consider the following views of the
* extent tree for a block group
*
* commit root
* +----+----+----+----+----+----+----+
* |\\\\| |\\\\|\\\\| |\\\\|\\\\|
* +----+----+----+----+----+----+----+
* 0 1 2 3 4 5 6 7
*
* new commit root
* +----+----+----+----+----+----+----+
* | | | |\\\\| | |\\\\|
* +----+----+----+----+----+----+----+
* 0 1 2 3 4 5 6 7
*
* If the cache_ctl->progress was at 3, then we are only allowed to
* unpin [0,1) and [2,3], because the caching thread has already
* processed those extents. We are not allowed to unpin [5,6), because
* the caching thread will re-start it's search from 3, and thus find
* the hole from [4,6) to add to the free space cache.
*/
write_lock(&fs_info->block_group_cache_lock);
list_for_each_entry_safe(caching_ctl, next,
&fs_info->caching_block_groups, list) {
struct btrfs_block_group *cache = caching_ctl->block_group;
if (btrfs_block_group_done(cache)) {
cache->last_byte_to_unpin = (u64)-1;
list_del_init(&caching_ctl->list);
btrfs_put_caching_control(caching_ctl);
} else {
cache->last_byte_to_unpin = caching_ctl->progress;
}
}
write_unlock(&fs_info->block_group_cache_lock);
up_write(&fs_info->commit_root_sem);
}
@ -313,6 +272,8 @@ loop:
atomic_inc(&cur_trans->num_writers);
extwriter_counter_inc(cur_trans, type);
spin_unlock(&fs_info->trans_lock);
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
return 0;
}
spin_unlock(&fs_info->trans_lock);
@ -334,16 +295,23 @@ loop:
if (!cur_trans)
return -ENOMEM;
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_writers);
btrfs_lockdep_acquire(fs_info, btrfs_trans_num_extwriters);
spin_lock(&fs_info->trans_lock);
if (fs_info->running_transaction) {
/*
* someone started a transaction after we unlocked. Make sure
* to redo the checks above
*/
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
kfree(cur_trans);
goto loop;
} else if (BTRFS_FS_ERROR(fs_info)) {
spin_unlock(&fs_info->trans_lock);
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
kfree(cur_trans);
return -EROFS;
}
@ -397,7 +365,7 @@ loop:
spin_lock_init(&cur_trans->releasing_ebs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
IO_TREE_TRANS_DIRTY_PAGES, fs_info->btree_inode);
IO_TREE_TRANS_DIRTY_PAGES, NULL);
extent_io_tree_init(fs_info, &cur_trans->pinned_extents,
IO_TREE_FS_PINNED_EXTENTS, NULL);
fs_info->generation++;
@ -541,6 +509,7 @@ static void wait_current_trans(struct btrfs_fs_info *fs_info)
refcount_inc(&cur_trans->use_count);
spin_unlock(&fs_info->trans_lock);
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
wait_event(fs_info->transaction_wait,
cur_trans->state >= TRANS_STATE_UNBLOCKED ||
TRANS_ABORTED(cur_trans));
@ -625,7 +594,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
*/
num_bytes = btrfs_calc_insert_metadata_size(fs_info, num_items);
if (flush == BTRFS_RESERVE_FLUSH_ALL &&
delayed_refs_rsv->full == 0) {
btrfs_block_rsv_full(delayed_refs_rsv) == 0) {
delayed_refs_bytes = num_bytes;
num_bytes <<= 1;
}
@ -650,7 +619,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
if (rsv->space_info->force_alloc)
do_chunk_alloc = true;
} else if (num_items == 0 && flush == BTRFS_RESERVE_FLUSH_ALL &&
!delayed_refs_rsv->full) {
!btrfs_block_rsv_full(delayed_refs_rsv)) {
/*
* Some people call with btrfs_start_transaction(root, 0)
* because they can be throttled, but have some other mechanism
@ -859,6 +828,15 @@ static noinline void wait_for_commit(struct btrfs_transaction *commit,
u64 transid = commit->transid;
bool put = false;
/*
* At the moment this function is called with min_state either being
* TRANS_STATE_COMPLETED or TRANS_STATE_SUPER_COMMITTED.
*/
if (min_state == TRANS_STATE_COMPLETED)
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
else
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
while (1) {
wait_event(commit->commit_wait, commit->state >= min_state);
if (put)
@ -1022,6 +1000,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
extwriter_counter_dec(cur_trans, trans->type);
cond_wake_up(&cur_trans->writer_wait);
btrfs_lockdep_release(info, btrfs_trans_num_extwriters);
btrfs_lockdep_release(info, btrfs_trans_num_writers);
btrfs_put_transaction(cur_trans);
if (current->journal_info == trans)
@ -1134,7 +1116,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
* it's safe to do it (through extent_io_tree_release()).
*/
err = clear_extent_bit(dirty_pages, start, end,
EXTENT_NEED_WAIT, 0, 0, &cached_state);
EXTENT_NEED_WAIT, &cached_state);
if (err == -ENOMEM)
err = 0;
if (!err)
@ -1912,14 +1894,6 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
super->cache_generation = 0;
if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
super->uuid_tree_generation = root_item->generation;
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
root_item = &fs_info->block_group_root->root_item;
super->block_group_root = root_item->bytenr;
super->block_group_root_generation = root_item->generation;
super->block_group_root_level = root_item->level;
}
}
int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@ -1967,6 +1941,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
* Wait for the current transaction commit to start and block
* subsequent transaction joins
*/
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
wait_event(fs_info->transaction_blocked_wait,
cur_trans->state >= TRANS_STATE_COMMIT_START ||
TRANS_ABORTED(cur_trans));
@ -1994,6 +1969,12 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
if (cur_trans == fs_info->running_transaction) {
cur_trans->state = TRANS_STATE_COMMIT_DOING;
spin_unlock(&fs_info->trans_lock);
/*
* The thread has already released the lockdep map as reader
* already in btrfs_commit_transaction().
*/
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
@ -2118,12 +2099,12 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ktime_t interval;
ASSERT(refcount_read(&trans->use_count) == 1);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
/* Stop the commit early if ->aborted is set */
if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
btrfs_end_transaction(trans);
return ret;
goto lockdep_trans_commit_start_release;
}
btrfs_trans_release_metadata(trans);
@ -2140,10 +2121,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* Any running threads may add more while we are here.
*/
ret = btrfs_run_delayed_refs(trans, 0);
if (ret) {
btrfs_end_transaction(trans);
return ret;
}
if (ret)
goto lockdep_trans_commit_start_release;
}
btrfs_create_pending_block_groups(trans);
@ -2172,10 +2151,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (run_it) {
ret = btrfs_start_dirty_block_groups(trans);
if (ret) {
btrfs_end_transaction(trans);
return ret;
}
if (ret)
goto lockdep_trans_commit_start_release;
}
}
@ -2190,6 +2167,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (trans->in_fsync)
want_state = TRANS_STATE_SUPER_COMMITTED;
btrfs_trans_state_lockdep_release(fs_info,
BTRFS_LOCKDEP_TRANS_COMMIT_START);
ret = btrfs_end_transaction(trans);
wait_for_commit(cur_trans, want_state);
@ -2203,6 +2183,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&fs_info->transaction_blocked_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
if (cur_trans->list.prev != &fs_info->trans_list) {
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
@ -2222,7 +2203,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
btrfs_put_transaction(prev_trans);
if (ret)
goto cleanup_transaction;
goto lockdep_release;
} else {
spin_unlock(&fs_info->trans_lock);
}
@ -2236,7 +2217,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
if (BTRFS_FS_ERROR(fs_info)) {
ret = -EROFS;
goto cleanup_transaction;
goto lockdep_release;
}
}
@ -2250,19 +2231,28 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
ret = btrfs_start_delalloc_flush(fs_info);
if (ret)
goto cleanup_transaction;
goto lockdep_release;
ret = btrfs_run_delayed_items(trans);
if (ret)
goto cleanup_transaction;
goto lockdep_release;
/*
* The thread has started/joined the transaction thus it holds the
* lockdep map as a reader. It has to release it before acquiring the
* lockdep map as a writer.
*/
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_extwriters);
wait_event(cur_trans->writer_wait,
extwriter_counter_read(cur_trans) == 0);
/* some pending stuffs might be added after the previous flush. */
ret = btrfs_run_delayed_items(trans);
if (ret)
if (ret) {
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
goto cleanup_transaction;
}
btrfs_wait_delalloc_flush(fs_info);
@ -2271,6 +2261,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
* transaction. Otherwise if this transaction commits before the ordered
* extents complete we lose logged data after a power failure.
*/
btrfs_might_wait_for_event(fs_info, btrfs_trans_pending_ordered);
wait_event(cur_trans->pending_wait,
atomic_read(&cur_trans->pending_ordered) == 0);
@ -2284,9 +2275,27 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
add_pending_snapshot(trans);
cur_trans->state = TRANS_STATE_COMMIT_DOING;
spin_unlock(&fs_info->trans_lock);
/*
* The thread has started/joined the transaction thus it holds the
* lockdep map as a reader. It has to release it before acquiring the
* lockdep map as a writer.
*/
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
btrfs_might_wait_for_event(fs_info, btrfs_trans_num_writers);
wait_event(cur_trans->writer_wait,
atomic_read(&cur_trans->num_writers) == 1);
/*
* Make lockdep happy by acquiring the state locks after
* btrfs_trans_num_writers is released. If we acquired the state locks
* before releasing the btrfs_trans_num_writers lock then lockdep would
* complain because we did not follow the reverse order unlocking rule.
*/
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
/*
* We've started the commit, clear the flag in case we were triggered to
* do an async commit but somebody else started before the transaction
@ -2296,6 +2305,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
if (TRANS_ABORTED(cur_trans)) {
ret = cur_trans->aborted;
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
goto scrub_continue;
}
/*
@ -2430,6 +2440,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
mutex_unlock(&fs_info->reloc_mutex);
wake_up(&fs_info->transaction_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
ret = btrfs_write_and_wait_transaction(trans);
if (ret) {
@ -2461,6 +2472,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_SUPER_COMMITTED;
wake_up(&cur_trans->commit_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
btrfs_finish_extent_commit(trans);
@ -2474,6 +2486,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
*/
cur_trans->state = TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
spin_lock(&fs_info->trans_lock);
list_del_init(&cur_trans->list);
@ -2502,7 +2515,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
unlock_reloc:
mutex_unlock(&fs_info->reloc_mutex);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_UNBLOCKED);
scrub_continue:
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED);
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMPLETED);
btrfs_scrub_continue(fs_info);
cleanup_transaction:
btrfs_trans_release_metadata(trans);
@ -2515,6 +2531,16 @@ cleanup_transaction:
cleanup_transaction(trans, ret);
return ret;
lockdep_release:
btrfs_lockdep_release(fs_info, btrfs_trans_num_extwriters);
btrfs_lockdep_release(fs_info, btrfs_trans_num_writers);
goto cleanup_transaction;
lockdep_trans_commit_start_release:
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
btrfs_end_transaction(trans);
return ret;
}
/*

File diff suppressed because it is too large Load Diff

View File

@ -20,6 +20,7 @@ struct btrfs_log_ctx {
int log_transid;
bool log_new_dentries;
bool logging_new_name;
bool logging_new_delayed_dentries;
/* Indicate if the inode being logged was logged before. */
bool logged_before;
/* Tracks the last logged dir item/index key offset. */
@ -28,6 +29,9 @@ struct btrfs_log_ctx {
struct list_head list;
/* Only used for fast fsyncs. */
struct list_head ordered_extents;
struct list_head conflict_inodes;
int num_conflict_inodes;
bool logging_conflict_inodes;
};
static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
@ -37,10 +41,14 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
ctx->log_transid = 0;
ctx->log_new_dentries = false;
ctx->logging_new_name = false;
ctx->logging_new_delayed_dentries = false;
ctx->logged_before = false;
ctx->inode = inode;
INIT_LIST_HEAD(&ctx->list);
INIT_LIST_HEAD(&ctx->ordered_extents);
INIT_LIST_HEAD(&ctx->conflict_inodes);
ctx->num_conflict_inodes = 0;
ctx->logging_conflict_inodes = false;
}
static inline void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)

View File

@ -659,8 +659,7 @@ rollback:
*
* Returns the size on success or a negative error code on failure.
*/
static int btrfs_get_verity_descriptor(struct inode *inode, void *buf,
size_t buf_size)
int btrfs_get_verity_descriptor(struct inode *inode, void *buf, size_t buf_size)
{
u64 true_size;
int ret = 0;

View File

@ -34,6 +34,8 @@
#include "discard.h"
#include "zoned.h"
static struct bio_set btrfs_bioset;
#define BTRFS_BLOCK_GROUP_STRIPE_MASK (BTRFS_BLOCK_GROUP_RAID0 | \
BTRFS_BLOCK_GROUP_RAID10 | \
BTRFS_BLOCK_GROUP_RAID56_MASK)
@ -247,10 +249,10 @@ static int init_first_rw_device(struct btrfs_trans_handle *trans);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
enum btrfs_map_op op, u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
int mirror_num, int need_raid_map);
struct btrfs_io_stripe *smap,
int *mirror_num_ret, int need_raid_map);
/*
* Device locking
@ -2017,7 +2019,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
struct page *page;
int ret;
disk_super = btrfs_read_dev_one_super(bdev, copy_num);
disk_super = btrfs_read_dev_one_super(bdev, copy_num, false);
if (IS_ERR(disk_super))
continue;
@ -5595,7 +5597,7 @@ int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
if (ret)
goto out;
bg->chunk_item_inserted = 1;
set_bit(BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED, &bg->runtime_flags);
if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
@ -5896,7 +5898,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_
sizeof(u64) * (total_stripes),
GFP_NOFS|__GFP_NOFAIL);
atomic_set(&bioc->error, 0);
refcount_set(&bioc->refs, 1);
bioc->fs_info = fs_info;
@ -6092,7 +6093,7 @@ static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
int ret = 0;
ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
logical, &length, &bioc, 0, 0);
logical, &length, &bioc, NULL, NULL, 0);
if (ret) {
ASSERT(bioc == NULL);
return ret;
@ -6153,9 +6154,7 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
cache = btrfs_lookup_block_group(fs_info, logical);
spin_lock(&cache->lock);
ret = cache->to_copy;
spin_unlock(&cache->lock);
ret = test_bit(BLOCK_GROUP_FLAG_TO_COPY, &cache->runtime_flags);
btrfs_put_block_group(cache);
return ret;
@ -6351,11 +6350,19 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, struct extent_map *em,
return 0;
}
static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *map,
u32 stripe_index, u64 stripe_offset, u64 stripe_nr)
{
dst->dev = map->stripes[stripe_index].dev;
dst->physical = map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
}
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
enum btrfs_map_op op,
u64 logical, u64 *length,
enum btrfs_map_op op, u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
int mirror_num, int need_raid_map)
struct btrfs_io_stripe *smap,
int *mirror_num_ret, int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@ -6366,6 +6373,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
int data_stripes;
int i;
int ret = 0;
int mirror_num = (mirror_num_ret ? *mirror_num_ret : 0);
int num_stripes;
int max_errors = 0;
int tgtdev_indexes = 0;
@ -6526,6 +6534,29 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
tgtdev_indexes = num_stripes;
}
/*
* If this I/O maps to a single device, try to return the device and
* physical block information on the stack instead of allocating an
* I/O context structure.
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
(!need_full_stripe(op) || !dev_replace_is_ongoing ||
!dev_replace->tgtdev)) {
if (patch_the_first_stripe_for_dev_replace) {
smap->dev = dev_replace->tgtdev;
smap->physical = physical_to_patch_in_first_stripe;
*mirror_num_ret = map->num_stripes + 1;
} else {
set_io_stripe(smap, map, stripe_index, stripe_offset,
stripe_nr);
*mirror_num_ret = mirror_num;
}
*bioc_ret = NULL;
ret = 0;
goto out;
}
bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes);
if (!bioc) {
ret = -ENOMEM;
@ -6533,9 +6564,8 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
}
for (i = 0; i < num_stripes; i++) {
bioc->stripes[i].physical = map->stripes[stripe_index].physical +
stripe_offset + stripe_nr * map->stripe_len;
bioc->stripes[i].dev = map->stripes[stripe_index].dev;
set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset,
stripe_nr);
stripe_index++;
}
@ -6603,7 +6633,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
struct btrfs_io_context **bioc_ret, int mirror_num)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
mirror_num, 0);
NULL, &mirror_num, 0);
}
/* For Scrub/replace */
@ -6611,14 +6641,77 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret, 0, 1);
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
NULL, NULL, 1);
}
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_io_context *bioc)
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
*/
static inline void btrfs_bio_init(struct btrfs_bio *bbio,
btrfs_bio_end_io_t end_io, void *private)
{
if (bioc->orig_bio->bi_opf & REQ_META)
return bioc->fs_info->endio_meta_workers;
return bioc->fs_info->endio_workers;
memset(bbio, 0, offsetof(struct btrfs_bio, bio));
bbio->end_io = end_io;
bbio->private = private;
}
/*
* Allocate a btrfs_bio structure. The btrfs_bio is the main I/O container for
* btrfs, and is used for all I/O submitted through btrfs_submit_bio.
*
* Just like the underlying bio_alloc_bioset it will not fail as it is backed by
* a mempool.
*/
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
bio = bio_alloc_bioset(NULL, nr_vecs, opf, GFP_NOFS, &btrfs_bioset);
btrfs_bio_init(btrfs_bio(bio), end_io, private);
return bio;
}
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private)
{
struct bio *bio;
struct btrfs_bio *bbio;
ASSERT(offset <= UINT_MAX && size <= UINT_MAX);
bio = bio_alloc_clone(orig->bi_bdev, orig, GFP_NOFS, &btrfs_bioset);
bbio = btrfs_bio(bio);
btrfs_bio_init(bbio, end_io, private);
bio_trim(bio, offset >> 9, size >> 9);
bbio->iter = bio->bi_iter;
return bio;
}
static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
{
if (!dev || !dev->bdev)
return;
if (bio->bi_status != BLK_STS_IOERR && bio->bi_status != BLK_STS_TARGET)
return;
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_FLUSH_ERRS);
}
static struct workqueue_struct *btrfs_end_io_wq(struct btrfs_fs_info *fs_info,
struct bio *bio)
{
if (bio->bi_opf & REQ_META)
return fs_info->endio_meta_workers;
return fs_info->endio_workers;
}
static void btrfs_end_bio_work(struct work_struct *work)
@ -6626,103 +6719,101 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio =
container_of(work, struct btrfs_bio, end_io_work);
bio_endio(&bbio->bio);
bbio->end_io(bbio);
}
static void btrfs_end_bioc(struct btrfs_io_context *bioc, bool async)
static void btrfs_simple_end_io(struct bio *bio)
{
struct bio *orig_bio = bioc->orig_bio;
struct btrfs_bio *bbio = btrfs_bio(orig_bio);
struct btrfs_fs_info *fs_info = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(fs_info);
if (bio->bi_status)
btrfs_log_dev_io_error(bio, bbio->device);
if (bio_op(bio) == REQ_OP_READ) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
bbio->end_io(bbio);
}
}
static void btrfs_raid56_end_io(struct bio *bio)
{
struct btrfs_io_context *bioc = bio->bi_private;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
orig_bio->bi_private = bioc->private;
orig_bio->bi_end_io = bioc->end_io;
bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}
static void btrfs_orig_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
struct btrfs_bio *bbio = btrfs_bio(bio);
btrfs_bio_counter_dec(bioc->fs_info);
if (bio->bi_status) {
atomic_inc(&bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
/*
* Only send an error to the higher layers if it is beyond the tolerance
* threshold.
*/
if (atomic_read(&bioc->error) > bioc->max_errors)
orig_bio->bi_status = BLK_STS_IOERR;
bio->bi_status = BLK_STS_IOERR;
else
orig_bio->bi_status = BLK_STS_OK;
if (btrfs_op(orig_bio) == BTRFS_MAP_READ && async) {
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(bioc), &bbio->end_io_work);
} else {
bio_endio(orig_bio);
}
bio->bi_status = BLK_STS_OK;
bbio->end_io(bbio);
btrfs_put_bioc(bioc);
}
static void btrfs_end_bio(struct bio *bio)
static void btrfs_clone_write_end_io(struct bio *bio)
{
struct btrfs_io_stripe *stripe = bio->bi_private;
struct btrfs_io_context *bioc = stripe->bioc;
if (bio->bi_status) {
atomic_inc(&bioc->error);
if (bio->bi_status == BLK_STS_IOERR ||
bio->bi_status == BLK_STS_TARGET) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_WRITE_ERRS);
else if (!(bio->bi_opf & REQ_RAHEAD))
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_READ_ERRS);
if (bio->bi_opf & REQ_PREFLUSH)
btrfs_dev_stat_inc_and_print(stripe->dev,
BTRFS_DEV_STAT_FLUSH_ERRS);
}
atomic_inc(&stripe->bioc->error);
btrfs_log_dev_io_error(bio, stripe->dev);
}
if (bio != bioc->orig_bio)
bio_put(bio);
btrfs_bio_counter_dec(bioc->fs_info);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, true);
/* Pass on control to the original bio this one was cloned from */
bio_endio(stripe->bioc->orig_bio);
bio_put(bio);
}
static void submit_stripe_bio(struct btrfs_io_context *bioc,
struct bio *orig_bio, int dev_nr, bool clone)
static void btrfs_submit_dev_bio(struct btrfs_device *dev, struct bio *bio)
{
struct btrfs_fs_info *fs_info = bioc->fs_info;
struct btrfs_device *dev = bioc->stripes[dev_nr].dev;
u64 physical = bioc->stripes[dev_nr].physical;
struct bio *bio;
if (!dev || !dev->bdev ||
test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
(btrfs_op(orig_bio) == BTRFS_MAP_WRITE &&
(btrfs_op(bio) == BTRFS_MAP_WRITE &&
!test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
atomic_inc(&bioc->error);
if (atomic_dec_and_test(&bioc->stripes_pending))
btrfs_end_bioc(bioc, false);
bio_io_error(bio);
return;
}
if (clone) {
bio = bio_alloc_clone(dev->bdev, orig_bio, GFP_NOFS, &fs_bio_set);
} else {
bio = orig_bio;
bio_set_dev(bio, dev->bdev);
btrfs_bio(bio)->device = dev;
}
bio_set_dev(bio, dev->bdev);
bioc->stripes[dev_nr].bioc = bioc;
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_end_io = btrfs_end_bio;
bio->bi_iter.bi_sector = physical >> 9;
/*
* For zone append writing, bi_sector must point the beginning of the
* zone
*/
if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
if (btrfs_dev_is_sequential(dev, physical)) {
u64 zone_start = round_down(physical, fs_info->zone_size);
u64 zone_start = round_down(physical,
dev->fs_info->zone_size);
bio->bi_iter.bi_sector = zone_start >> SECTOR_SHIFT;
} else {
@ -6730,50 +6821,53 @@ static void submit_stripe_bio(struct btrfs_io_context *bioc,
bio->bi_opf |= REQ_OP_WRITE;
}
}
btrfs_debug_in_rcu(fs_info,
btrfs_debug_in_rcu(dev->fs_info,
"%s: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
__func__, bio_op(bio), bio->bi_opf, bio->bi_iter.bi_sector,
(unsigned long)dev->bdev->bd_dev, rcu_str_deref(dev->name),
dev->devid, bio->bi_iter.bi_size);
btrfs_bio_counter_inc_noblocked(fs_info);
btrfsic_check_bio(bio);
submit_bio(bio);
}
static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
{
struct bio *orig_bio = bioc->orig_bio, *bio;
ASSERT(bio_op(orig_bio) != REQ_OP_READ);
/* Reuse the bio embedded into the btrfs_bio for the last mirror */
if (dev_nr == bioc->num_stripes - 1) {
bio = orig_bio;
bio->bi_end_io = btrfs_orig_write_end_io;
} else {
bio = bio_alloc_clone(NULL, orig_bio, GFP_NOFS, &fs_bio_set);
bio_inc_remaining(orig_bio);
bio->bi_end_io = btrfs_clone_write_end_io;
}
bio->bi_private = &bioc->stripes[dev_nr];
bio->bi_iter.bi_sector = bioc->stripes[dev_nr].physical >> SECTOR_SHIFT;
bioc->stripes[dev_nr].bioc = bioc;
btrfs_submit_dev_bio(bioc->stripes[dev_nr].dev, bio);
}
void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror_num)
{
u64 logical = bio->bi_iter.bi_sector << 9;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
int ret;
int dev_nr;
int total_devs;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap;
int ret;
btrfs_bio_counter_inc_blocked(fs_info);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
&map_length, &bioc, mirror_num, 1);
ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (ret) {
btrfs_bio_counter_dec(fs_info);
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
return;
}
total_devs = bioc->num_stripes;
bioc->orig_bio = bio;
bioc->private = bio->bi_private;
bioc->end_io = bio->bi_end_io;
atomic_set(&bioc->stripes_pending, total_devs);
if ((bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
((btrfs_op(bio) == BTRFS_MAP_WRITE) || (mirror_num > 1))) {
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
raid56_parity_write(bio, bioc);
else
raid56_parity_recover(bio, bioc, mirror_num, true);
btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
return;
}
@ -6784,12 +6878,31 @@ void btrfs_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, int mirror
BUG();
}
for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
const bool should_clone = (dev_nr < total_devs - 1);
if (!bioc) {
/* Single mirror read/write fast path */
btrfs_bio(bio)->mirror_num = mirror_num;
btrfs_bio(bio)->device = smap.dev;
bio->bi_iter.bi_sector = smap.physical >> SECTOR_SHIFT;
bio->bi_private = fs_info;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap.dev, bio);
} else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
/* Parity RAID write or read recovery */
bio->bi_private = bioc;
bio->bi_end_io = btrfs_raid56_end_io;
if (bio_op(bio) == REQ_OP_READ)
raid56_parity_recover(bio, bioc, mirror_num);
else
raid56_parity_write(bio, bioc);
} else {
/* Write to multiple mirrors */
int total_devs = bioc->num_stripes;
int dev_nr;
submit_stripe_bio(bioc, bio, dev_nr, should_clone);
bioc->orig_bio = bio;
for (dev_nr = 0; dev_nr < total_devs; dev_nr++)
btrfs_submit_mirrored_bio(bioc, dev_nr);
}
btrfs_bio_counter_dec(fs_info);
}
static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
@ -8244,7 +8357,7 @@ static int relocating_repair_kthread(void *data)
if (!cache)
goto out;
if (!cache->relocating_repair)
if (!test_bit(BLOCK_GROUP_FLAG_RELOCATING_REPAIR, &cache->runtime_flags))
goto out;
ret = btrfs_may_alloc_data_chunk(fs_info, target);
@ -8281,17 +8394,27 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
if (!cache)
return true;
spin_lock(&cache->lock);
if (cache->relocating_repair) {
spin_unlock(&cache->lock);
if (test_and_set_bit(BLOCK_GROUP_FLAG_RELOCATING_REPAIR, &cache->runtime_flags)) {
btrfs_put_block_group(cache);
return true;
}
cache->relocating_repair = 1;
spin_unlock(&cache->lock);
kthread_run(relocating_repair_kthread, cache,
"btrfs-relocating-repair");
return true;
}
int __init btrfs_bioset_init(void)
{
if (bioset_init(&btrfs_bioset, BIO_POOL_SIZE,
offsetof(struct btrfs_bio, bio),
BIOSET_NEED_BVECS))
return -ENOMEM;
return 0;
}
void __cold btrfs_bioset_exit(void)
{
bioset_exit(&btrfs_bioset);
}

View File

@ -180,6 +180,31 @@ struct btrfs_device {
u64 scrub_speed_max;
};
/*
* Block group or device which contains an active swapfile. Used for preventing
* unsafe operations while a swapfile is active.
*
* These are sorted on (ptr, inode) (note that a block group or device can
* contain more than one swapfile). We compare the pointer values because we
* don't actually care what the object is, we just need a quick check whether
* the object exists in the rbtree.
*/
struct btrfs_swapfile_pin {
struct rb_node node;
void *ptr;
struct inode *inode;
/*
* If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
* points to a struct btrfs_device.
*/
bool is_block_group;
/*
* Only used when 'is_block_group' is true and it is the number of
* extents used by a swapfile for this block group ('ptr' field).
*/
int bg_extent_count;
};
/*
* If we read those variants at the context of their own lock, we needn't
* use the following helpers, reading them directly is safe.
@ -361,6 +386,8 @@ struct btrfs_fs_devices {
*/
#define BTRFS_MAX_BIO_SECTORS (256)
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
/*
* Additional info to pass along bio.
*
@ -378,6 +405,10 @@ struct btrfs_bio {
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
struct bvec_iter iter;
/* End I/O information supplied to btrfs_bio_alloc */
btrfs_bio_end_io_t end_io;
void *private;
/* For read end I/O handling */
struct work_struct end_io_work;
@ -393,6 +424,20 @@ static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
return container_of(bio, struct btrfs_bio, bio);
}
int __init btrfs_bioset_init(void);
void __cold btrfs_bioset_exit(void);
struct bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
btrfs_bio_end_io_t end_io, void *private);
struct bio *btrfs_bio_clone_partial(struct bio *orig, u64 offset, u64 size,
btrfs_bio_end_io_t end_io, void *private);
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}
static inline void btrfs_bio_free_csum(struct btrfs_bio *bbio)
{
if (bbio->csum != bbio->csum_inline) {
@ -451,12 +496,9 @@ struct btrfs_discard_stripe {
*/
struct btrfs_io_context {
refcount_t refs;
atomic_t stripes_pending;
struct btrfs_fs_info *fs_info;
u64 map_type; /* get from map_lookup->type */
bio_end_io_t *end_io;
struct bio *orig_bio;
void *private;
atomic_t error;
int max_errors;
int num_stripes;
@ -714,4 +756,6 @@ const char *btrfs_bg_type_to_raid_name(u64 flags);
int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
#endif

View File

@ -652,80 +652,55 @@ int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
return 0;
}
static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
{
struct btrfs_device *device;
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
if (device->bdev &&
bdev_zoned_model(device->bdev) == BLK_ZONED_HM) {
btrfs_err(fs_info,
"zoned: mode not enabled but zoned device found: %pg",
device->bdev);
return -EINVAL;
}
}
return 0;
}
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
{
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
struct btrfs_device *device;
u64 zoned_devices = 0;
u64 nr_devices = 0;
u64 zone_size = 0;
u64 max_zone_append_size = 0;
const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
int ret = 0;
int ret;
/* Count zoned devices */
list_for_each_entry(device, &fs_devices->devices, dev_list) {
enum blk_zoned_model model;
/*
* Host-Managed devices can't be used without the ZONED flag. With the
* ZONED all devices can be used, using zone emulation if required.
*/
if (!btrfs_fs_incompat(fs_info, ZONED))
return btrfs_check_for_zoned_device(fs_info);
list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
struct btrfs_zoned_device_info *zone_info = device->zone_info;
if (!device->bdev)
continue;
model = bdev_zoned_model(device->bdev);
/*
* A Host-Managed zoned device must be used as a zoned device.
* A Host-Aware zoned device and a non-zoned devices can be
* treated as a zoned device, if ZONED flag is enabled in the
* superblock.
*/
if (model == BLK_ZONED_HM ||
(model == BLK_ZONED_HA && incompat_zoned) ||
(model == BLK_ZONED_NONE && incompat_zoned)) {
struct btrfs_zoned_device_info *zone_info;
zone_info = device->zone_info;
zoned_devices++;
if (!zone_size) {
zone_size = zone_info->zone_size;
} else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
if (!zone_size) {
zone_size = zone_info->zone_size;
} else if (zone_info->zone_size != zone_size) {
btrfs_err(fs_info,
"zoned: unequal block device zone sizes: have %llu found %llu",
device->zone_info->zone_size,
zone_size);
ret = -EINVAL;
goto out;
}
if (!max_zone_append_size ||
(zone_info->max_zone_append_size &&
zone_info->max_zone_append_size < max_zone_append_size))
max_zone_append_size =
zone_info->max_zone_append_size;
zone_info->zone_size, zone_size);
return -EINVAL;
}
nr_devices++;
}
if (!zoned_devices && !incompat_zoned)
goto out;
if (!zoned_devices && incompat_zoned) {
/* No zoned block device found on ZONED filesystem */
btrfs_err(fs_info,
"zoned: no zoned devices found on a zoned filesystem");
ret = -EINVAL;
goto out;
}
if (zoned_devices && !incompat_zoned) {
btrfs_err(fs_info,
"zoned: mode not enabled but zoned device found");
ret = -EINVAL;
goto out;
}
if (zoned_devices != nr_devices) {
btrfs_err(fs_info,
"zoned: cannot mix zoned and regular devices");
ret = -EINVAL;
goto out;
if (!max_zone_append_size ||
(zone_info->max_zone_append_size &&
zone_info->max_zone_append_size < max_zone_append_size))
max_zone_append_size = zone_info->max_zone_append_size;
}
/*
@ -737,14 +712,12 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
btrfs_err(fs_info,
"zoned: zone size %llu not aligned to stripe %u",
zone_size, BTRFS_STRIPE_LEN);
ret = -EINVAL;
goto out;
return -EINVAL;
}
if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
btrfs_err(fs_info, "zoned: mixed block groups not supported");
ret = -EINVAL;
goto out;
return -EINVAL;
}
fs_info->zone_size = zone_size;
@ -760,11 +733,10 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
*/
ret = btrfs_check_mountopts_zoned(fs_info);
if (ret)
goto out;
return ret;
btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
out:
return ret;
return 0;
}
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
@ -1436,7 +1408,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
} else if (map->num_stripes == num_conventional) {
cache->alloc_offset = last_alloc;
cache->zone_is_active = 1;
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
goto out;
}
}
@ -1452,7 +1424,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = caps[0];
cache->zone_is_active = test_bit(0, active);
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags);
break;
case BTRFS_BLOCK_GROUP_DUP:
if (map->type & BTRFS_BLOCK_GROUP_DATA) {
@ -1486,7 +1459,9 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
goto out;
}
} else {
cache->zone_is_active = test_bit(0, active);
if (test_bit(0, active))
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&cache->runtime_flags);
}
cache->alloc_offset = alloc_offsets[0];
cache->zone_capacity = min(caps[0], caps[1]);
@ -1530,7 +1505,7 @@ out:
if (!ret) {
cache->meta_write_pointer = cache->alloc_offset + cache->start;
if (cache->zone_is_active) {
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) {
btrfs_get_block_group(cache);
spin_lock(&fs_info->zone_active_bgs_lock);
list_add_tail(&cache->active_bg_list,
@ -1563,7 +1538,6 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
free = cache->zone_capacity - cache->alloc_offset;
/* We only need ->free_space in ALLOC_SEQ block groups */
cache->last_byte_to_unpin = (u64)-1;
cache->cached = BTRFS_CACHE_FINISHED;
cache->free_space_ctl->free_space = free;
cache->zone_unusable = unusable;
@ -1871,7 +1845,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (block_group->zone_is_active) {
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
ret = true;
goto out_unlock;
}
@ -1897,7 +1871,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
}
/* Successfully activated all the zones */
block_group->zone_is_active = 1;
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
space_info->active_total_bytes += block_group->length;
spin_unlock(&block_group->lock);
btrfs_try_granting_tickets(fs_info, space_info);
@ -1960,7 +1934,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
int i;
spin_lock(&block_group->lock);
if (!block_group->zone_is_active) {
if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
return 0;
}
@ -2001,7 +1975,8 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
* Bail out if someone already deactivated the block group, or
* allocated space is left in the block group.
*/
if (!block_group->zone_is_active) {
if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return 0;
@ -2014,7 +1989,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
}
}
block_group->zone_is_active = 0;
clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
@ -2222,13 +2197,14 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
spin_lock(&block_group->lock);
if (!block_group->zoned_data_reloc_ongoing)
if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))
goto out;
/* All relocation extents are written. */
if (block_group->start + block_group->alloc_offset == logical + length) {
/* Now, release this block group for further allocations. */
block_group->zoned_data_reloc_ongoing = 0;
clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
&block_group->runtime_flags);
}
out:
@ -2300,7 +2276,9 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
list) {
if (!spin_trylock(&bg->lock))
continue;
if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) {
if (btrfs_zoned_bg_is_full(bg) ||
test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
&bg->runtime_flags)) {
spin_unlock(&bg->lock);
continue;
}

View File

@ -70,8 +70,6 @@ struct fsverity_info {
const struct inode *inode;
};
/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
#define FS_VERITY_MAX_SIGNATURE_SIZE (FS_VERITY_MAX_DESCRIPTOR_SIZE - \
sizeof(struct fsverity_descriptor))

View File

@ -22,6 +22,9 @@
*/
#define FS_VERITY_MAX_DIGEST_SIZE SHA512_DIGEST_SIZE
/* Arbitrary limit to bound the kmalloc() size. Can be changed. */
#define FS_VERITY_MAX_DESCRIPTOR_SIZE 16384
/* Verity operations for filesystems */
struct fsverity_operations {

View File

@ -84,7 +84,6 @@ struct raid56_bio_trace_info;
EM( IO_TREE_FS_EXCLUDED_EXTENTS, "EXCLUDED_EXTENTS") \
EM( IO_TREE_BTREE_INODE_IO, "BTREE_INODE_IO") \
EM( IO_TREE_INODE_IO, "INODE_IO") \
EM( IO_TREE_INODE_IO_FAILURE, "INODE_IO_FAILURE") \
EM( IO_TREE_RELOC_BLOCKS, "RELOC_BLOCKS") \
EM( IO_TREE_TRANS_DIRTY_PAGES, "TRANS_DIRTY_PAGES") \
EM( IO_TREE_ROOT_DIRTY_LOG_PAGES, "ROOT_DIRTY_LOG_PAGES") \
@ -154,7 +153,6 @@ FLUSH_STATES
{ EXTENT_NODATASUM, "NODATASUM"}, \
{ EXTENT_CLEAR_META_RESV, "CLEAR_META_RESV"}, \
{ EXTENT_NEED_WAIT, "NEED_WAIT"}, \
{ EXTENT_DAMAGED, "DAMAGED"}, \
{ EXTENT_NORESERVE, "NORESERVE"}, \
{ EXTENT_QGROUP_RESERVED, "QGROUP_RESERVED"}, \
{ EXTENT_CLEAR_DATA_RESV, "CLEAR_DATA_RESV"}, \

View File

@ -290,6 +290,12 @@ struct btrfs_ioctl_fs_info_args {
#define BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID (1ULL << 1)
#define BTRFS_FEATURE_COMPAT_RO_VERITY (1ULL << 2)
/*
* Put all block group items into a dedicated block group tree, greatly
* reducing mount time for large filesystem due to better locality.
*/
#define BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE (1ULL << 3)
#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)

View File

@ -965,6 +965,10 @@ static inline __u16 btrfs_qgroup_level(__u64 qgroupid)
*/
#define BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT (1ULL << 2)
#define BTRFS_QGROUP_STATUS_FLAGS_MASK (BTRFS_QGROUP_STATUS_FLAG_ON | \
BTRFS_QGROUP_STATUS_FLAG_RESCAN | \
BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)
#define BTRFS_QGROUP_STATUS_VERSION 1
struct btrfs_qgroup_status_item {

View File

@ -1933,6 +1933,7 @@ int balance_dirty_pages_ratelimited_flags(struct address_space *mapping,
wb_put(wb);
return ret;
}
EXPORT_SYMBOL_GPL(balance_dirty_pages_ratelimited_flags);
/**
* balance_dirty_pages_ratelimited - balance dirty memory state.