mirror of
https://github.com/torvalds/linux.git
for-5.6-tag
Merge tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:

 "Features, highlights:

  - async discard
      - "mount -o discard=async" to enable it
      - freed extents are not discarded immediately, but grouped
        together and trimmed later, with IO rate limiting
      - the "sync" mode submits short extents that could have been
        ignored completely by the device, for SATA prior to 3.1 the
        requests are unqueued and have a big impact on performance
      - the actual discard IO requests have been moved out of
        transaction commit to a worker thread, improving commit latency
      - IO rate and request size can be tuned by sysfs files, for now
        enabled only with CONFIG_BTRFS_DEBUG as we might need to
        add/delete the files and don't have a stable-ish ABI for
        general use, defaults are conservative

  - export device state info in sysfs, eg. missing, writeable

  - no discard of extents known to be untouched on disk (eg. after
    reservation)

  - device stats reset is logged with process name and PID that called
    the ioctl

 Fixes:

  - fix missing hole after hole punching and fsync when using NO_HOLES

  - writeback: range cyclic mode could miss some dirty pages and lead
    to OOM

  - two more corner cases for metadata_uuid change after power loss
    during the change

  - fix infinite loop during fsync after mix of rename operations

 Core changes:

  - qgroup assign returns ENOTCONN when quotas not enabled, used to
    return EINVAL that was confusing

  - device closing does not need to allocate memory anymore

  - snapshot aware code got removed, disabled for years due to
    performance problems, reimplementation will allow to select whether
    defrag breaks or does not break COW on shared extents

  - tree-checker:
      - check leaf chunk item size, cross check against number of
        stripes
      - verify location keys for DIR_ITEM, DIR_INDEX and XATTR items

  - new self test for physical -> logical mapping code, used for super
    block range exclusion

  - assertion helpers/macros updated to avoid objtool "unreachable
    code" reports on older compilers or config option combinations"

* tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (84 commits)
  btrfs: free block groups after free'ing fs trees
  btrfs: Fix split-brain handling when changing FSID to metadata uuid
  btrfs: Handle another split brain scenario with metadata uuid feature
  btrfs: Factor out metadata_uuid code from find_fsid.
  btrfs: Call find_fsid from find_fsid_inprogress
  Btrfs: fix infinite loop during fsync after rename operations
  btrfs: set trans->drity in btrfs_commit_transaction
  btrfs: drop log root for dropped roots
  btrfs: sysfs, add devid/dev_state kobject and device attributes
  btrfs: Refactor btrfs_rmap_block to improve readability
  btrfs: Add self-tests for btrfs_rmap_block
  btrfs: selftests: Add support for dummy devices
  btrfs: Move and unexport btrfs_rmap_block
  btrfs: separate definition of assertion failure handlers
  btrfs: device stats, log when stats are zeroed
  btrfs: fix improper setting of scanned for range cyclic write cache pages
  btrfs: safely advance counter when looking up bio csums
  btrfs: remove unused member btrfs_device::work
  btrfs: remove unnecessary wrapper get_alloc_profile
  btrfs: add correction to handle -1 edge case in async discard
  ...
This commit is contained in: commit 81a046b18b
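The headline feature of this pull is async discard, which parks freed extents on size-filtered lists and trims them in the background instead of during transaction commit. The following is a rough standalone model (not kernel code) of how a freed region would be bucketed by size; the thresholds mirror BTRFS_ASYNC_DISCARD_MAX_FILTER (1M) and BTRFS_ASYNC_DISCARD_MIN_FILTER (32K) introduced later in fs/btrfs/discard.h, and the function name is made up for illustration.

/* Standalone model of the size-filtered discard lists: list 0 holds
 * completely unused block groups, later lists hold progressively
 * smaller freed extents. */
#include <stdio.h>

#define NR_DISCARD_LISTS 3
static const unsigned long long discard_minlen[NR_DISCARD_LISTS] = {
	0,              /* index 0: unused block groups, trim everything */
	1024 * 1024,    /* index 1: extents of at least 1M */
	32 * 1024,      /* index 2: extents of at least 32K */
};

/* Return the first (highest priority) size-filter list the freed region
 * qualifies for, or -1 if it is below the smallest filter and should
 * simply stay where it is. */
static int pick_discard_index(unsigned long long len)
{
	int i;

	for (i = 1; i < NR_DISCARD_LISTS; i++)
		if (len >= discard_minlen[i])
			return i;
	return -1;
}

int main(void)
{
	printf("8M freed  -> list %d\n", pick_discard_index(8ULL << 20));
	printf("64K freed -> list %d\n", pick_discard_index(64ULL << 10));
	printf("4K freed  -> list %d\n", pick_discard_index(4ULL << 10));
	return 0;
}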
fs/btrfs/Makefile

@@ -11,7 +11,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
block-rsv.o delalloc-space.o block-group.o
block-rsv.o delalloc-space.o block-group.o discard.o

btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
fs/btrfs/block-group.c

@@ -14,6 +14,8 @@
#include "sysfs.h"
#include "tree-log.h"
#include "delalloc-space.h"
#include "discard.h"
#include "raid56.h"

/*
 * Return target flags in extended format or 0 if restripe for this chunk_type

@@ -95,7 +97,7 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
return extended_to_chunk(flags | allowed);
}

static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
{
unsigned seq;
u64 flags;

@@ -115,11 +117,6 @@ static u64 get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
return btrfs_reduce_alloc_profile(fs_info, flags);
}

u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags)
{
return get_alloc_profile(fs_info, orig_flags);
}

void btrfs_get_block_group(struct btrfs_block_group *cache)
{
atomic_inc(&cache->count);

@@ -131,6 +128,15 @@ void btrfs_put_block_group(struct btrfs_block_group *cache)
WARN_ON(cache->pinned > 0);
WARN_ON(cache->reserved > 0);

/*
 * A block_group shouldn't be on the discard_list anymore.
 * Remove the block_group from the discard_list to prevent us
 * from causing a panic due to NULL pointer dereference.
 */
if (WARN_ON(!list_empty(&cache->discard_list)))
btrfs_discard_cancel_work(&cache->fs_info->discard_ctl,
cache);

/*
 * If not empty, someone is still holding mutex of
 * full_stripe_lock, which can only be released by caller.

@@ -466,8 +472,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
total_added += size;
ret = btrfs_add_free_space(block_group, start,
size);
ret = btrfs_add_free_space_async_trimmed(block_group,
start, size);
BUG_ON(ret); /* -ENOMEM or logic error */
start = extent_end + 1;
} else {

@@ -478,7 +484,8 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
if (start < end) {
size = end - start;
total_added += size;
ret = btrfs_add_free_space(block_group, start, size);
ret = btrfs_add_free_space_async_trimmed(block_group, start,
size);
BUG_ON(ret); /* -ENOMEM or logic error */
}

@@ -1185,21 +1192,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
struct btrfs_space_info *sinfo = cache->space_info;
u64 num_bytes;
u64 sinfo_used;
u64 min_allocable_bytes;
int ret = -ENOSPC;

/*
 * We need some metadata space and system metadata space for
 * allocating chunks in some corner cases until we force to set
 * it to be readonly.
 */
if ((sinfo->flags &
(BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) &&
!force)
min_allocable_bytes = SZ_1M;
else
min_allocable_bytes = 0;

spin_lock(&sinfo->lock);
spin_lock(&cache->lock);

@@ -1217,10 +1211,9 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
 * sinfo_used + num_bytes should always <= sinfo->total_bytes.
 *
 * Here we make sure if we mark this bg RO, we still have enough
 * free space as buffer (if min_allocable_bytes is not 0).
 * free space as buffer.
 */
if (sinfo_used + num_bytes + min_allocable_bytes <=
sinfo->total_bytes) {
if (sinfo_used + num_bytes <= sinfo->total_bytes) {
sinfo->bytes_readonly += num_bytes;
cache->ro++;
list_add_tail(&cache->ro_list, &sinfo->ro_bgs);

@@ -1233,8 +1226,8 @@ out:
btrfs_info(cache->fs_info,
"unable to make block group %llu ro", cache->start);
btrfs_info(cache->fs_info,
"sinfo_used=%llu bg_num_bytes=%llu min_allocable=%llu",
sinfo_used, num_bytes, min_allocable_bytes);
"sinfo_used=%llu bg_num_bytes=%llu",
sinfo_used, num_bytes);
btrfs_dump_space_info(cache->fs_info, cache->space_info, 0, 0);
}
return ret;

@@ -1249,6 +1242,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
struct btrfs_block_group *block_group;
struct btrfs_space_info *space_info;
struct btrfs_trans_handle *trans;
const bool async_trim_enabled = btrfs_test_opt(fs_info, DISCARD_ASYNC);
int ret = 0;

if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))

@@ -1272,10 +1266,28 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
spin_unlock(&fs_info->unused_bgs_lock);

btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);

mutex_lock(&fs_info->delete_unused_bgs_mutex);

/* Don't want to race with allocators so take the groups_sem */
down_write(&space_info->groups_sem);

/*
 * Async discard moves the final block group discard to be prior
 * to the unused_bgs code path. Therefore, if it's not fully
 * trimmed, punt it back to the async discard lists.
 */
if (btrfs_test_opt(fs_info, DISCARD_ASYNC) &&
!btrfs_is_free_space_trimmed(block_group)) {
trace_btrfs_skip_unused_block_group(block_group);
up_write(&space_info->groups_sem);
/* Requeue if we failed because of async discard */
btrfs_discard_queue_work(&fs_info->discard_ctl,
block_group);
goto next;
}

spin_lock(&block_group->lock);
if (block_group->reserved || block_group->pinned ||
block_group->used || block_group->ro ||

@@ -1347,6 +1359,23 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
}
mutex_unlock(&fs_info->unused_bg_unpin_mutex);

/*
 * At this point, the block_group is read only and should fail
 * new allocations. However, btrfs_finish_extent_commit() can
 * cause this block_group to be placed back on the discard
 * lists because now the block_group isn't fully discarded.
 * Bail here and try again later after discarding everything.
 */
spin_lock(&fs_info->discard_ctl.lock);
if (!list_empty(&block_group->discard_list)) {
spin_unlock(&fs_info->discard_ctl.lock);
btrfs_dec_block_group_ro(block_group);
btrfs_discard_queue_work(&fs_info->discard_ctl,
block_group);
goto end_trans;
}
spin_unlock(&fs_info->discard_ctl.lock);

/* Reset pinned so btrfs_put_block_group doesn't complain */
spin_lock(&space_info->lock);
spin_lock(&block_group->lock);

@@ -1362,8 +1391,18 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);

/*
 * The normal path here is an unused block group is passed here,
 * then trimming is handled in the transaction commit path.
 * Async discard interposes before this to do the trimming
 * before coming down the unused block group path as trimming
 * will no longer be done later in the transaction commit path.
 */
if (!async_trim_enabled && btrfs_test_opt(fs_info, DISCARD_ASYNC))
goto flip_async;

/* DISCARD can flip during remount */
trimming = btrfs_test_opt(fs_info, DISCARD);
trimming = btrfs_test_opt(fs_info, DISCARD_SYNC);

/* Implicit trim during transaction commit. */
if (trimming)

@@ -1406,6 +1445,13 @@ next:
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
return;

flip_async:
btrfs_end_transaction(trans);
mutex_unlock(&fs_info->delete_unused_bgs_mutex);
btrfs_put_block_group(block_group);
btrfs_discard_punt_unused_bgs_list(fs_info);
}

void btrfs_mark_bg_unused(struct btrfs_block_group *bg)

@@ -1516,6 +1562,102 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
write_sequnlock(&fs_info->profiles_lock);
}

/**
 * btrfs_rmap_block - Map a physical disk address to a list of logical addresses
 * @chunk_start: logical address of block group
 * @physical: physical address to map to logical addresses
 * @logical: return array of logical addresses which map to @physical
 * @naddrs: length of @logical
 * @stripe_len: size of IO stripe for the given block group
 *
 * Maps a particular @physical disk address to a list of @logical addresses.
 * Used primarily to exclude those portions of a block group that contain super
 * block copies.
 */
EXPORT_FOR_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len)
{
struct extent_map *em;
struct map_lookup *map;
u64 *buf;
u64 bytenr;
u64 data_stripe_length;
u64 io_stripe_size;
int i, nr = 0;
int ret = 0;

em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
if (IS_ERR(em))
return -EIO;

map = em->map_lookup;
data_stripe_length = em->len;
io_stripe_size = map->stripe_len;

if (map->type & BTRFS_BLOCK_GROUP_RAID10)
data_stripe_length = div_u64(data_stripe_length,
map->num_stripes / map->sub_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
data_stripe_length = div_u64(data_stripe_length, map->num_stripes);
else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
data_stripe_length = div_u64(data_stripe_length,
nr_data_stripes(map));
io_stripe_size = map->stripe_len * nr_data_stripes(map);
}

buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
if (!buf) {
ret = -ENOMEM;
goto out;
}

for (i = 0; i < map->num_stripes; i++) {
bool already_inserted = false;
u64 stripe_nr;
int j;

if (!in_range(physical, map->stripes[i].physical,
data_stripe_length))
continue;

stripe_nr = physical - map->stripes[i].physical;
stripe_nr = div64_u64(stripe_nr, map->stripe_len);

if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
stripe_nr = stripe_nr * map->num_stripes + i;
stripe_nr = div_u64(stripe_nr, map->sub_stripes);
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_nr = stripe_nr * map->num_stripes + i;
}
/*
 * The remaining case would be for RAID56, multiply by
 * nr_data_stripes(). Alternatively, just use rmap_len below
 * instead of map->stripe_len
 */

bytenr = chunk_start + stripe_nr * io_stripe_size;

/* Ensure we don't add duplicate addresses */
for (j = 0; j < nr; j++) {
if (buf[j] == bytenr) {
already_inserted = true;
break;
}
}

if (!already_inserted)
buf[nr++] = bytenr;
}

*logical = buf;
*naddrs = nr;
*stripe_len = io_stripe_size;
out:
free_extent_map(em);
return ret;
}

static int exclude_super_stripes(struct btrfs_block_group *cache)
{
struct btrfs_fs_info *fs_info = cache->fs_info;

@@ -1610,6 +1752,8 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
cache->full_stripe_len = btrfs_full_stripe_len(fs_info, start);
set_free_space_tree_thresholds(cache);

cache->discard_index = BTRFS_DISCARD_INDEX_UNUSED;

atomic_set(&cache->count, 1);
spin_lock_init(&cache->lock);
init_rwsem(&cache->data_rwsem);

@@ -1617,6 +1761,7 @@ static struct btrfs_block_group *btrfs_create_block_group_cache(
INIT_LIST_HEAD(&cache->cluster_list);
INIT_LIST_HEAD(&cache->bg_list);
INIT_LIST_HEAD(&cache->ro_list);
INIT_LIST_HEAD(&cache->discard_list);
INIT_LIST_HEAD(&cache->dirty_list);
INIT_LIST_HEAD(&cache->io_list);
btrfs_init_free_space_ctl(cache);

@@ -1775,7 +1920,10 @@ static int read_one_block_group(struct btrfs_fs_info *info,
inc_block_group_ro(cache, 1);
} else if (cache->used == 0) {
ASSERT(list_empty(&cache->bg_list));
btrfs_mark_bg_unused(cache);
if (btrfs_test_opt(info, DISCARD_ASYNC))
btrfs_discard_queue_work(&info->discard_ctl, cache);
else
btrfs_mark_bg_unused(cache);
}
return 0;
error:

@@ -2738,8 +2886,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
 * dirty list to avoid races between cleaner kthread and space
 * cache writeout.
 */
if (!alloc && old_val == 0)
btrfs_mark_bg_unused(cache);
if (!alloc && old_val == 0) {
if (!btrfs_test_opt(info, DISCARD_ASYNC))
btrfs_mark_bg_unused(cache);
}

btrfs_put_block_group(cache);
total -= num_bytes;
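The block-group.c changes show the core ordering decision: with async discard enabled, btrfs_delete_unused_bgs() only deletes an unused block group once its free space is fully trimmed, and otherwise punts it back to the discard lists. Below is a minimal user-space sketch of that decision; the types and function name are invented for illustration and only mirror the shape of the kernel logic.

/* Sketch of the unused block group decision with async discard. */
#include <stdbool.h>
#include <stdio.h>

enum bg_action { BG_DELETE, BG_REQUEUE_DISCARD, BG_SKIP };

struct bg_state {             /* stand-in for struct btrfs_block_group */
	bool used;            /* still has allocated bytes */
	bool fully_trimmed;   /* what btrfs_is_free_space_trimmed() reports */
};

static enum bg_action unused_bg_action(const struct bg_state *bg,
				       bool discard_async)
{
	if (bg->used)
		return BG_SKIP;              /* not actually unused */
	if (discard_async && !bg->fully_trimmed)
		return BG_REQUEUE_DISCARD;   /* trim first, delete later */
	return BG_DELETE;
}

int main(void)
{
	struct bg_state bg = { .used = false, .fully_trimmed = false };

	/* With discard=async this block group gets requeued for trimming. */
	printf("action = %d\n", unused_bg_action(&bg, true));
	return 0;
}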
fs/btrfs/block-group.h

@@ -12,6 +12,19 @@ enum btrfs_disk_cache_state {
BTRFS_DC_SETUP,
};

/*
 * This describes the state of the block_group for async discard. This is due
 * to the two pass nature of it where extent discarding is prioritized over
 * bitmap discarding. BTRFS_DISCARD_RESET_CURSOR is set when we are resetting
 * between lists to prevent contention for discard state variables
 * (eg. discard_cursor).
 */
enum btrfs_discard_state {
BTRFS_DISCARD_EXTENTS,
BTRFS_DISCARD_BITMAPS,
BTRFS_DISCARD_RESET_CURSOR,
};

/*
 * Control flags for do_chunk_alloc's force field CHUNK_ALLOC_NO_FORCE means to
 * only allocate a chunk if we really need one.

@@ -116,7 +129,13 @@ struct btrfs_block_group {
/* For read-only block groups */
struct list_head ro_list;

/* For discard operations */
atomic_t trimming;
struct list_head discard_list;
int discard_index;
u64 discard_eligible_time;
u64 discard_cursor;
enum btrfs_discard_state discard_state;

/* For dirty block groups */
struct list_head dirty_list;

@@ -158,6 +177,22 @@ struct btrfs_block_group {
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
};

static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
{
return (block_group->start + block_group->length);
}

static inline bool btrfs_is_block_group_data_only(
struct btrfs_block_group *block_group)
{
/*
 * In mixed mode the fragmentation is expected to be high, lowering the
 * efficiency, so only proper data block groups are considered.
 */
return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
!(block_group->flags & BTRFS_BLOCK_GROUP_METADATA);
}

#ifdef CONFIG_BTRFS_DEBUG
static inline int btrfs_should_fragment_free_space(
struct btrfs_block_group *block_group)

@@ -248,4 +283,9 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
cache->cached == BTRFS_CACHE_ERROR;
}

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
u64 physical, u64 **logical, int *naddrs, int *stripe_len);
#endif

#endif /* BTRFS_BLOCK_GROUP_H */
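The two new inline helpers are simple but worth seeing in isolation: btrfs_block_group_end() gives the exclusive end offset used as the trim cursor limit, and btrfs_is_block_group_data_only() filters out mixed block groups because async discard only tracks proper data block groups. A user-space mirror follows; the struct is pared down and the flag values are illustrative, not the real BTRFS_BLOCK_GROUP_* constants.

/* User-space mirror of the two helpers added to block-group.h. */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define BG_DATA     (1ULL << 0)   /* illustrative flag values */
#define BG_METADATA (1ULL << 1)

struct block_group {
	uint64_t start;
	uint64_t length;
	uint64_t flags;
};

static inline uint64_t block_group_end(const struct block_group *bg)
{
	return bg->start + bg->length;
}

/* Mixed (data+metadata) groups are excluded from async discard. */
static inline bool is_block_group_data_only(const struct block_group *bg)
{
	return (bg->flags & BG_DATA) && !(bg->flags & BG_METADATA);
}

int main(void)
{
	struct block_group bg = { .start = 1ULL << 30, .length = 1ULL << 30,
				  .flags = BG_DATA };

	assert(block_group_end(&bg) == 2ULL << 30);
	assert(is_block_group_data_only(&bg));
	return 0;
}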
fs/btrfs/check-integrity.c

@@ -629,7 +629,6 @@ static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(dev_t dev,
static int btrfsic_process_superblock(struct btrfsic_state *state,
struct btrfs_fs_devices *fs_devices)
{
struct btrfs_fs_info *fs_info = state->fs_info;
struct btrfs_super_block *selected_super;
struct list_head *dev_head = &fs_devices->devices;
struct btrfs_device *device;

@@ -637,7 +636,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
int ret = 0;
int pass;

BUG_ON(NULL == state);
selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
if (NULL == selected_super) {
pr_info("btrfsic: error, kmalloc failed!\n");

@@ -700,7 +698,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
break;
}

num_copies = btrfs_num_copies(fs_info, next_bytenr,
num_copies = btrfs_num_copies(state->fs_info, next_bytenr,
state->metablock_size);
if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
pr_info("num_copies(log_bytenr=%llu) = %d\n",
fs/btrfs/compression.c

@@ -763,7 +763,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,

if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(inode, comp_bio,
sums);
(u64)-1, sums);
BUG_ON(ret); /* -ENOMEM */
}

@@ -791,7 +791,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
BUG_ON(ret); /* -ENOMEM */

if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
ret = btrfs_lookup_bio_sums(inode, comp_bio, sums);
ret = btrfs_lookup_bio_sums(inode, comp_bio, (u64)-1, sums);
BUG_ON(ret); /* -ENOMEM */
}
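The compression.c hunks only adapt callers to the new btrfs_lookup_bio_sums() signature declared later in ctree.h: an offset argument was added, and existing callers pass (u64)-1 as a sentinel, which appears to mean "derive the offset from the bio itself" (an assumption based on the calls shown here, not stated in this page). The snippet below only illustrates the sentinel-argument pattern; the names are hypothetical stand-ins, not the real helper.

/* Illustration of the sentinel-offset calling convention. */
#include <stdint.h>
#include <stdio.h>

#define OFFSET_FROM_BIO ((uint64_t)-1)   /* hypothetical name for (u64)-1 */

static int lookup_sums(uint64_t offset, uint8_t *dst)
{
	(void)dst;
	if (offset == OFFSET_FROM_BIO)
		printf("derive the offset from the bio\n");
	else
		printf("use caller-supplied offset %llu\n",
		       (unsigned long long)offset);
	return 0;
}

int main(void)
{
	uint8_t csum[32];

	lookup_sums(OFFSET_FROM_BIO, csum);  /* what compression.c now passes */
	lookup_sums(4096, csum);
	return 0;
}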
fs/btrfs/ctree.h

@@ -101,6 +101,14 @@ struct btrfs_ref;

#define BTRFS_MAX_EXTENT_SIZE SZ_128M

/*
 * Deltas are an effective way to populate global statistics. Give macro names
 * to make it clear what we're doing. An example is discard_extents in
 * btrfs_free_space_ctl.
 */
#define BTRFS_STAT_NR_ENTRIES 2
#define BTRFS_STAT_CURR 0
#define BTRFS_STAT_PREV 1

/*
 * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size

@@ -440,6 +448,36 @@ struct btrfs_full_stripe_locks_tree {
struct mutex lock;
};

/* Discard control. */
/*
 * Async discard uses multiple lists to differentiate the discard filter
 * parameters. Index 0 is for completely free block groups where we need to
 * ensure the entire block group is trimmed without being lossy. Indices
 * afterwards represent monotonically decreasing discard filter sizes to
 * prioritize what should be discarded next.
 */
#define BTRFS_NR_DISCARD_LISTS 3
#define BTRFS_DISCARD_INDEX_UNUSED 0
#define BTRFS_DISCARD_INDEX_START 1

struct btrfs_discard_ctl {
struct workqueue_struct *discard_workers;
struct delayed_work work;
spinlock_t lock;
struct btrfs_block_group *block_group;
struct list_head discard_list[BTRFS_NR_DISCARD_LISTS];
u64 prev_discard;
atomic_t discardable_extents;
atomic64_t discardable_bytes;
u64 max_discard_size;
unsigned long delay;
u32 iops_limit;
u32 kbps_limit;
u64 discard_extent_bytes;
u64 discard_bitmap_bytes;
atomic64_t discard_bytes_saved;
};

/* delayed seq elem */
struct seq_list {
struct list_head list;

@@ -526,6 +564,9 @@ enum {
 * so we don't need to offload checksums to workqueues.
 */
BTRFS_FS_CSUM_IMPL_FAST,

/* Indicate that the discard workqueue can service discards. */
BTRFS_FS_DISCARD_RUNNING,
};

struct btrfs_fs_info {

@@ -816,6 +857,8 @@ struct btrfs_fs_info {
struct btrfs_workqueue *scrub_wr_completion_workers;
struct btrfs_workqueue *scrub_parity_workers;

struct btrfs_discard_ctl discard_ctl;

#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
u32 check_integrity_print_mask;
#endif

@@ -902,6 +945,11 @@ struct btrfs_fs_info {
spinlock_t ref_verify_lock;
struct rb_root block_tree;
#endif

#ifdef CONFIG_BTRFS_DEBUG
struct kobject *debug_kobj;
struct kobject *discard_debug_kobj;
#endif
};

static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)

@@ -1170,7 +1218,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
#define BTRFS_MOUNT_DISCARD_SYNC (1 << 10)
#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)

@@ -1189,6 +1237,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_MOUNT_REF_VERIFY (1 << 28)
#define BTRFS_MOUNT_DISCARD_ASYNC (1 << 29)

#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
#define BTRFS_DEFAULT_MAX_INLINE (2048)

@@ -2449,8 +2498,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);

int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len, int delalloc);
int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 len);
int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start,
u64 len);
void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info);
int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans);
int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,

@@ -2789,9 +2838,7 @@ struct btrfs_dio_private;
int btrfs_del_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 bytenr, u64 len);
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
u8 *dst);
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio,
u64 logical_offset);
u64 offset, u8 *dst);
int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 objectid, u64 pos,

@@ -2877,7 +2924,7 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
struct btrfs_root *root);
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
struct page *page, size_t pg_offset,
u64 start, u64 end, int create);
u64 start, u64 end);
int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);

@@ -3110,17 +3157,21 @@ do { \
rcu_read_unlock(); \
} while (0)

__cold
static inline void assfail(const char *expr, const char *file, int line)
#ifdef CONFIG_BTRFS_ASSERT
__cold __noreturn
static inline void assertfail(const char *expr, const char *file, int line)
{
if (IS_ENABLED(CONFIG_BTRFS_ASSERT)) {
pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
BUG();
}
pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
BUG();
}

#define ASSERT(expr) \
(likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
#define ASSERT(expr) \
(likely(expr) ? (void)0 : assertfail(#expr, __FILE__, __LINE__))

#else
static inline void assertfail(const char *expr, const char* file, int line) { }
#define ASSERT(expr) (void)(expr)
#endif

/*
 * Use that for functions that are conditionally exported for sanity tests but
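The ASSERT() rework mentioned in the pull message is visible in the last ctree.h hunk: the failure handler is now compiled only when CONFIG_BTRFS_ASSERT is set and is marked noreturn, which is what keeps objtool and older compilers from flagging conditionally unreachable code. Below is a standalone illustration of the same structure; DEBUG_ASSERT is a local stand-in for the kconfig option and the example is a sketch, not the kernel macro verbatim.

/* Two-variant ASSERT with a noreturn failure handler, user-space model. */
#include <stdio.h>
#include <stdlib.h>

#define DEBUG_ASSERT 1

#ifdef DEBUG_ASSERT
__attribute__((noreturn))
static void assertfail(const char *expr, const char *file, int line)
{
	fprintf(stderr, "assertion failed: %s, in %s:%d\n", expr, file, line);
	abort();
}

#define ASSERT(expr) \
	((expr) ? (void)0 : assertfail(#expr, __FILE__, __LINE__))
#else
/* Disabled build: the expression is still evaluated, as in the kernel's
 * (void)(expr) variant, so side effects are preserved. */
#define ASSERT(expr) (void)(expr)
#endif

int main(void)
{
	int x = 4;

	ASSERT(x == 4);
	printf("ok\n");
	return 0;
}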
fs/btrfs/dev-replace.c

@@ -704,6 +704,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,

/* replace the sysfs entry */
btrfs_sysfs_rm_device_link(fs_info->fs_devices, src_device);
btrfs_sysfs_update_devid(tgt_device);
btrfs_rm_dev_replace_free_srcdev(src_device);

/* write back the superblocks */
fs/btrfs/discard.c (new file, 702 lines)

@@ -0,0 +1,702 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/ktime.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/math64.h>
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include "ctree.h"
|
||||
#include "block-group.h"
|
||||
#include "discard.h"
|
||||
#include "free-space-cache.h"
|
||||
|
||||
/*
|
||||
* This contains the logic to handle async discard.
|
||||
*
|
||||
* Async discard manages trimming of free space outside of transaction commit.
|
||||
* Discarding is done by managing the block_groups on a LRU list based on free
|
||||
* space recency. Two passes are used to first prioritize discarding extents
|
||||
* and then allow for trimming in the bitmap the best opportunity to coalesce.
|
||||
* The block_groups are maintained on multiple lists to allow for multiple
|
||||
* passes with different discard filter requirements. A delayed work item is
|
||||
* used to manage discarding with timeout determined by a max of the delay
|
||||
* incurred by the iops rate limit, the byte rate limit, and the max delay of
|
||||
* BTRFS_DISCARD_MAX_DELAY.
|
||||
*
|
||||
* Note, this only keeps track of block_groups that are explicitly for data.
|
||||
* Mixed block_groups are not supported.
|
||||
*
|
||||
* The first list is special to manage discarding of fully free block groups.
|
||||
* This is necessary because we issue a final trim for a full free block group
|
||||
* after forgetting it. When a block group becomes unused, instead of directly
|
||||
* being added to the unused_bgs list, we add it to this first list. Then
|
||||
* from there, if it becomes fully discarded, we place it onto the unused_bgs
|
||||
* list.
|
||||
*
|
||||
* The in-memory free space cache serves as the backing state for discard.
|
||||
* Consequently this means there is no persistence. We opt to load all the
|
||||
* block groups in as not discarded, so the mount case degenerates to the
|
||||
* crashing case.
|
||||
*
|
||||
* As the free space cache uses bitmaps, there exists a tradeoff between
|
||||
* ease/efficiency for find_free_extent() and the accuracy of discard state.
|
||||
* Here we opt to let untrimmed regions merge with everything while only letting
|
||||
* trimmed regions merge with other trimmed regions. This can cause
|
||||
* overtrimming, but the coalescing benefit seems to be worth it. Additionally,
|
||||
* bitmap state is tracked as a whole. If we're able to fully trim a bitmap,
|
||||
* the trimmed flag is set on the bitmap. Otherwise, if an allocation comes in,
|
||||
* this resets the state and we will retry trimming the whole bitmap. This is a
|
||||
* tradeoff between discard state accuracy and the cost of accounting.
|
||||
*/
|
||||
|
||||
/* This is an initial delay to give some chance for block reuse */
|
||||
#define BTRFS_DISCARD_DELAY (120ULL * NSEC_PER_SEC)
|
||||
#define BTRFS_DISCARD_UNUSED_DELAY (10ULL * NSEC_PER_SEC)
|
||||
|
||||
/* Target completion latency of discarding all discardable extents */
|
||||
#define BTRFS_DISCARD_TARGET_MSEC (6 * 60 * 60UL * MSEC_PER_SEC)
|
||||
#define BTRFS_DISCARD_MIN_DELAY_MSEC (1UL)
|
||||
#define BTRFS_DISCARD_MAX_DELAY_MSEC (1000UL)
|
||||
#define BTRFS_DISCARD_MAX_IOPS (10U)
|
||||
|
||||
/* Montonically decreasing minimum length filters after index 0 */
|
||||
static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
|
||||
0,
|
||||
BTRFS_ASYNC_DISCARD_MAX_FILTER,
|
||||
BTRFS_ASYNC_DISCARD_MIN_FILTER
|
||||
};
|
||||
|
||||
static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
return &discard_ctl->discard_list[block_group->discard_index];
|
||||
}
|
||||
|
||||
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
if (!btrfs_run_discard_work(discard_ctl))
|
||||
return;
|
||||
|
||||
if (list_empty(&block_group->discard_list) ||
|
||||
block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
|
||||
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
|
||||
block_group->discard_index = BTRFS_DISCARD_INDEX_START;
|
||||
block_group->discard_eligible_time = (ktime_get_ns() +
|
||||
BTRFS_DISCARD_DELAY);
|
||||
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
|
||||
}
|
||||
|
||||
list_move_tail(&block_group->discard_list,
|
||||
get_discard_list(discard_ctl, block_group));
|
||||
}
|
||||
|
||||
static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
if (!btrfs_is_block_group_data_only(block_group))
|
||||
return;
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
__add_to_discard_list(discard_ctl, block_group);
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
spin_lock(&discard_ctl->lock);
|
||||
|
||||
if (!btrfs_run_discard_work(discard_ctl)) {
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
list_del_init(&block_group->discard_list);
|
||||
|
||||
block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
|
||||
block_group->discard_eligible_time = (ktime_get_ns() +
|
||||
BTRFS_DISCARD_UNUSED_DELAY);
|
||||
block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
|
||||
list_add_tail(&block_group->discard_list,
|
||||
&discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
|
||||
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
bool running = false;
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
|
||||
if (block_group == discard_ctl->block_group) {
|
||||
running = true;
|
||||
discard_ctl->block_group = NULL;
|
||||
}
|
||||
|
||||
block_group->discard_eligible_time = 0;
|
||||
list_del_init(&block_group->discard_list);
|
||||
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
|
||||
return running;
|
||||
}
|
||||
|
||||
/**
|
||||
* find_next_block_group - find block_group that's up next for discarding
|
||||
* @discard_ctl: discard control
|
||||
* @now: current time
|
||||
*
|
||||
* Iterate over the discard lists to find the next block_group up for
|
||||
* discarding checking the discard_eligible_time of block_group.
|
||||
*/
|
||||
static struct btrfs_block_group *find_next_block_group(
|
||||
struct btrfs_discard_ctl *discard_ctl,
|
||||
u64 now)
|
||||
{
|
||||
struct btrfs_block_group *ret_block_group = NULL, *block_group;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
|
||||
struct list_head *discard_list = &discard_ctl->discard_list[i];
|
||||
|
||||
if (!list_empty(discard_list)) {
|
||||
block_group = list_first_entry(discard_list,
|
||||
struct btrfs_block_group,
|
||||
discard_list);
|
||||
|
||||
if (!ret_block_group)
|
||||
ret_block_group = block_group;
|
||||
|
||||
if (ret_block_group->discard_eligible_time < now)
|
||||
break;
|
||||
|
||||
if (ret_block_group->discard_eligible_time >
|
||||
block_group->discard_eligible_time)
|
||||
ret_block_group = block_group;
|
||||
}
|
||||
}
|
||||
|
||||
return ret_block_group;
|
||||
}
|
||||
|
||||
/**
|
||||
* peek_discard_list - wrap find_next_block_group()
|
||||
* @discard_ctl: discard control
|
||||
* @discard_state: the discard_state of the block_group after state management
|
||||
* @discard_index: the discard_index of the block_group after state management
|
||||
*
|
||||
* This wraps find_next_block_group() and sets the block_group to be in use.
|
||||
* discard_state's control flow is managed here. Variables related to
|
||||
* discard_state are reset here as needed (eg discard_cursor). @discard_state
|
||||
* and @discard_index are remembered as it may change while we're discarding,
|
||||
* but we want the discard to execute in the context determined here.
|
||||
*/
|
||||
static struct btrfs_block_group *peek_discard_list(
|
||||
struct btrfs_discard_ctl *discard_ctl,
|
||||
enum btrfs_discard_state *discard_state,
|
||||
int *discard_index)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
const u64 now = ktime_get_ns();
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
again:
|
||||
block_group = find_next_block_group(discard_ctl, now);
|
||||
|
||||
if (block_group && now > block_group->discard_eligible_time) {
|
||||
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
|
||||
block_group->used != 0) {
|
||||
if (btrfs_is_block_group_data_only(block_group))
|
||||
__add_to_discard_list(discard_ctl, block_group);
|
||||
else
|
||||
list_del_init(&block_group->discard_list);
|
||||
goto again;
|
||||
}
|
||||
if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
|
||||
block_group->discard_cursor = block_group->start;
|
||||
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
|
||||
}
|
||||
discard_ctl->block_group = block_group;
|
||||
*discard_state = block_group->discard_state;
|
||||
*discard_index = block_group->discard_index;
|
||||
} else {
|
||||
block_group = NULL;
|
||||
}
|
||||
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
|
||||
return block_group;
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_check_filter - updates a block groups filters
|
||||
* @block_group: block group of interest
|
||||
* @bytes: recently freed region size after coalescing
|
||||
*
|
||||
* Async discard maintains multiple lists with progressively smaller filters
|
||||
* to prioritize discarding based on size. Should a free space that matches
|
||||
* a larger filter be returned to the free_space_cache, prioritize that discard
|
||||
* by moving @block_group to the proper filter.
|
||||
*/
|
||||
void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
|
||||
u64 bytes)
|
||||
{
|
||||
struct btrfs_discard_ctl *discard_ctl;
|
||||
|
||||
if (!block_group ||
|
||||
!btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
|
||||
return;
|
||||
|
||||
discard_ctl = &block_group->fs_info->discard_ctl;
|
||||
|
||||
if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
|
||||
bytes >= discard_minlen[block_group->discard_index - 1]) {
|
||||
int i;
|
||||
|
||||
remove_from_discard_list(discard_ctl, block_group);
|
||||
|
||||
for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
|
||||
i++) {
|
||||
if (bytes >= discard_minlen[i]) {
|
||||
block_group->discard_index = i;
|
||||
add_to_discard_list(discard_ctl, block_group);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_update_discard_index - moves a block group along the discard lists
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* Increment @block_group's discard_index. If it falls of the list, let it be.
|
||||
* Otherwise add it back to the appropriate list.
|
||||
*/
|
||||
static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
block_group->discard_index++;
|
||||
if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
|
||||
block_group->discard_index = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
add_to_discard_list(discard_ctl, block_group);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_cancel_work - remove a block_group from the discard lists
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This removes @block_group from the discard lists. If necessary, it waits on
|
||||
* the current work and then reschedules the delayed work.
|
||||
*/
|
||||
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
if (remove_from_discard_list(discard_ctl, block_group)) {
|
||||
cancel_delayed_work_sync(&discard_ctl->work);
|
||||
btrfs_discard_schedule_work(discard_ctl, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_queue_work - handles queuing the block_groups
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This maintains the LRU order of the discard lists.
|
||||
*/
|
||||
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
|
||||
return;
|
||||
|
||||
if (block_group->used == 0)
|
||||
add_to_discard_unused_list(discard_ctl, block_group);
|
||||
else
|
||||
add_to_discard_list(discard_ctl, block_group);
|
||||
|
||||
if (!delayed_work_pending(&discard_ctl->work))
|
||||
btrfs_discard_schedule_work(discard_ctl, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_schedule_work - responsible for scheduling the discard work
|
||||
* @discard_ctl: discard control
|
||||
* @override: override the current timer
|
||||
*
|
||||
* Discards are issued by a delayed workqueue item. @override is used to
|
||||
* update the current delay as the baseline delay interval is reevaluated on
|
||||
* transaction commit. This is also maxed with any other rate limit.
|
||||
*/
|
||||
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
|
||||
bool override)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
const u64 now = ktime_get_ns();
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
|
||||
if (!btrfs_run_discard_work(discard_ctl))
|
||||
goto out;
|
||||
|
||||
if (!override && delayed_work_pending(&discard_ctl->work))
|
||||
goto out;
|
||||
|
||||
block_group = find_next_block_group(discard_ctl, now);
|
||||
if (block_group) {
|
||||
unsigned long delay = discard_ctl->delay;
|
||||
u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
|
||||
|
||||
/*
|
||||
* A single delayed workqueue item is responsible for
|
||||
* discarding, so we can manage the bytes rate limit by keeping
|
||||
* track of the previous discard.
|
||||
*/
|
||||
if (kbps_limit && discard_ctl->prev_discard) {
|
||||
u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
|
||||
u64 bps_delay = div64_u64(discard_ctl->prev_discard *
|
||||
MSEC_PER_SEC, bps_limit);
|
||||
|
||||
delay = max(delay, msecs_to_jiffies(bps_delay));
|
||||
}
|
||||
|
||||
/*
|
||||
* This timeout is to hopefully prevent immediate discarding
|
||||
* in a recently allocated block group.
|
||||
*/
|
||||
if (now < block_group->discard_eligible_time) {
|
||||
u64 bg_timeout = block_group->discard_eligible_time - now;
|
||||
|
||||
delay = max(delay, nsecs_to_jiffies(bg_timeout));
|
||||
}
|
||||
|
||||
mod_delayed_work(discard_ctl->discard_workers,
|
||||
&discard_ctl->work, delay);
|
||||
}
|
||||
out:
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_finish_discard_pass - determine next step of a block_group
|
||||
* @discard_ctl: discard control
|
||||
* @block_group: block_group of interest
|
||||
*
|
||||
* This determines the next step for a block group after it's finished going
|
||||
* through a pass on a discard list. If it is unused and fully trimmed, we can
|
||||
* mark it unused and send it to the unused_bgs path. Otherwise, pass it onto
|
||||
* the appropriate filter list or let it fall off.
|
||||
*/
|
||||
static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
|
||||
struct btrfs_block_group *block_group)
|
||||
{
|
||||
remove_from_discard_list(discard_ctl, block_group);
|
||||
|
||||
if (block_group->used == 0) {
|
||||
if (btrfs_is_free_space_trimmed(block_group))
|
||||
btrfs_mark_bg_unused(block_group);
|
||||
else
|
||||
add_to_discard_unused_list(discard_ctl, block_group);
|
||||
} else {
|
||||
btrfs_update_discard_index(discard_ctl, block_group);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_workfn - discard work function
|
||||
* @work: work
|
||||
*
|
||||
* This finds the next block_group to start discarding and then discards a
|
||||
* single region. It does this in a two-pass fashion: first extents and second
|
||||
* bitmaps. Completely discarded block groups are sent to the unused_bgs path.
|
||||
*/
|
||||
static void btrfs_discard_workfn(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_discard_ctl *discard_ctl;
|
||||
struct btrfs_block_group *block_group;
|
||||
enum btrfs_discard_state discard_state;
|
||||
int discard_index = 0;
|
||||
u64 trimmed = 0;
|
||||
u64 minlen = 0;
|
||||
|
||||
discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
|
||||
|
||||
block_group = peek_discard_list(discard_ctl, &discard_state,
|
||||
&discard_index);
|
||||
if (!block_group || !btrfs_run_discard_work(discard_ctl))
|
||||
return;
|
||||
|
||||
/* Perform discarding */
|
||||
minlen = discard_minlen[discard_index];
|
||||
|
||||
if (discard_state == BTRFS_DISCARD_BITMAPS) {
|
||||
u64 maxlen = 0;
|
||||
|
||||
/*
|
||||
* Use the previous levels minimum discard length as the max
|
||||
* length filter. In the case something is added to make a
|
||||
* region go beyond the max filter, the entire bitmap is set
|
||||
* back to BTRFS_TRIM_STATE_UNTRIMMED.
|
||||
*/
|
||||
if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
|
||||
maxlen = discard_minlen[discard_index - 1];
|
||||
|
||||
btrfs_trim_block_group_bitmaps(block_group, &trimmed,
|
||||
block_group->discard_cursor,
|
||||
btrfs_block_group_end(block_group),
|
||||
minlen, maxlen, true);
|
||||
discard_ctl->discard_bitmap_bytes += trimmed;
|
||||
} else {
|
||||
btrfs_trim_block_group_extents(block_group, &trimmed,
|
||||
block_group->discard_cursor,
|
||||
btrfs_block_group_end(block_group),
|
||||
minlen, true);
|
||||
discard_ctl->discard_extent_bytes += trimmed;
|
||||
}
|
||||
|
||||
discard_ctl->prev_discard = trimmed;
|
||||
|
||||
/* Determine next steps for a block_group */
|
||||
if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
|
||||
if (discard_state == BTRFS_DISCARD_BITMAPS) {
|
||||
btrfs_finish_discard_pass(discard_ctl, block_group);
|
||||
} else {
|
||||
block_group->discard_cursor = block_group->start;
|
||||
spin_lock(&discard_ctl->lock);
|
||||
if (block_group->discard_state !=
|
||||
BTRFS_DISCARD_RESET_CURSOR)
|
||||
block_group->discard_state =
|
||||
BTRFS_DISCARD_BITMAPS;
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
}
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
discard_ctl->block_group = NULL;
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
|
||||
btrfs_discard_schedule_work(discard_ctl, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_run_discard_work - determines if async discard should be running
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
|
||||
*/
|
||||
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
|
||||
struct btrfs_fs_info,
|
||||
discard_ctl);
|
||||
|
||||
return (!(fs_info->sb->s_flags & SB_RDONLY) &&
|
||||
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_calc_delay - recalculate the base delay
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* Recalculate the base delay which is based off the total number of
|
||||
* discardable_extents. Clamp this between the lower_limit (iops_limit or 1ms)
|
||||
* and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
|
||||
*/
|
||||
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
|
||||
{
|
||||
s32 discardable_extents;
|
||||
s64 discardable_bytes;
|
||||
u32 iops_limit;
|
||||
unsigned long delay;
|
||||
unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;
|
||||
|
||||
discardable_extents = atomic_read(&discard_ctl->discardable_extents);
|
||||
if (!discardable_extents)
|
||||
return;
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
|
||||
/*
|
||||
* The following is to fix a potential -1 discrepenancy that we're not
|
||||
* sure how to reproduce. But given that this is the only place that
|
||||
* utilizes these numbers and this is only called by from
|
||||
* btrfs_finish_extent_commit() which is synchronized, we can correct
|
||||
* here.
|
||||
*/
|
||||
if (discardable_extents < 0)
|
||||
atomic_add(-discardable_extents,
|
||||
&discard_ctl->discardable_extents);
|
||||
|
||||
discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
|
||||
if (discardable_bytes < 0)
|
||||
atomic64_add(-discardable_bytes,
|
||||
&discard_ctl->discardable_bytes);
|
||||
|
||||
if (discardable_extents <= 0) {
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
return;
|
||||
}
|
||||
|
||||
iops_limit = READ_ONCE(discard_ctl->iops_limit);
|
||||
if (iops_limit)
|
||||
lower_limit = max_t(unsigned long, lower_limit,
|
||||
MSEC_PER_SEC / iops_limit);
|
||||
|
||||
delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
|
||||
delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
|
||||
discard_ctl->delay = msecs_to_jiffies(delay);
|
||||
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_update_discardable - propagate discard counters
|
||||
* @block_group: block_group of interest
|
||||
* @ctl: free_space_ctl of @block_group
|
||||
*
|
||||
* This propagates deltas of counters up to the discard_ctl. It maintains a
|
||||
* current counter and a previous counter passing the delta up to the global
|
||||
* stat. Then the current counter value becomes the previous counter value.
|
||||
*/
|
||||
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
|
||||
struct btrfs_free_space_ctl *ctl)
|
||||
{
|
||||
struct btrfs_discard_ctl *discard_ctl;
|
||||
s32 extents_delta;
|
||||
s64 bytes_delta;
|
||||
|
||||
if (!block_group ||
|
||||
!btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
|
||||
!btrfs_is_block_group_data_only(block_group))
|
||||
return;
|
||||
|
||||
discard_ctl = &block_group->fs_info->discard_ctl;
|
||||
|
||||
extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
|
||||
ctl->discardable_extents[BTRFS_STAT_PREV];
|
||||
if (extents_delta) {
|
||||
atomic_add(extents_delta, &discard_ctl->discardable_extents);
|
||||
ctl->discardable_extents[BTRFS_STAT_PREV] =
|
||||
ctl->discardable_extents[BTRFS_STAT_CURR];
|
||||
}
|
||||
|
||||
bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
|
||||
ctl->discardable_bytes[BTRFS_STAT_PREV];
|
||||
if (bytes_delta) {
|
||||
atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
|
||||
ctl->discardable_bytes[BTRFS_STAT_PREV] =
|
||||
ctl->discardable_bytes[BTRFS_STAT_CURR];
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
|
||||
* @fs_info: fs_info of interest
|
||||
*
|
||||
* The unused_bgs list needs to be punted to the discard lists because the
|
||||
* order of operations is changed. In the normal sychronous discard path, the
|
||||
* block groups are trimmed via a single large trim in transaction commit. This
|
||||
* is ultimately what we are trying to avoid with asynchronous discard. Thus,
|
||||
* it must be done before going down the unused_bgs path.
|
||||
*/
|
||||
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_block_group *block_group, *next;
|
||||
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
/* We enabled async discard, so punt all to the queue */
|
||||
list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
|
||||
bg_list) {
|
||||
list_del_init(&block_group->bg_list);
|
||||
btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
|
||||
}
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* btrfs_discard_purge_list - purge discard lists
|
||||
* @discard_ctl: discard control
|
||||
*
|
||||
* If we are disabling async discard, we may have intercepted block groups that
|
||||
* are completely free and ready for the unused_bgs path. As discarding will
|
||||
* now happen in transaction commit or not at all, we can safely mark the
|
||||
* corresponding block groups as unused and they will be sent on their merry
|
||||
* way to the unused_bgs list.
|
||||
*/
|
||||
static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
|
||||
{
|
||||
struct btrfs_block_group *block_group, *next;
|
||||
int i;
|
||||
|
||||
spin_lock(&discard_ctl->lock);
|
||||
for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
|
||||
list_for_each_entry_safe(block_group, next,
|
||||
&discard_ctl->discard_list[i],
|
||||
discard_list) {
|
||||
list_del_init(&block_group->discard_list);
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
if (block_group->used == 0)
|
||||
btrfs_mark_bg_unused(block_group);
|
||||
spin_lock(&discard_ctl->lock);
|
||||
}
|
||||
}
|
||||
spin_unlock(&discard_ctl->lock);
|
||||
}
|
||||
|
||||
void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
return;
|
||||
}
|
||||
|
||||
btrfs_discard_punt_unused_bgs_list(fs_info);
|
||||
|
||||
set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
|
||||
}
|
||||
|
||||
void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
|
||||
}
|
||||
|
||||
void btrfs_discard_init(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
|
||||
int i;
|
||||
|
||||
spin_lock_init(&discard_ctl->lock);
|
||||
INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
|
||||
|
||||
for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
|
||||
INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
|
||||
|
||||
discard_ctl->prev_discard = 0;
|
||||
atomic_set(&discard_ctl->discardable_extents, 0);
|
||||
atomic64_set(&discard_ctl->discardable_bytes, 0);
|
||||
discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
|
||||
discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
|
||||
discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
|
||||
discard_ctl->kbps_limit = 0;
|
||||
discard_ctl->discard_extent_bytes = 0;
|
||||
discard_ctl->discard_bitmap_bytes = 0;
|
||||
atomic64_set(&discard_ctl->discard_bytes_saved, 0);
|
||||
}
|
||||
|
||||
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_discard_stop(fs_info);
|
||||
cancel_delayed_work_sync(&fs_info->discard_ctl.work);
|
||||
btrfs_discard_purge_list(&fs_info->discard_ctl);
|
||||
}
|
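A central piece of the new discard.c above is btrfs_discard_calc_delay(), which spaces discards so that all currently discardable extents would be worked through over a roughly six hour target, clamped between an iops-derived floor and one second per discard. The standalone model below reproduces that arithmetic with the constants from the file (BTRFS_DISCARD_TARGET_MSEC, BTRFS_DISCARD_MIN/MAX_DELAY_MSEC, BTRFS_DISCARD_MAX_IOPS); it is a simplified sketch without the kernel's locking or jiffies conversion.

/* User-space model of the async discard delay calculation. */
#include <stdio.h>

#define TARGET_MSEC    (6ULL * 60 * 60 * 1000)  /* ~6 hours */
#define MIN_DELAY_MSEC 1ULL
#define MAX_DELAY_MSEC 1000ULL
#define DEFAULT_IOPS   10ULL

static unsigned long long calc_delay_msec(long long discardable_extents,
					  unsigned long long iops_limit)
{
	unsigned long long lower = MIN_DELAY_MSEC;
	unsigned long long delay;

	if (discardable_extents <= 0)
		return 0;   /* nothing to schedule */
	if (iops_limit && 1000 / iops_limit > lower)
		lower = 1000 / iops_limit;

	delay = TARGET_MSEC / (unsigned long long)discardable_extents;
	if (delay < lower)
		delay = lower;
	if (delay > MAX_DELAY_MSEC)
		delay = MAX_DELAY_MSEC;
	return delay;
}

int main(void)
{
	/* Few extents: capped at 1000 ms between discards. */
	printf("100 extents -> %llu ms\n", calc_delay_msec(100, DEFAULT_IOPS));
	/* Many extents: the 10 iops floor (100 ms) wins. */
	printf("1e6 extents -> %llu ms\n", calc_delay_msec(1000000, DEFAULT_IOPS));
	return 0;
}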
41
fs/btrfs/discard.h
Normal file
41
fs/btrfs/discard.h
Normal file
@ -0,0 +1,41 @@
// SPDX-License-Identifier: GPL-2.0

#ifndef BTRFS_DISCARD_H
#define BTRFS_DISCARD_H

#include <linux/sizes.h>

struct btrfs_fs_info;
struct btrfs_discard_ctl;
struct btrfs_block_group;

/* Discard size limits */
#define BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE   (SZ_64M)
#define BTRFS_ASYNC_DISCARD_MAX_FILTER         (SZ_1M)
#define BTRFS_ASYNC_DISCARD_MIN_FILTER         (SZ_32K)

/* List operations */
void btrfs_discard_check_filter(struct btrfs_block_group *block_group, u64 bytes);

/* Work operations */
void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
                               struct btrfs_block_group *block_group);
void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
                              struct btrfs_block_group *block_group);
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
                                 bool override);
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);

/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);
void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
                                      struct btrfs_free_space_ctl *ctl);

/* Setup/cleanup operations */
void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info);
void btrfs_discard_resume(struct btrfs_fs_info *fs_info);
void btrfs_discard_stop(struct btrfs_fs_info *fs_info);
void btrfs_discard_init(struct btrfs_fs_info *fs_info);
void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info);

#endif
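The header above is the whole setup/teardown surface of the async discard feature. As a rough orientation only (not part of the patch), the entry points pair up over a filesystem's lifetime roughly as sketched below; the helper names example_mount/example_unmount are illustrative and simplified from the open_ctree()/close_ctree() hunks further down.

        /* Illustrative sketch, assuming the call ordering used by open_ctree()/close_ctree(). */
        static void example_mount(struct btrfs_fs_info *fs_info)
        {
                btrfs_discard_init(fs_info);    /* init lock, lists, delayed work, counters */
                /* ... rest of mount setup ... */
                btrfs_discard_resume(fs_info);  /* only starts work when mounted with -o discard=async */
        }

        static void example_unmount(struct btrfs_fs_info *fs_info)
        {
                btrfs_discard_cleanup(fs_info); /* stop, cancel delayed work, purge discard lists */
        }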
@@ -41,6 +41,7 @@
#include "tree-checker.h"
#include "ref-verify.h"
#include "block-group.h"
#include "discard.h"

#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
                               BTRFS_HEADER_FLAG_RELOC |\
@ -202,8 +203,8 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
|
||||
* that covers the entire device
|
||||
*/
|
||||
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset, u64 start, u64 len,
|
||||
int create)
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &inode->extent_tree;
|
||||
struct extent_map *em;
|
||||
@ -1953,6 +1954,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
|
||||
btrfs_destroy_workqueue(fs_info->readahead_workers);
|
||||
btrfs_destroy_workqueue(fs_info->flush_workers);
|
||||
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
|
||||
if (fs_info->discard_ctl.discard_workers)
|
||||
destroy_workqueue(fs_info->discard_ctl.discard_workers);
|
||||
/*
|
||||
* Now that all other work queues are destroyed, we can safely destroy
|
||||
* the queues used for metadata I/O, since tasks from those other work
|
||||
@ -2148,6 +2151,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
|
||||
max_active, 2);
|
||||
fs_info->qgroup_rescan_workers =
|
||||
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
|
||||
fs_info->discard_ctl.discard_workers =
|
||||
alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
|
||||
|
||||
if (!(fs_info->workers && fs_info->delalloc_workers &&
|
||||
fs_info->flush_workers &&
|
||||
@ -2158,7 +2163,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
|
||||
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
|
||||
fs_info->caching_workers && fs_info->readahead_workers &&
|
||||
fs_info->fixup_workers && fs_info->delayed_workers &&
|
||||
fs_info->qgroup_rescan_workers)) {
|
||||
fs_info->qgroup_rescan_workers &&
|
||||
fs_info->discard_ctl.discard_workers)) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
@ -2792,6 +2798,7 @@ int __cold open_ctree(struct super_block *sb,
|
||||
|
||||
btrfs_init_dev_replace_locks(fs_info);
|
||||
btrfs_init_qgroup(fs_info);
|
||||
btrfs_discard_init(fs_info);
|
||||
|
||||
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
|
||||
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
|
||||
@ -3082,20 +3089,13 @@ int __cold open_ctree(struct super_block *sb,
|
||||
|
||||
btrfs_free_extra_devids(fs_devices, 1);
|
||||
|
||||
ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
|
||||
ret = btrfs_sysfs_add_fsid(fs_devices);
|
||||
if (ret) {
|
||||
btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
|
||||
ret);
|
||||
goto fail_block_groups;
|
||||
}
|
||||
|
||||
ret = btrfs_sysfs_add_device(fs_devices);
|
||||
if (ret) {
|
||||
btrfs_err(fs_info, "failed to init sysfs device interface: %d",
|
||||
ret);
|
||||
goto fail_fsdev_sysfs;
|
||||
}
|
||||
|
||||
ret = btrfs_sysfs_add_mounted(fs_info);
|
||||
if (ret) {
|
||||
btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
|
||||
@ -3262,6 +3262,7 @@ int __cold open_ctree(struct super_block *sb,
|
||||
}
|
||||
|
||||
btrfs_qgroup_rescan_resume(fs_info);
|
||||
btrfs_discard_resume(fs_info);
|
||||
|
||||
if (!fs_info->uuid_root) {
|
||||
btrfs_info(fs_info, "creating UUID tree");
|
||||
@ -3978,6 +3979,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
|
||||
cancel_work_sync(&fs_info->async_reclaim_work);
|
||||
|
||||
/* Cancel or finish ongoing discard work */
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
|
||||
if (!sb_rdonly(fs_info->sb)) {
|
||||
/*
|
||||
* The cleaner kthread is stopped, so do one final pass over
|
||||
@ -4026,11 +4030,18 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
|
||||
btrfs_stop_all_workers(fs_info);
|
||||
|
||||
btrfs_free_block_groups(fs_info);
|
||||
|
||||
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
|
||||
free_root_pointers(fs_info, true);
|
||||
|
||||
/*
|
||||
* We must free the block groups after dropping the fs_roots as we could
|
||||
* have had an IO error and have left over tree log blocks that aren't
|
||||
* cleaned up until the fs roots are freed. This makes the block group
|
||||
* accounting appear to be wrong because there's pending reserved bytes,
|
||||
* so make sure we do the block group cleanup afterwards.
|
||||
*/
|
||||
btrfs_free_block_groups(fs_info);
|
||||
|
||||
iput(fs_info->btree_inode);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
|
||||
|
@ -134,8 +134,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
int btree_lock_page_hook(struct page *page, void *data,
|
||||
void (*flush_fn)(void *));
|
||||
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page, size_t pg_offset, u64 start, u64 len,
|
||||
int create);
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 len);
|
||||
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
|
||||
int __init btrfs_end_io_wq_init(void);
|
||||
void __cold btrfs_end_io_wq_exit(void);
|
||||
|
@ -32,6 +32,7 @@
|
||||
#include "block-rsv.h"
|
||||
#include "delalloc-space.h"
|
||||
#include "block-group.h"
|
||||
#include "discard.h"
|
||||
|
||||
#undef SCRAMBLE_DELAYED_REFS
|
||||
|
||||
@ -2923,7 +2924,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
break;
|
||||
}
|
||||
|
||||
if (btrfs_test_opt(fs_info, DISCARD))
|
||||
if (btrfs_test_opt(fs_info, DISCARD_SYNC))
|
||||
ret = btrfs_discard_extent(fs_info, start,
|
||||
end + 1 - start, NULL);
|
||||
|
||||
@ -2934,6 +2935,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
if (btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
|
||||
btrfs_discard_calc_delay(&fs_info->discard_ctl);
|
||||
btrfs_discard_schedule_work(&fs_info->discard_ctl, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Transaction is finished. We don't need the lock anymore. We
|
||||
* do need to clean up the block groups in case of a transaction
|
||||
@ -3438,7 +3444,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
|
||||
*/
|
||||
struct find_free_extent_ctl {
|
||||
/* Basic allocation info */
|
||||
u64 ram_bytes;
|
||||
u64 num_bytes;
|
||||
u64 empty_size;
|
||||
u64 flags;
|
||||
@ -3810,7 +3815,6 @@ static noinline int find_free_extent(struct btrfs_fs_info *fs_info,
|
||||
|
||||
WARN_ON(num_bytes < fs_info->sectorsize);
|
||||
|
||||
ffe_ctl.ram_bytes = ram_bytes;
|
||||
ffe_ctl.num_bytes = num_bytes;
|
||||
ffe_ctl.empty_size = empty_size;
|
||||
ffe_ctl.flags = flags;
|
||||
@ -4165,12 +4169,10 @@ again:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len,
|
||||
int pin, int delalloc)
|
||||
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len, int delalloc)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
int ret = 0;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, start);
|
||||
if (!cache) {
|
||||
@ -4179,32 +4181,30 @@ static int __btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
if (pin)
|
||||
pin_down_extent(cache, start, len, 1);
|
||||
else {
|
||||
if (btrfs_test_opt(fs_info, DISCARD))
|
||||
ret = btrfs_discard_extent(fs_info, start, len, NULL);
|
||||
btrfs_add_free_space(cache, start, len);
|
||||
btrfs_free_reserved_bytes(cache, len, delalloc);
|
||||
trace_btrfs_reserved_extent_free(fs_info, start, len);
|
||||
}
|
||||
btrfs_add_free_space(cache, start, len);
|
||||
btrfs_free_reserved_bytes(cache, len, delalloc);
|
||||
trace_btrfs_reserved_extent_free(fs_info, start, len);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_pin_reserved_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
int ret = 0;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, start);
|
||||
if (!cache) {
|
||||
btrfs_err(fs_info, "unable to find block group for %llu", start);
|
||||
return -ENOSPC;
|
||||
}
|
||||
|
||||
ret = pin_down_extent(cache, start, len, 1);
|
||||
btrfs_put_block_group(cache);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len, int delalloc)
|
||||
{
|
||||
return __btrfs_free_reserved_extent(fs_info, start, len, 0, delalloc);
|
||||
}
|
||||
|
||||
int btrfs_free_and_pin_reserved_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
return __btrfs_free_reserved_extent(fs_info, start, len, 1, 0);
|
||||
}
|
||||
|
||||
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
u64 parent, u64 root_objectid,
|
||||
u64 flags, u64 owner, u64 offset,
|
||||
|
@ -3043,7 +3043,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
|
||||
*em_cached = NULL;
|
||||
}
|
||||
|
||||
em = get_extent(BTRFS_I(inode), page, pg_offset, start, len, 0);
|
||||
em = get_extent(BTRFS_I(inode), page, pg_offset, start, len);
|
||||
if (em_cached && !IS_ERR_OR_NULL(em)) {
|
||||
BUG_ON(*em_cached);
|
||||
refcount_inc(&em->refs);
|
||||
@ -3455,11 +3455,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
update_nr_written(wbc, nr_written + 1);
|
||||
|
||||
end = page_end;
|
||||
if (i_size <= start) {
|
||||
btrfs_writepage_endio_finish_ordered(page, start, page_end, 1);
|
||||
goto done;
|
||||
}
|
||||
|
||||
blocksize = inode->i_sb->s_blocksize;
|
||||
|
||||
while (cur <= end) {
|
||||
@ -3471,8 +3466,8 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
page_end, 1);
|
||||
break;
|
||||
}
|
||||
em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, cur,
|
||||
end - cur + 1, 1);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur,
|
||||
end - cur + 1);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
SetPageError(page);
|
||||
ret = PTR_ERR_OR_ZERO(em);
|
||||
@ -3497,22 +3492,11 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
*/
|
||||
if (compressed || block_start == EXTENT_MAP_HOLE ||
|
||||
block_start == EXTENT_MAP_INLINE) {
|
||||
/*
|
||||
* end_io notification does not happen here for
|
||||
* compressed extents
|
||||
*/
|
||||
if (!compressed)
|
||||
btrfs_writepage_endio_finish_ordered(page, cur,
|
||||
cur + iosize - 1,
|
||||
1);
|
||||
else if (compressed) {
|
||||
/* we don't want to end_page_writeback on
|
||||
* a compressed extent. this happens
|
||||
* elsewhere
|
||||
*/
|
||||
if (compressed)
|
||||
nr++;
|
||||
}
|
||||
|
||||
else
|
||||
btrfs_writepage_endio_finish_ordered(page, cur,
|
||||
cur + iosize - 1, 1);
|
||||
cur += iosize;
|
||||
pg_offset += iosize;
|
||||
continue;
|
||||
@ -3540,7 +3524,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
pg_offset += iosize;
|
||||
nr++;
|
||||
}
|
||||
done:
|
||||
*nr_ret = nr;
|
||||
return ret;
|
||||
}
|
||||
@ -3562,7 +3545,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
|
||||
u64 page_end = start + PAGE_SIZE - 1;
|
||||
int ret;
|
||||
int nr = 0;
|
||||
size_t pg_offset = 0;
|
||||
size_t pg_offset;
|
||||
loff_t i_size = i_size_read(inode);
|
||||
unsigned long end_index = i_size >> PAGE_SHIFT;
|
||||
unsigned long nr_written = 0;
|
||||
@ -3591,14 +3574,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
|
||||
flush_dcache_page(page);
|
||||
}
|
||||
|
||||
pg_offset = 0;
|
||||
|
||||
set_page_extent_mapped(page);
|
||||
|
||||
if (!epd->extent_locked) {
|
||||
ret = writepage_delalloc(inode, page, wbc, start, &nr_written);
|
||||
if (ret == 1)
|
||||
goto done_unlocked;
|
||||
return 0;
|
||||
if (ret)
|
||||
goto done;
|
||||
}
|
||||
@ -3606,7 +3587,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
|
||||
ret = __extent_writepage_io(inode, page, wbc, epd,
|
||||
i_size, nr_written, &nr);
|
||||
if (ret == 1)
|
||||
goto done_unlocked;
|
||||
return 0;
|
||||
|
||||
done:
|
||||
if (nr == 0) {
|
||||
@ -3621,9 +3602,6 @@ done:
|
||||
unlock_page(page);
|
||||
ASSERT(ret <= 0);
|
||||
return ret;
|
||||
|
||||
done_unlocked:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void wait_on_extent_buffer_writeback(struct extent_buffer *eb)
|
||||
@ -3941,6 +3919,11 @@ int btree_write_cache_pages(struct address_space *mapping,
|
||||
if (wbc->range_cyclic) {
|
||||
index = mapping->writeback_index; /* Start from prev offset */
|
||||
end = -1;
|
||||
/*
|
||||
* Start from the beginning does not need to cycle over the
|
||||
* range, mark it as scanned.
|
||||
*/
|
||||
scanned = (index == 0);
|
||||
} else {
|
||||
index = wbc->range_start >> PAGE_SHIFT;
|
||||
end = wbc->range_end >> PAGE_SHIFT;
|
||||
@ -3958,7 +3941,6 @@ retry:
|
||||
tag))) {
|
||||
unsigned i;
|
||||
|
||||
scanned = 1;
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
|
||||
@ -4087,6 +4069,11 @@ static int extent_write_cache_pages(struct address_space *mapping,
|
||||
if (wbc->range_cyclic) {
|
||||
index = mapping->writeback_index; /* Start from prev offset */
|
||||
end = -1;
|
||||
/*
|
||||
* Start from the beginning does not need to cycle over the
|
||||
* range, mark it as scanned.
|
||||
*/
|
||||
scanned = (index == 0);
|
||||
} else {
|
||||
index = wbc->range_start >> PAGE_SHIFT;
|
||||
end = wbc->range_end >> PAGE_SHIFT;
|
||||
@ -4120,7 +4107,6 @@ retry:
|
||||
&index, end, tag))) {
|
||||
unsigned i;
|
||||
|
||||
scanned = 1;
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
|
||||
|
@ -183,10 +183,8 @@ static inline int extent_compress_type(unsigned long bio_flags)
|
||||
struct extent_map_tree;
|
||||
|
||||
typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode,
|
||||
struct page *page,
|
||||
size_t pg_offset,
|
||||
u64 start, u64 len,
|
||||
int create);
|
||||
struct page *page, size_t pg_offset,
|
||||
u64 start, u64 len);
|
||||
|
||||
int try_release_extent_mapping(struct page *page, gfp_t mask);
|
||||
int try_release_extent_buffer(struct page *page);
|
||||
|
@ -148,8 +148,19 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u64 logical_offset, u8 *dst, int dio)
|
||||
/**
|
||||
* btrfs_lookup_bio_sums - Look up checksums for a bio.
|
||||
* @inode: inode that the bio is for.
|
||||
* @bio: bio embedded in btrfs_io_bio.
|
||||
* @offset: Unless (u64)-1, look up checksums for this offset in the file.
|
||||
* If (u64)-1, use the page offsets from the bio instead.
|
||||
* @dst: Buffer of size btrfs_super_csum_size() used to return checksum. If
|
||||
* NULL, the checksum is returned in btrfs_io_bio(bio)->csum instead.
|
||||
*
|
||||
* Return: BLK_STS_RESOURCE if allocating memory fails, BLK_STS_OK otherwise.
|
||||
*/
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u64 offset, u8 *dst)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct bio_vec bvec;
|
||||
@ -158,8 +169,8 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
|
||||
struct btrfs_csum_item *item = NULL;
|
||||
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
|
||||
struct btrfs_path *path;
|
||||
const bool page_offsets = (offset == (u64)-1);
|
||||
u8 *csum;
|
||||
u64 offset = 0;
|
||||
u64 item_start_offset = 0;
|
||||
u64 item_last_offset = 0;
|
||||
u64 disk_bytenr;
|
||||
@ -205,15 +216,13 @@ static blk_status_t __btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio
|
||||
}
|
||||
|
||||
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
|
||||
if (dio)
|
||||
offset = logical_offset;
|
||||
|
||||
bio_for_each_segment(bvec, bio, iter) {
|
||||
page_bytes_left = bvec.bv_len;
|
||||
if (count)
|
||||
goto next;
|
||||
|
||||
if (!dio)
|
||||
if (page_offsets)
|
||||
offset = page_offset(bvec.bv_page) + bvec.bv_offset;
|
||||
count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
|
||||
csum, nblocks);
|
||||
@ -274,7 +283,8 @@ found:
|
||||
csum += count * csum_size;
|
||||
nblocks -= count;
|
||||
next:
|
||||
while (count--) {
|
||||
while (count > 0) {
|
||||
count--;
|
||||
disk_bytenr += fs_info->sectorsize;
|
||||
offset += fs_info->sectorsize;
|
||||
page_bytes_left -= fs_info->sectorsize;
|
||||
@ -285,18 +295,7 @@ next:
|
||||
|
||||
WARN_ON_ONCE(count);
|
||||
btrfs_free_path(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio,
|
||||
u8 *dst)
|
||||
{
|
||||
return __btrfs_lookup_bio_sums(inode, bio, 0, dst, 0);
|
||||
}
|
||||
|
||||
blk_status_t btrfs_lookup_bio_sums_dio(struct inode *inode, struct bio *bio, u64 offset)
|
||||
{
|
||||
return __btrfs_lookup_bio_sums(inode, bio, offset, NULL, 1);
|
||||
return BLK_STS_OK;
|
||||
}
|
||||
|
||||
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
|
||||
@ -483,8 +482,8 @@ blk_status_t btrfs_csum_one_bio(struct inode *inode, struct bio *bio,
|
||||
- 1);
|
||||
|
||||
for (i = 0; i < nr_sectors; i++) {
|
||||
if (offset >= ordered->file_offset + ordered->len ||
|
||||
offset < ordered->file_offset) {
|
||||
if (offset >= ordered->file_offset + ordered->num_bytes ||
|
||||
offset < ordered->file_offset) {
|
||||
unsigned long bytes_left;
|
||||
|
||||
sums->len = this_sum_bytes;
|
||||
|
@ -477,8 +477,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
|
||||
u64 em_len;
|
||||
int ret = 0;
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, 0, search_start,
|
||||
search_len, 0);
|
||||
em = btrfs_get_extent(inode, NULL, 0, search_start, search_len);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
@ -1501,7 +1500,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
|
||||
ordered = btrfs_lookup_ordered_range(inode, start_pos,
|
||||
last_pos - start_pos + 1);
|
||||
if (ordered &&
|
||||
ordered->file_offset + ordered->len > start_pos &&
|
||||
ordered->file_offset + ordered->num_bytes > start_pos &&
|
||||
ordered->file_offset <= last_pos) {
|
||||
unlock_extent_cached(&inode->io_tree, start_pos,
|
||||
last_pos, cached_state);
|
||||
@ -2390,7 +2389,7 @@ static int find_first_non_hole(struct inode *inode, u64 *start, u64 *len)
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
|
||||
round_down(*start, fs_info->sectorsize),
|
||||
round_up(*len, fs_info->sectorsize), 0);
|
||||
round_up(*len, fs_info->sectorsize));
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
@ -2426,7 +2425,7 @@ static int btrfs_punch_hole_lock_range(struct inode *inode,
|
||||
* we need to try again.
|
||||
*/
|
||||
if ((!ordered ||
|
||||
(ordered->file_offset + ordered->len <= lockstart ||
|
||||
(ordered->file_offset + ordered->num_bytes <= lockstart ||
|
||||
ordered->file_offset > lockend)) &&
|
||||
!filemap_range_has_page(inode->i_mapping,
|
||||
lockstart, lockend)) {
|
||||
@ -2957,7 +2956,7 @@ static int btrfs_zero_range_check_range_boundary(struct inode *inode,
|
||||
int ret;
|
||||
|
||||
offset = round_down(offset, sectorsize);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
@ -2990,8 +2989,8 @@ static int btrfs_zero_range(struct inode *inode,
|
||||
|
||||
inode_dio_wait(inode);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
|
||||
alloc_start, alloc_end - alloc_start, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto out;
|
||||
@ -3034,8 +3033,8 @@ static int btrfs_zero_range(struct inode *inode,
|
||||
|
||||
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
|
||||
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0,
|
||||
alloc_start, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
|
||||
sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
goto out;
|
||||
@ -3248,7 +3247,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
ordered = btrfs_lookup_first_ordered_extent(inode, locked_end);
|
||||
|
||||
if (ordered &&
|
||||
ordered->file_offset + ordered->len > alloc_start &&
|
||||
ordered->file_offset + ordered->num_bytes > alloc_start &&
|
||||
ordered->file_offset < alloc_end) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
|
||||
@ -3273,7 +3272,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
INIT_LIST_HEAD(&reserve_list);
|
||||
while (cur_offset < alloc_end) {
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
|
||||
alloc_end - cur_offset, 0);
|
||||
alloc_end - cur_offset);
|
||||
if (IS_ERR(em)) {
|
||||
ret = PTR_ERR(em);
|
||||
break;
|
||||
|
File diff suppressed because it is too large
@@ -6,6 +6,20 @@
#ifndef BTRFS_FREE_SPACE_CACHE_H
#define BTRFS_FREE_SPACE_CACHE_H

/*
 * This is the trim state of an extent or bitmap.
 *
 * BTRFS_TRIM_STATE_TRIMMING is special and used to maintain the state of a
 * bitmap as we may need several trims to fully trim a single bitmap entry.
 * This is reset should any free space other than trimmed space be added to the
 * bitmap.
 */
enum btrfs_trim_state {
        BTRFS_TRIM_STATE_UNTRIMMED,
        BTRFS_TRIM_STATE_TRIMMED,
        BTRFS_TRIM_STATE_TRIMMING,
};

struct btrfs_free_space {
        struct rb_node offset_index;
        u64 offset;
@@ -13,8 +27,21 @@ struct btrfs_free_space {
        u64 max_extent_size;
        unsigned long *bitmap;
        struct list_head list;
        enum btrfs_trim_state trim_state;
        s32 bitmap_extents;
};

static inline bool btrfs_free_space_trimmed(struct btrfs_free_space *info)
{
        return (info->trim_state == BTRFS_TRIM_STATE_TRIMMED);
}

static inline bool btrfs_free_space_trimming_bitmap(
                                        struct btrfs_free_space *info)
{
        return (info->trim_state == BTRFS_TRIM_STATE_TRIMMING);
}

struct btrfs_free_space_ctl {
        spinlock_t tree_lock;
        struct rb_root free_space_offset;
@@ -24,6 +51,8 @@ struct btrfs_free_space_ctl {
        int total_bitmaps;
        int unit;
        u64 start;
        s32 discardable_extents[BTRFS_STAT_NR_ENTRIES];
        s64 discardable_bytes[BTRFS_STAT_NR_ENTRIES];
        const struct btrfs_free_space_op *op;
        void *private;
        struct mutex cache_writeout_mutex;
@@ -83,13 +112,17 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group);
int __btrfs_add_free_space(struct btrfs_fs_info *fs_info,
                           struct btrfs_free_space_ctl *ctl,
                           u64 bytenr, u64 size);
                           u64 bytenr, u64 size,
                           enum btrfs_trim_state trim_state);
int btrfs_add_free_space(struct btrfs_block_group *block_group,
                         u64 bytenr, u64 size);
int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group,
                                       u64 bytenr, u64 size);
int btrfs_remove_free_space(struct btrfs_block_group *block_group,
                            u64 bytenr, u64 size);
void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group);
bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group);
u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group,
                               u64 offset, u64 bytes, u64 empty_size,
                               u64 *max_extent_size);
@@ -108,6 +141,12 @@ int btrfs_return_cluster_to_free_space(
                               struct btrfs_free_cluster *cluster);
int btrfs_trim_block_group(struct btrfs_block_group *block_group,
                           u64 *trimmed, u64 start, u64 end, u64 minlen);
int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group,
                                   u64 *trimmed, u64 start, u64 end, u64 minlen,
                                   bool async);
int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group,
                                   u64 *trimmed, u64 start, u64 end, u64 minlen,
                                   u64 maxlen, bool async);

/* Support functions for running our sanity tests */
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
@ -107,7 +107,7 @@ again:
|
||||
|
||||
if (last != (u64)-1 && last + 1 != key.objectid) {
|
||||
__btrfs_add_free_space(fs_info, ctl, last + 1,
|
||||
key.objectid - last - 1);
|
||||
key.objectid - last - 1, 0);
|
||||
wake_up(&root->ino_cache_wait);
|
||||
}
|
||||
|
||||
@ -118,7 +118,7 @@ next:
|
||||
|
||||
if (last < root->highest_objectid - 1) {
|
||||
__btrfs_add_free_space(fs_info, ctl, last + 1,
|
||||
root->highest_objectid - last - 1);
|
||||
root->highest_objectid - last - 1, 0);
|
||||
}
|
||||
|
||||
spin_lock(&root->ino_cache_lock);
|
||||
@ -175,7 +175,8 @@ static void start_caching(struct btrfs_root *root)
|
||||
ret = btrfs_find_free_objectid(root, &objectid);
|
||||
if (!ret && objectid <= BTRFS_LAST_FREE_OBJECTID) {
|
||||
__btrfs_add_free_space(fs_info, ctl, objectid,
|
||||
BTRFS_LAST_FREE_OBJECTID - objectid + 1);
|
||||
BTRFS_LAST_FREE_OBJECTID - objectid + 1,
|
||||
0);
|
||||
wake_up(&root->ino_cache_wait);
|
||||
}
|
||||
|
||||
@ -221,7 +222,7 @@ void btrfs_return_ino(struct btrfs_root *root, u64 objectid)
|
||||
return;
|
||||
again:
|
||||
if (root->ino_cache_state == BTRFS_CACHE_FINISHED) {
|
||||
__btrfs_add_free_space(fs_info, pinned, objectid, 1);
|
||||
__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
|
||||
} else {
|
||||
down_write(&fs_info->commit_root_sem);
|
||||
spin_lock(&root->ino_cache_lock);
|
||||
@ -234,7 +235,7 @@ again:
|
||||
|
||||
start_caching(root);
|
||||
|
||||
__btrfs_add_free_space(fs_info, pinned, objectid, 1);
|
||||
__btrfs_add_free_space(fs_info, pinned, objectid, 1, 0);
|
||||
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
}
|
||||
@ -281,7 +282,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
|
||||
spin_unlock(rbroot_lock);
|
||||
if (count)
|
||||
__btrfs_add_free_space(root->fs_info, ctl,
|
||||
info->offset, count);
|
||||
info->offset, count, 0);
|
||||
kmem_cache_free(btrfs_free_space_cachep, info);
|
||||
}
|
||||
}
|
||||
|
fs/btrfs/inode.c (834 changed lines): file diff suppressed because it is too large
@ -1128,7 +1128,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
|
||||
|
||||
/* get the big lock and read metadata off disk */
|
||||
lock_extent_bits(io_tree, start, end, &cached);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len);
|
||||
unlock_extent_cached(io_tree, start, end, &cached);
|
||||
|
||||
if (IS_ERR(em))
|
||||
|
@ -20,9 +20,9 @@ static struct kmem_cache *btrfs_ordered_extent_cache;
|
||||
|
||||
static u64 entry_end(struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
if (entry->file_offset + entry->len < entry->file_offset)
|
||||
if (entry->file_offset + entry->num_bytes < entry->file_offset)
|
||||
return (u64)-1;
|
||||
return entry->file_offset + entry->len;
|
||||
return entry->file_offset + entry->num_bytes;
|
||||
}
|
||||
|
||||
/* returns NULL if the insertion worked, or it returns the node it did find
|
||||
@ -52,14 +52,6 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 file_offset,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void ordered_data_tree_panic(struct inode *inode, int errno,
|
||||
u64 offset)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
btrfs_panic(fs_info, errno,
|
||||
"Inconsistency in ordered tree at offset %llu", offset);
|
||||
}
|
||||
|
||||
/*
|
||||
* look for a given offset in the tree, and if it can't be found return the
|
||||
* first lesser offset
|
||||
@ -120,7 +112,7 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 file_offset,
|
||||
static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset)
|
||||
{
|
||||
if (file_offset < entry->file_offset ||
|
||||
entry->file_offset + entry->len <= file_offset)
|
||||
entry->file_offset + entry->num_bytes <= file_offset)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -129,7 +121,7 @@ static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset,
|
||||
u64 len)
|
||||
{
|
||||
if (file_offset + len <= entry->file_offset ||
|
||||
entry->file_offset + entry->len <= file_offset)
|
||||
entry->file_offset + entry->num_bytes <= file_offset)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
@ -161,19 +153,14 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
|
||||
}
|
||||
|
||||
/* allocate and add a new ordered_extent into the per-inode tree.
|
||||
* file_offset is the logical offset in the file
|
||||
*
|
||||
* start is the disk block number of an extent already reserved in the
|
||||
* extent allocation tree
|
||||
*
|
||||
* len is the length of the extent
|
||||
*
|
||||
* The tree is given a single reference on the ordered extent that was
|
||||
* inserted.
|
||||
*/
|
||||
static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len,
|
||||
int type, int dio, int compress_type)
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type, int dio,
|
||||
int compress_type)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
@ -187,10 +174,10 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
return -ENOMEM;
|
||||
|
||||
entry->file_offset = file_offset;
|
||||
entry->start = start;
|
||||
entry->len = len;
|
||||
entry->disk_len = disk_len;
|
||||
entry->bytes_left = len;
|
||||
entry->disk_bytenr = disk_bytenr;
|
||||
entry->num_bytes = num_bytes;
|
||||
entry->disk_num_bytes = disk_num_bytes;
|
||||
entry->bytes_left = num_bytes;
|
||||
entry->inode = igrab(inode);
|
||||
entry->compress_type = compress_type;
|
||||
entry->truncated_len = (u64)-1;
|
||||
@ -198,7 +185,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
set_bit(type, &entry->flags);
|
||||
|
||||
if (dio) {
|
||||
percpu_counter_add_batch(&fs_info->dio_bytes, len,
|
||||
percpu_counter_add_batch(&fs_info->dio_bytes, num_bytes,
|
||||
fs_info->delalloc_batch);
|
||||
set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
|
||||
}
|
||||
@ -219,7 +206,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
node = tree_insert(&tree->tree, file_offset,
|
||||
&entry->rb_node);
|
||||
if (node)
|
||||
ordered_data_tree_panic(inode, -EEXIST, file_offset);
|
||||
btrfs_panic(fs_info, -EEXIST,
|
||||
"inconsistency in ordered tree at offset %llu",
|
||||
file_offset);
|
||||
spin_unlock_irq(&tree->lock);
|
||||
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
@ -247,27 +236,30 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type)
|
||||
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
|
||||
int type)
|
||||
{
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
|
||||
disk_len, type, 0,
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes, type, 0,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type)
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type)
|
||||
{
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
|
||||
disk_len, type, 1,
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes, type, 1,
|
||||
BTRFS_COMPRESS_NONE);
|
||||
}
|
||||
|
||||
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len,
|
||||
int type, int compress_type)
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type,
|
||||
int compress_type)
|
||||
{
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, start, len,
|
||||
disk_len, type, 0,
|
||||
return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
|
||||
num_bytes, disk_num_bytes, type, 0,
|
||||
compress_type);
|
||||
}
|
||||
|
||||
@ -328,8 +320,8 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
|
||||
}
|
||||
|
||||
dec_start = max(*file_offset, entry->file_offset);
|
||||
dec_end = min(*file_offset + io_size, entry->file_offset +
|
||||
entry->len);
|
||||
dec_end = min(*file_offset + io_size,
|
||||
entry->file_offset + entry->num_bytes);
|
||||
*file_offset = dec_end;
|
||||
if (dec_start > dec_end) {
|
||||
btrfs_crit(fs_info, "bad ordering dec_start %llu end %llu",
|
||||
@ -471,10 +463,11 @@ void btrfs_remove_ordered_extent(struct inode *inode,
|
||||
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
||||
spin_unlock(&btrfs_inode->lock);
|
||||
if (root != fs_info->tree_root)
|
||||
btrfs_delalloc_release_metadata(btrfs_inode, entry->len, false);
|
||||
btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
|
||||
false);
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
|
||||
percpu_counter_add_batch(&fs_info->dio_bytes, -entry->len,
|
||||
percpu_counter_add_batch(&fs_info->dio_bytes, -entry->num_bytes,
|
||||
fs_info->delalloc_batch);
|
||||
|
||||
tree = &btrfs_inode->ordered_tree;
|
||||
@ -534,8 +527,8 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
ordered = list_first_entry(&splice, struct btrfs_ordered_extent,
|
||||
root_extent_list);
|
||||
|
||||
if (range_end <= ordered->start ||
|
||||
ordered->start + ordered->disk_len <= range_start) {
|
||||
if (range_end <= ordered->disk_bytenr ||
|
||||
ordered->disk_bytenr + ordered->disk_num_bytes <= range_start) {
|
||||
list_move_tail(&ordered->root_extent_list, &skipped);
|
||||
cond_resched_lock(&root->ordered_extent_lock);
|
||||
continue;
|
||||
@ -619,7 +612,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
|
||||
int wait)
|
||||
{
|
||||
u64 start = entry->file_offset;
|
||||
u64 end = start + entry->len - 1;
|
||||
u64 end = start + entry->num_bytes - 1;
|
||||
|
||||
trace_btrfs_ordered_extent_start(inode, entry);
|
||||
|
||||
@ -680,7 +673,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
break;
|
||||
}
|
||||
if (ordered->file_offset + ordered->len <= start) {
|
||||
if (ordered->file_offset + ordered->num_bytes <= start) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
break;
|
||||
}
|
||||
|
@ -67,14 +67,13 @@ struct btrfs_ordered_extent {
|
||||
/* logical offset in the file */
|
||||
u64 file_offset;
|
||||
|
||||
/* disk byte number */
|
||||
u64 start;
|
||||
|
||||
/* ram length of the extent in bytes */
|
||||
u64 len;
|
||||
|
||||
/* extent length on disk */
|
||||
u64 disk_len;
|
||||
/*
|
||||
* These fields directly correspond to the same fields in
|
||||
* btrfs_file_extent_item.
|
||||
*/
|
||||
u64 disk_bytenr;
|
||||
u64 num_bytes;
|
||||
u64 disk_num_bytes;
|
||||
|
||||
/* number of bytes that still need writing */
|
||||
u64 bytes_left;
|
||||
@ -161,12 +160,15 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
|
||||
u64 *file_offset, u64 io_size,
|
||||
int uptodate);
|
||||
int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type);
|
||||
u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
|
||||
int type);
|
||||
int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len, int type);
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type);
|
||||
int btrfs_add_ordered_extent_compress(struct inode *inode, u64 file_offset,
|
||||
u64 start, u64 len, u64 disk_len,
|
||||
int type, int compress_type);
|
||||
u64 disk_bytenr, u64 num_bytes,
|
||||
u64 disk_num_bytes, int type,
|
||||
int compress_type);
|
||||
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum);
|
||||
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
|
||||
|
@ -317,7 +317,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
|
||||
print_uuid_item(l, btrfs_item_ptr_offset(l, i),
|
||||
btrfs_item_size_nr(l, i));
|
||||
break;
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1243,7 +1243,6 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
u64 dst)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *parent;
|
||||
struct btrfs_qgroup *member;
|
||||
struct btrfs_qgroup_list *list;
|
||||
@ -1259,9 +1258,8 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
return -ENOMEM;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root) {
|
||||
ret = -EINVAL;
|
||||
if (!fs_info->quota_root) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
member = find_qgroup_rb(fs_info, src);
|
||||
@ -1307,7 +1305,6 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
u64 dst)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *parent;
|
||||
struct btrfs_qgroup *member;
|
||||
struct btrfs_qgroup_list *list;
|
||||
@ -1320,9 +1317,8 @@ static int __del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
if (!tmp)
|
||||
return -ENOMEM;
|
||||
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root) {
|
||||
ret = -EINVAL;
|
||||
if (!fs_info->quota_root) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1387,11 +1383,11 @@ int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root) {
|
||||
ret = -EINVAL;
|
||||
if (!fs_info->quota_root) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
quota_root = fs_info->quota_root;
|
||||
qgroup = find_qgroup_rb(fs_info, qgroupid);
|
||||
if (qgroup) {
|
||||
ret = -EEXIST;
|
||||
@ -1416,15 +1412,13 @@ out:
|
||||
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
struct btrfs_qgroup_list *list;
|
||||
int ret = 0;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root) {
|
||||
ret = -EINVAL;
|
||||
if (!fs_info->quota_root) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1465,7 +1459,6 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
|
||||
struct btrfs_qgroup_limit *limit)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
int ret = 0;
|
||||
/* Sometimes we would want to clear the limit on this qgroup.
|
||||
@ -1475,9 +1468,8 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
|
||||
const u64 CLEAR_VALUE = -1;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root) {
|
||||
ret = -EINVAL;
|
||||
if (!fs_info->quota_root) {
|
||||
ret = -ENOTCONN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2582,10 +2574,9 @@ cleanup:
|
||||
int btrfs_run_qgroups(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *quota_root = fs_info->quota_root;
|
||||
int ret = 0;
|
||||
|
||||
if (!quota_root)
|
||||
if (!fs_info->quota_root)
|
||||
return ret;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
@ -2879,7 +2870,6 @@ static bool qgroup_check_limits(struct btrfs_fs_info *fs_info,
|
||||
static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
|
||||
enum btrfs_qgroup_rsv_type type)
|
||||
{
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 ref_root = root->root_key.objectid;
|
||||
@ -2898,8 +2888,7 @@ static int qgroup_reserve(struct btrfs_root *root, u64 num_bytes, bool enforce,
|
||||
enforce = false;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root)
|
||||
if (!fs_info->quota_root)
|
||||
goto out;
|
||||
|
||||
qgroup = find_qgroup_rb(fs_info, ref_root);
|
||||
@ -2966,7 +2955,6 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
u64 ref_root, u64 num_bytes,
|
||||
enum btrfs_qgroup_rsv_type type)
|
||||
{
|
||||
struct btrfs_root *quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator uiter;
|
||||
@ -2984,8 +2972,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
|
||||
quota_root = fs_info->quota_root;
|
||||
if (!quota_root)
|
||||
if (!fs_info->quota_root)
|
||||
goto out;
|
||||
|
||||
qgroup = find_qgroup_rb(fs_info, ref_root);
|
||||
@ -3685,7 +3672,6 @@ void __btrfs_qgroup_free_meta(struct btrfs_root *root, int num_bytes,
|
||||
static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
|
||||
int num_bytes)
|
||||
{
|
||||
struct btrfs_root *quota_root = fs_info->quota_root;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
struct ulist_node *unode;
|
||||
struct ulist_iterator uiter;
|
||||
@ -3693,7 +3679,7 @@ static void qgroup_convert_meta(struct btrfs_fs_info *fs_info, u64 ref_root,
|
||||
|
||||
if (num_bytes == 0)
|
||||
return;
|
||||
if (!quota_root)
|
||||
if (!fs_info->quota_root)
|
||||
return;
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
|
@ -4332,6 +4332,15 @@ static void describe_relocation(struct btrfs_fs_info *fs_info,
|
||||
block_group->start, buf);
|
||||
}
|
||||
|
||||
static const char *stage_to_string(int stage)
|
||||
{
|
||||
if (stage == MOVE_DATA_EXTENTS)
|
||||
return "move data extents";
|
||||
if (stage == UPDATE_DATA_PTRS)
|
||||
return "update data pointers";
|
||||
return "unknown";
|
||||
}
|
||||
|
||||
/*
|
||||
* function to relocate all extents in a block group.
|
||||
*/
|
||||
@ -4406,12 +4415,15 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
|
||||
rc->block_group->length);
|
||||
|
||||
while (1) {
|
||||
int finishes_stage;
|
||||
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
ret = relocate_block_group(rc);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
if (ret < 0)
|
||||
err = ret;
|
||||
|
||||
finishes_stage = rc->stage;
|
||||
/*
|
||||
* We may have gotten ENOSPC after we already dirtied some
|
||||
* extents. If writeout happens while we're relocating a
|
||||
@ -4437,8 +4449,8 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
|
||||
if (rc->extents_found == 0)
|
||||
break;
|
||||
|
||||
btrfs_info(fs_info, "found %llu extents", rc->extents_found);
|
||||
|
||||
btrfs_info(fs_info, "found %llu extents, stage: %s",
|
||||
rc->extents_found, stage_to_string(finishes_stage));
|
||||
}
|
||||
|
||||
WARN_ON(rc->block_group->pinned > 0);
|
||||
@ -4656,7 +4668,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
|
||||
LIST_HEAD(list);
|
||||
|
||||
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
|
||||
BUG_ON(ordered->file_offset != file_pos || ordered->len != len);
|
||||
BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
|
||||
|
||||
disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
|
||||
ret = btrfs_lookup_csums_range(fs_info->csum_root, disk_bytenr,
|
||||
@ -4680,7 +4692,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
|
||||
* disk_len vs real len like with real inodes since it's all
|
||||
* disk length.
|
||||
*/
|
||||
new_bytenr = ordered->start + (sums->bytenr - disk_bytenr);
|
||||
new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
|
||||
sums->bytenr = new_bytenr;
|
||||
|
||||
btrfs_add_ordered_sum(ordered, sums);
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <linux/sched/mm.h>
|
||||
#include <crypto/hash.h>
|
||||
#include "ctree.h"
|
||||
#include "discard.h"
|
||||
#include "volumes.h"
|
||||
#include "disk-io.h"
|
||||
#include "ordered-data.h"
|
||||
@ -3682,7 +3683,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||
if (!cache->removed && !cache->ro && cache->reserved == 0 &&
|
||||
cache->used == 0) {
|
||||
spin_unlock(&cache->lock);
|
||||
btrfs_mark_bg_unused(cache);
|
||||
if (btrfs_test_opt(fs_info, DISCARD_ASYNC))
|
||||
btrfs_discard_queue_work(&fs_info->discard_ctl,
|
||||
cache);
|
||||
else
|
||||
btrfs_mark_bg_unused(cache);
|
||||
} else {
|
||||
spin_unlock(&cache->lock);
|
||||
}
|
||||
|
@ -161,8 +161,7 @@ static inline u64 calc_global_rsv_need_space(struct btrfs_block_rsv *global)
|
||||
|
||||
static int can_overcommit(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, u64 bytes,
|
||||
enum btrfs_reserve_flush_enum flush,
|
||||
bool system_chunk)
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
u64 profile;
|
||||
u64 avail;
|
||||
@ -173,7 +172,7 @@ static int can_overcommit(struct btrfs_fs_info *fs_info,
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
return 0;
|
||||
|
||||
if (system_chunk)
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
profile = btrfs_system_alloc_profile(fs_info);
|
||||
else
|
||||
profile = btrfs_metadata_alloc_profile(fs_info);
|
||||
@ -227,8 +226,7 @@ again:
|
||||
|
||||
/* Check and see if our ticket can be satisified now. */
|
||||
if ((used + ticket->bytes <= space_info->total_bytes) ||
|
||||
can_overcommit(fs_info, space_info, ticket->bytes, flush,
|
||||
false)) {
|
||||
can_overcommit(fs_info, space_info, ticket->bytes, flush)) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info,
|
||||
space_info,
|
||||
ticket->bytes);
|
||||
@ -626,8 +624,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static inline u64
|
||||
btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
bool system_chunk)
|
||||
struct btrfs_space_info *space_info)
|
||||
{
|
||||
struct reserve_ticket *ticket;
|
||||
u64 used;
|
||||
@ -643,13 +640,12 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
|
||||
to_reclaim = min_t(u64, num_online_cpus() * SZ_1M, SZ_16M);
|
||||
if (can_overcommit(fs_info, space_info, to_reclaim,
|
||||
BTRFS_RESERVE_FLUSH_ALL, system_chunk))
|
||||
BTRFS_RESERVE_FLUSH_ALL))
|
||||
return 0;
|
||||
|
||||
used = btrfs_space_info_used(space_info, true);
|
||||
|
||||
if (can_overcommit(fs_info, space_info, SZ_1M,
|
||||
BTRFS_RESERVE_FLUSH_ALL, system_chunk))
|
||||
if (can_overcommit(fs_info, space_info, SZ_1M, BTRFS_RESERVE_FLUSH_ALL))
|
||||
expected = div_factor_fine(space_info->total_bytes, 95);
|
||||
else
|
||||
expected = div_factor_fine(space_info->total_bytes, 90);
|
||||
@ -665,7 +661,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
u64 used, bool system_chunk)
|
||||
u64 used)
|
||||
{
|
||||
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
|
||||
|
||||
@ -673,8 +669,7 @@ static inline int need_do_async_reclaim(struct btrfs_fs_info *fs_info,
|
||||
if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
|
||||
return 0;
|
||||
|
||||
if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info,
|
||||
system_chunk))
|
||||
if (!btrfs_calc_reclaim_metadata_size(fs_info, space_info))
|
||||
return 0;
|
||||
|
||||
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
|
||||
@ -765,8 +760,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
||||
space_info = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
|
||||
false);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
|
||||
if (!to_reclaim) {
|
||||
space_info->flush = 0;
|
||||
spin_unlock(&space_info->lock);
|
||||
@ -785,8 +779,7 @@ static void btrfs_async_reclaim_metadata_space(struct work_struct *work)
|
||||
return;
|
||||
}
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info,
|
||||
space_info,
|
||||
false);
|
||||
space_info);
|
||||
if (last_tickets_id == space_info->tickets_id) {
|
||||
flush_state++;
|
||||
} else {
|
||||
@ -858,8 +851,7 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
|
||||
int flush_state;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info,
|
||||
false);
|
||||
to_reclaim = btrfs_calc_reclaim_metadata_size(fs_info, space_info);
|
||||
if (!to_reclaim) {
|
||||
spin_unlock(&space_info->lock);
|
||||
return;
|
||||
@ -990,8 +982,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
|
||||
static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
u64 orig_bytes,
|
||||
enum btrfs_reserve_flush_enum flush,
|
||||
bool system_chunk)
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
struct reserve_ticket ticket;
|
||||
u64 used;
|
||||
@ -1013,8 +1004,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
*/
|
||||
if (!pending_tickets &&
|
||||
((used + orig_bytes <= space_info->total_bytes) ||
|
||||
can_overcommit(fs_info, space_info, orig_bytes, flush,
|
||||
system_chunk))) {
|
||||
can_overcommit(fs_info, space_info, orig_bytes, flush))) {
|
||||
btrfs_space_info_update_bytes_may_use(fs_info, space_info,
|
||||
orig_bytes);
|
||||
ret = 0;
|
||||
@ -1054,8 +1044,7 @@ static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info,
|
||||
* the async reclaim as we will panic.
|
||||
*/
|
||||
if (!test_bit(BTRFS_FS_LOG_RECOVERING, &fs_info->flags) &&
|
||||
need_do_async_reclaim(fs_info, space_info,
|
||||
used, system_chunk) &&
|
||||
need_do_async_reclaim(fs_info, space_info, used) &&
|
||||
!work_busy(&fs_info->async_reclaim_work)) {
|
||||
trace_btrfs_trigger_flush(fs_info, space_info->flags,
|
||||
orig_bytes, flush, "preempt");
|
||||
@ -1092,10 +1081,9 @@ int btrfs_reserve_metadata_bytes(struct btrfs_root *root,
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv;
|
||||
int ret;
|
||||
bool system_chunk = (root == fs_info->chunk_root);
|
||||
|
||||
ret = __reserve_metadata_bytes(fs_info, block_rsv->space_info,
|
||||
orig_bytes, flush, system_chunk);
|
||||
orig_bytes, flush);
|
||||
if (ret == -ENOSPC &&
|
||||
unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
|
||||
if (block_rsv != global_rsv &&
|
||||
|
@ -46,6 +46,7 @@
|
||||
#include "sysfs.h"
|
||||
#include "tests/btrfs-tests.h"
|
||||
#include "block-group.h"
|
||||
#include "discard.h"
|
||||
|
||||
#include "qgroup.h"
|
||||
#define CREATE_TRACE_POINTS
|
||||
@ -146,6 +147,8 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
|
||||
if (sb_rdonly(sb))
|
||||
return;
|
||||
|
||||
btrfs_discard_stop(fs_info);
|
||||
|
||||
/* btrfs handle error by forcing the filesystem readonly */
|
||||
sb->s_flags |= SB_RDONLY;
|
||||
btrfs_info(fs_info, "forced readonly");
|
||||
@ -313,6 +316,7 @@ enum {
|
||||
Opt_datasum, Opt_nodatasum,
|
||||
Opt_defrag, Opt_nodefrag,
|
||||
Opt_discard, Opt_nodiscard,
|
||||
Opt_discard_mode,
|
||||
Opt_nologreplay,
|
||||
Opt_norecovery,
|
||||
Opt_ratio,
|
||||
@ -375,6 +379,7 @@ static const match_table_t tokens = {
|
||||
{Opt_defrag, "autodefrag"},
|
||||
{Opt_nodefrag, "noautodefrag"},
|
||||
{Opt_discard, "discard"},
|
||||
{Opt_discard_mode, "discard=%s"},
|
||||
{Opt_nodiscard, "nodiscard"},
|
||||
{Opt_nologreplay, "nologreplay"},
|
||||
{Opt_norecovery, "norecovery"},
|
||||
@@ -695,12 +700,26 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
                                           info->metadata_ratio);
                        break;
                case Opt_discard:
                        btrfs_set_and_info(info, DISCARD,
                                           "turning on discard");
                case Opt_discard_mode:
                        if (token == Opt_discard ||
                            strcmp(args[0].from, "sync") == 0) {
                                btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC);
                                btrfs_set_and_info(info, DISCARD_SYNC,
                                                   "turning on sync discard");
                        } else if (strcmp(args[0].from, "async") == 0) {
                                btrfs_clear_opt(info->mount_opt, DISCARD_SYNC);
                                btrfs_set_and_info(info, DISCARD_ASYNC,
                                                   "turning on async discard");
                        } else {
                                ret = -EINVAL;
                                goto out;
                        }
                        break;
                case Opt_nodiscard:
                        btrfs_clear_and_info(info, DISCARD,
                        btrfs_clear_and_info(info, DISCARD_SYNC,
                                             "turning off discard");
                        btrfs_clear_and_info(info, DISCARD_ASYNC,
                                             "turning off async discard");
                        break;
                case Opt_space_cache:
                case Opt_space_cache_version:
@ -1322,8 +1341,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
seq_puts(seq, ",nologreplay");
|
||||
if (btrfs_test_opt(info, FLUSHONCOMMIT))
|
||||
seq_puts(seq, ",flushoncommit");
|
||||
if (btrfs_test_opt(info, DISCARD))
|
||||
if (btrfs_test_opt(info, DISCARD_SYNC))
|
||||
seq_puts(seq, ",discard");
|
||||
if (btrfs_test_opt(info, DISCARD_ASYNC))
|
||||
seq_puts(seq, ",discard=async");
|
||||
if (!(info->sb->s_flags & SB_POSIXACL))
|
||||
seq_puts(seq, ",noacl");
|
||||
if (btrfs_test_opt(info, SPACE_CACHE))
|
||||
@ -1713,6 +1734,14 @@ static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info,
|
||||
btrfs_cleanup_defrag_inodes(fs_info);
|
||||
}
|
||||
|
||||
/* If we toggled discard async */
|
||||
if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
|
||||
btrfs_test_opt(fs_info, DISCARD_ASYNC))
|
||||
btrfs_discard_resume(fs_info);
|
||||
else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) &&
|
||||
!btrfs_test_opt(fs_info, DISCARD_ASYNC))
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
|
||||
clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
|
||||
}
|
||||
|
||||
@ -1760,6 +1789,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
*/
|
||||
cancel_work_sync(&fs_info->async_reclaim_work);
|
||||
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
|
||||
/* wait for the uuid_scan task to finish */
|
||||
down(&fs_info->uuid_tree_rescan_sem);
|
||||
/* avoid complains from lockdep et al. */
|
||||
|
fs/btrfs/sysfs.c (394 changed lines)
@ -12,6 +12,7 @@
|
||||
#include <crypto/hash.h>
|
||||
|
||||
#include "ctree.h"
|
||||
#include "discard.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "sysfs.h"
|
||||
@ -338,12 +339,178 @@ static const struct attribute_group btrfs_static_feature_attr_group = {
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
|
||||
/*
|
||||
* Discard statistics and tunables
|
||||
*/
|
||||
#define discard_to_fs_info(_kobj) to_fs_info((_kobj)->parent->parent)
|
||||
|
||||
static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discardable_bytes));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_bytes, btrfs_discardable_bytes_show);
|
||||
|
||||
static ssize_t btrfs_discardable_extents_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n",
|
||||
atomic_read(&fs_info->discard_ctl.discardable_extents));
|
||||
}
|
||||
BTRFS_ATTR(discard, discardable_extents, btrfs_discardable_extents_show);
|
||||
|
||||
static ssize_t btrfs_discard_bitmap_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
fs_info->discard_ctl.discard_bitmap_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bitmap_bytes, btrfs_discard_bitmap_bytes_show);
|
||||
|
||||
static ssize_t btrfs_discard_bytes_saved_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
atomic64_read(&fs_info->discard_ctl.discard_bytes_saved));
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_bytes_saved, btrfs_discard_bytes_saved_show);
|
||||
|
||||
static ssize_t btrfs_discard_extent_bytes_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%lld\n",
|
||||
fs_info->discard_ctl.discard_extent_bytes);
|
||||
}
|
||||
BTRFS_ATTR(discard, discard_extent_bytes, btrfs_discard_extent_bytes_show);
|
||||
|
||||
static ssize_t btrfs_discard_iops_limit_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.iops_limit));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_iops_limit_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
|
||||
u32 iops_limit;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou32(buf, 10, &iops_limit);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(discard_ctl->iops_limit, iops_limit);
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(discard, iops_limit, btrfs_discard_iops_limit_show,
|
||||
btrfs_discard_iops_limit_store);
|
||||
|
||||
static ssize_t btrfs_discard_kbps_limit_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u\n",
|
||||
READ_ONCE(fs_info->discard_ctl.kbps_limit));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_kbps_limit_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
|
||||
u32 kbps_limit;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou32(buf, 10, &kbps_limit);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(discard_ctl->kbps_limit, kbps_limit);
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(discard, kbps_limit, btrfs_discard_kbps_limit_show,
|
||||
btrfs_discard_kbps_limit_store);
|
||||
|
||||
static ssize_t btrfs_discard_max_discard_size_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu\n",
|
||||
READ_ONCE(fs_info->discard_ctl.max_discard_size));
|
||||
}
|
||||
|
||||
static ssize_t btrfs_discard_max_discard_size_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = discard_to_fs_info(kobj);
|
||||
struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
|
||||
u64 max_discard_size;
|
||||
int ret;
|
||||
|
||||
ret = kstrtou64(buf, 10, &max_discard_size);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
WRITE_ONCE(discard_ctl->max_discard_size, max_discard_size);
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show,
|
||||
btrfs_discard_max_discard_size_store);
|
||||
|
||||
static const struct attribute *discard_debug_attrs[] = {
|
||||
BTRFS_ATTR_PTR(discard, discardable_bytes),
|
||||
BTRFS_ATTR_PTR(discard, discardable_extents),
|
||||
BTRFS_ATTR_PTR(discard, discard_bitmap_bytes),
|
||||
BTRFS_ATTR_PTR(discard, discard_bytes_saved),
|
||||
BTRFS_ATTR_PTR(discard, discard_extent_bytes),
|
||||
BTRFS_ATTR_PTR(discard, iops_limit),
|
||||
BTRFS_ATTR_PTR(discard, kbps_limit),
|
||||
BTRFS_ATTR_PTR(discard, max_discard_size),
|
||||
NULL,
|
||||
};
|
||||
|
||||
/*
|
||||
* Runtime debugging exported via sysfs
|
||||
*
|
||||
* /sys/fs/btrfs/debug - applies to module or all filesystems
|
||||
* /sys/fs/btrfs/UUID - applies only to the given filesystem
|
||||
*/
|
||||
static const struct attribute *btrfs_debug_mount_attrs[] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *btrfs_debug_feature_attrs[] = {
|
||||
NULL
|
||||
};
|
||||
@ -734,10 +901,10 @@ static int addrm_unknown_feature_attrs(struct btrfs_fs_info *fs_info, bool add)
|
||||
|
||||
static void __btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs)
|
||||
{
|
||||
if (fs_devs->device_dir_kobj) {
|
||||
kobject_del(fs_devs->device_dir_kobj);
|
||||
kobject_put(fs_devs->device_dir_kobj);
|
||||
fs_devs->device_dir_kobj = NULL;
|
||||
if (fs_devs->devices_kobj) {
|
||||
kobject_del(fs_devs->devices_kobj);
|
||||
kobject_put(fs_devs->devices_kobj);
|
||||
fs_devs->devices_kobj = NULL;
|
||||
}
|
||||
|
||||
if (fs_devs->fsid_kobj.state_initialized) {
|
||||
@ -771,6 +938,19 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
|
||||
kobject_del(fs_info->space_info_kobj);
|
||||
kobject_put(fs_info->space_info_kobj);
|
||||
}
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
if (fs_info->discard_debug_kobj) {
|
||||
sysfs_remove_files(fs_info->discard_debug_kobj,
|
||||
discard_debug_attrs);
|
||||
kobject_del(fs_info->discard_debug_kobj);
|
||||
kobject_put(fs_info->discard_debug_kobj);
|
||||
}
|
||||
if (fs_info->debug_kobj) {
|
||||
sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
|
||||
kobject_del(fs_info->debug_kobj);
|
||||
kobject_put(fs_info->debug_kobj);
|
||||
}
|
||||
#endif
|
||||
addrm_unknown_feature_attrs(fs_info, false);
|
||||
sysfs_remove_group(&fs_info->fs_devices->fsid_kobj, &btrfs_feature_attr_group);
|
||||
sysfs_remove_files(&fs_info->fs_devices->fsid_kobj, btrfs_attrs);
|
||||
@ -969,45 +1149,119 @@ int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct hd_struct *disk;
|
||||
struct kobject *disk_kobj;
|
||||
|
||||
if (!fs_devices->device_dir_kobj)
|
||||
if (!fs_devices->devices_kobj)
|
||||
return -EINVAL;
|
||||
|
||||
if (one_device && one_device->bdev) {
|
||||
disk = one_device->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
if (one_device) {
|
||||
if (one_device->bdev) {
|
||||
disk = one_device->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
sysfs_remove_link(fs_devices->devices_kobj,
|
||||
disk_kobj->name);
|
||||
}
|
||||
|
||||
sysfs_remove_link(fs_devices->device_dir_kobj,
|
||||
disk_kobj->name);
|
||||
}
|
||||
kobject_del(&one_device->devid_kobj);
|
||||
kobject_put(&one_device->devid_kobj);
|
||||
|
||||
wait_for_completion(&one_device->kobj_unregister);
|
||||
|
||||
if (one_device)
|
||||
return 0;
|
||||
}
|
||||
|
||||
list_for_each_entry(one_device,
|
||||
&fs_devices->devices, dev_list) {
|
||||
if (!one_device->bdev)
|
||||
continue;
|
||||
disk = one_device->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
list_for_each_entry(one_device, &fs_devices->devices, dev_list) {
|
||||
|
||||
sysfs_remove_link(fs_devices->device_dir_kobj,
|
||||
disk_kobj->name);
|
||||
if (one_device->bdev) {
|
||||
disk = one_device->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
sysfs_remove_link(fs_devices->devices_kobj,
|
||||
disk_kobj->name);
|
||||
}
|
||||
kobject_del(&one_device->devid_kobj);
|
||||
kobject_put(&one_device->devid_kobj);
|
||||
|
||||
wait_for_completion(&one_device->kobj_unregister);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs)
|
||||
static ssize_t btrfs_devinfo_in_fs_metadata_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
if (!fs_devs->device_dir_kobj)
|
||||
fs_devs->device_dir_kobj = kobject_create_and_add("devices",
|
||||
&fs_devs->fsid_kobj);
|
||||
int val;
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
if (!fs_devs->device_dir_kobj)
|
||||
return -ENOMEM;
|
||||
val = !!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
|
||||
|
||||
return 0;
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, in_fs_metadata, btrfs_devinfo_in_fs_metadata_show);
|
||||
|
||||
static ssize_t btrfs_sysfs_missing_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
{
|
||||
int val;
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, missing, btrfs_sysfs_missing_show);
|
||||
|
||||
static ssize_t btrfs_devinfo_replace_target_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
int val;
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, replace_target, btrfs_devinfo_replace_target_show);
|
||||
|
||||
static ssize_t btrfs_devinfo_writeable_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a, char *buf)
|
||||
{
|
||||
int val;
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
val = !!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%d\n", val);
|
||||
}
|
||||
BTRFS_ATTR(devid, writeable, btrfs_devinfo_writeable_show);
|
||||
|
||||
static struct attribute *devid_attrs[] = {
|
||||
BTRFS_ATTR_PTR(devid, in_fs_metadata),
|
||||
BTRFS_ATTR_PTR(devid, missing),
|
||||
BTRFS_ATTR_PTR(devid, replace_target),
|
||||
BTRFS_ATTR_PTR(devid, writeable),
|
||||
NULL
|
||||
};
|
||||
ATTRIBUTE_GROUPS(devid);
|
||||
|
||||
static void btrfs_release_devid_kobj(struct kobject *kobj)
|
||||
{
|
||||
struct btrfs_device *device = container_of(kobj, struct btrfs_device,
|
||||
devid_kobj);
|
||||
|
||||
memset(&device->devid_kobj, 0, sizeof(struct kobject));
|
||||
complete(&device->kobj_unregister);
|
||||
}
|
||||
|
||||
static struct kobj_type devid_ktype = {
|
||||
.sysfs_ops = &kobj_sysfs_ops,
|
||||
.default_groups = devid_groups,
|
||||
.release = btrfs_release_devid_kobj,
|
||||
};
|
||||
|
||||
int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device)
|
||||
@ -1016,22 +1270,31 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *dev;
|
||||
|
||||
list_for_each_entry(dev, &fs_devices->devices, dev_list) {
|
||||
struct hd_struct *disk;
|
||||
struct kobject *disk_kobj;
|
||||
|
||||
if (!dev->bdev)
|
||||
continue;
|
||||
|
||||
if (one_device && one_device != dev)
|
||||
continue;
|
||||
|
||||
disk = dev->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
if (dev->bdev) {
|
||||
struct hd_struct *disk;
|
||||
struct kobject *disk_kobj;
|
||||
|
||||
error = sysfs_create_link(fs_devices->device_dir_kobj,
|
||||
disk_kobj, disk_kobj->name);
|
||||
if (error)
|
||||
disk = dev->bdev->bd_part;
|
||||
disk_kobj = &part_to_dev(disk)->kobj;
|
||||
|
||||
error = sysfs_create_link(fs_devices->devices_kobj,
|
||||
disk_kobj, disk_kobj->name);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
|
||||
init_completion(&dev->kobj_unregister);
|
||||
error = kobject_init_and_add(&dev->devid_kobj, &devid_ktype,
|
||||
fs_devices->devices_kobj, "%llu",
|
||||
dev->devid);
|
||||
if (error) {
|
||||
kobject_put(&dev->devid_kobj);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return error;
|
||||
@ -1063,27 +1326,49 @@ void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices,
|
||||
"sysfs: failed to create fsid for sprout");
|
||||
}
|
||||
|
||||
void btrfs_sysfs_update_devid(struct btrfs_device *device)
|
||||
{
|
||||
char tmp[24];
|
||||
|
||||
snprintf(tmp, sizeof(tmp), "%llu", device->devid);
|
||||
|
||||
if (kobject_rename(&device->devid_kobj, tmp))
|
||||
btrfs_warn(device->fs_devices->fs_info,
|
||||
"sysfs: failed to update devid for %llu",
|
||||
device->devid);
|
||||
}
|
||||
|
||||
/* /sys/fs/btrfs/ entry */
|
||||
static struct kset *btrfs_kset;
|
||||
|
||||
/*
|
||||
* Creates:
|
||||
* /sys/fs/btrfs/UUID
|
||||
*
|
||||
* Can be called by the device discovery thread.
|
||||
* And parent can be specified for seed device
|
||||
*/
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
|
||||
struct kobject *parent)
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs)
|
||||
{
|
||||
int error;
|
||||
|
||||
init_completion(&fs_devs->kobj_unregister);
|
||||
fs_devs->fsid_kobj.kset = btrfs_kset;
|
||||
error = kobject_init_and_add(&fs_devs->fsid_kobj,
|
||||
&btrfs_ktype, parent, "%pU", fs_devs->fsid);
|
||||
error = kobject_init_and_add(&fs_devs->fsid_kobj, &btrfs_ktype, NULL,
|
||||
"%pU", fs_devs->fsid);
|
||||
if (error) {
|
||||
kobject_put(&fs_devs->fsid_kobj);
|
||||
return error;
|
||||
}
|
||||
|
||||
fs_devs->devices_kobj = kobject_create_and_add("devices",
|
||||
&fs_devs->fsid_kobj);
|
||||
if (!fs_devs->devices_kobj) {
|
||||
btrfs_err(fs_devs->fs_info,
|
||||
"failed to init sysfs device interface");
|
||||
kobject_put(&fs_devs->fsid_kobj);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1111,8 +1396,26 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
|
||||
goto failure;
|
||||
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
error = sysfs_create_group(fsid_kobj,
|
||||
&btrfs_debug_feature_attr_group);
|
||||
fs_info->debug_kobj = kobject_create_and_add("debug", fsid_kobj);
|
||||
if (!fs_info->debug_kobj) {
|
||||
error = -ENOMEM;
|
||||
goto failure;
|
||||
}
|
||||
|
||||
error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
|
||||
if (error)
|
||||
goto failure;
|
||||
|
||||
/* Discard directory */
|
||||
fs_info->discard_debug_kobj = kobject_create_and_add("discard",
|
||||
fs_info->debug_kobj);
|
||||
if (!fs_info->discard_debug_kobj) {
|
||||
error = -ENOMEM;
|
||||
goto failure;
|
||||
}
|
||||
|
||||
error = sysfs_create_files(fs_info->discard_debug_kobj,
|
||||
discard_debug_attrs);
|
||||
if (error)
|
||||
goto failure;
|
||||
#endif
|
||||
@ -1209,6 +1512,9 @@ void __cold btrfs_exit_sysfs(void)
|
||||
sysfs_unmerge_group(&btrfs_kset->kobj,
|
||||
&btrfs_static_feature_attr_group);
|
||||
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
sysfs_remove_group(&btrfs_kset->kobj, &btrfs_debug_feature_attr_group);
|
||||
#endif
|
||||
kset_unregister(btrfs_kset);
|
||||
}
|
||||
|
||||
|
@ -18,9 +18,7 @@ int btrfs_sysfs_add_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_sysfs_rm_device_link(struct btrfs_fs_devices *fs_devices,
|
||||
struct btrfs_device *one_device);
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs,
|
||||
struct kobject *parent);
|
||||
int btrfs_sysfs_add_device(struct btrfs_fs_devices *fs_devs);
|
||||
int btrfs_sysfs_add_fsid(struct btrfs_fs_devices *fs_devs);
|
||||
void btrfs_sysfs_remove_fsid(struct btrfs_fs_devices *fs_devs);
|
||||
void btrfs_sysfs_update_sprout_fsid(struct btrfs_fs_devices *fs_devices,
|
||||
const u8 *fsid);
|
||||
@ -36,5 +34,6 @@ void btrfs_sysfs_add_block_group_type(struct btrfs_block_group *cache);
|
||||
int btrfs_sysfs_add_space_info_type(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info);
|
||||
void btrfs_sysfs_remove_space_info(struct btrfs_space_info *space_info);
|
||||
void btrfs_sysfs_update_devid(struct btrfs_device *device);
|
||||
|
||||
#endif
|
||||
|
@ -86,6 +86,27 @@ static void btrfs_destroy_test_fs(void)
|
||||
unregister_filesystem(&test_type);
|
||||
}
|
||||
|
||||
struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_device *dev;
|
||||
|
||||
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
|
||||
if (!dev)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
extent_io_tree_init(NULL, &dev->alloc_state, 0, NULL);
|
||||
INIT_LIST_HEAD(&dev->dev_list);
|
||||
list_add(&dev->dev_list, &fs_info->fs_devices->devices);
|
||||
|
||||
return dev;
|
||||
}
|
||||
|
||||
static void btrfs_free_dummy_device(struct btrfs_device *dev)
|
||||
{
|
||||
extent_io_tree_release(&dev->alloc_state);
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = kzalloc(sizeof(struct btrfs_fs_info),
|
||||
@ -132,12 +153,14 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize)
|
||||
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
|
||||
INIT_LIST_HEAD(&fs_info->dead_roots);
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
INIT_LIST_HEAD(&fs_info->fs_devices->devices);
|
||||
INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
|
||||
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
|
||||
extent_io_tree_init(fs_info, &fs_info->freed_extents[0],
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS0, NULL);
|
||||
extent_io_tree_init(fs_info, &fs_info->freed_extents[1],
|
||||
IO_TREE_FS_INFO_FREED_EXTENTS1, NULL);
|
||||
extent_map_tree_init(&fs_info->mapping_tree);
|
||||
fs_info->pinned_extents = &fs_info->freed_extents[0];
|
||||
set_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
|
||||
|
||||
@ -150,6 +173,7 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct radix_tree_iter iter;
|
||||
void **slot;
|
||||
struct btrfs_device *dev, *tmp;
|
||||
|
||||
if (!fs_info)
|
||||
return;
|
||||
@ -180,6 +204,11 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
spin_unlock(&fs_info->buffer_lock);
|
||||
|
||||
btrfs_mapping_tree_free(&fs_info->mapping_tree);
|
||||
list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
|
||||
dev_list) {
|
||||
btrfs_free_dummy_device(dev);
|
||||
}
|
||||
btrfs_free_qgroup_config(fs_info);
|
||||
btrfs_free_fs_roots(fs_info);
|
||||
cleanup_srcu_struct(&fs_info->subvol_srcu);
|
||||
|
@ -46,6 +46,7 @@ btrfs_alloc_dummy_block_group(struct btrfs_fs_info *fs_info, unsigned long lengt
|
||||
void btrfs_free_dummy_block_group(struct btrfs_block_group *cache);
|
||||
void btrfs_init_dummy_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_device *btrfs_alloc_dummy_device(struct btrfs_fs_info *fs_info);
|
||||
#else
|
||||
static inline int btrfs_run_sanity_tests(void)
|
||||
{
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include <linux/types.h>
|
||||
#include "btrfs-tests.h"
|
||||
#include "../ctree.h"
|
||||
#include "../volumes.h"
|
||||
#include "../disk-io.h"
|
||||
#include "../block-group.h"
|
||||
|
||||
static void free_extent_map_tree(struct extent_map_tree *em_tree)
|
||||
{
|
||||
@ -437,11 +440,153 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct rmap_test_vector {
|
||||
u64 raid_type;
|
||||
u64 physical_start;
|
||||
u64 data_stripe_size;
|
||||
u64 num_data_stripes;
|
||||
u64 num_stripes;
|
||||
/* Assume we won't have more than 5 physical stripes */
|
||||
u64 data_stripe_phys_start[5];
|
||||
bool expected_mapped_addr;
|
||||
/* Physical to logical addresses */
|
||||
u64 mapped_logical[5];
|
||||
};
|
||||
|
||||
static int test_rmap_block(struct btrfs_fs_info *fs_info,
|
||||
struct rmap_test_vector *test)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map = NULL;
|
||||
u64 *logical = NULL;
|
||||
int i, out_ndaddrs, out_stripe_len;
|
||||
int ret;
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
map = kmalloc(map_lookup_size(test->num_stripes), GFP_KERNEL);
|
||||
if (!map) {
|
||||
kfree(em);
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
|
||||
/* Start at 4GiB logical address */
|
||||
em->start = SZ_4G;
|
||||
em->len = test->data_stripe_size * test->num_data_stripes;
|
||||
em->block_len = em->len;
|
||||
em->orig_block_len = test->data_stripe_size;
|
||||
em->map_lookup = map;
|
||||
|
||||
map->num_stripes = test->num_stripes;
|
||||
map->stripe_len = BTRFS_STRIPE_LEN;
|
||||
map->type = test->raid_type;
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_device *dev = btrfs_alloc_dummy_device(fs_info);
|
||||
|
||||
if (IS_ERR(dev)) {
|
||||
test_err("cannot allocate device");
|
||||
ret = PTR_ERR(dev);
|
||||
goto out;
|
||||
}
|
||||
map->stripes[i].dev = dev;
|
||||
map->stripes[i].physical = test->data_stripe_phys_start[i];
|
||||
}
|
||||
|
||||
write_lock(&fs_info->mapping_tree.lock);
|
||||
ret = add_extent_mapping(&fs_info->mapping_tree, em, 0);
|
||||
write_unlock(&fs_info->mapping_tree.lock);
|
||||
if (ret) {
|
||||
test_err("error adding block group mapping to mapping tree");
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
ret = btrfs_rmap_block(fs_info, em->start, btrfs_sb_offset(1),
|
||||
&logical, &out_ndaddrs, &out_stripe_len);
|
||||
if (ret || (out_ndaddrs == 0 && test->expected_mapped_addr)) {
|
||||
test_err("didn't rmap anything but expected %d",
|
||||
test->expected_mapped_addr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (out_stripe_len != BTRFS_STRIPE_LEN) {
|
||||
test_err("calculated stripe length doesn't match");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (out_ndaddrs != test->expected_mapped_addr) {
|
||||
for (i = 0; i < out_ndaddrs; i++)
|
||||
test_msg("mapped %llu", logical[i]);
|
||||
test_err("unexpected number of mapped addresses: %d", out_ndaddrs);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < out_ndaddrs; i++) {
|
||||
if (logical[i] != test->mapped_logical[i]) {
|
||||
test_err("unexpected logical address mapped");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
write_lock(&fs_info->mapping_tree.lock);
|
||||
remove_extent_mapping(&fs_info->mapping_tree, em);
|
||||
write_unlock(&fs_info->mapping_tree.lock);
|
||||
/* For us */
|
||||
free_extent_map(em);
|
||||
out_free:
|
||||
/* For the tree */
|
||||
free_extent_map(em);
|
||||
kfree(logical);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_test_extent_map(void)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = NULL;
|
||||
struct extent_map_tree *em_tree;
|
||||
int ret = 0;
|
||||
int ret = 0, i;
|
||||
struct rmap_test_vector rmap_tests[] = {
|
||||
{
|
||||
/*
|
||||
* Test a chunk with 2 data stripes one of which
|
||||
* interesects the physical address of the super block
|
||||
* is correctly recognised.
|
||||
*/
|
||||
.raid_type = BTRFS_BLOCK_GROUP_RAID1,
|
||||
.physical_start = SZ_64M - SZ_4M,
|
||||
.data_stripe_size = SZ_256M,
|
||||
.num_data_stripes = 2,
|
||||
.num_stripes = 2,
|
||||
.data_stripe_phys_start =
|
||||
{SZ_64M - SZ_4M, SZ_64M - SZ_4M + SZ_256M},
|
||||
.expected_mapped_addr = true,
|
||||
.mapped_logical= {SZ_4G + SZ_4M}
|
||||
},
|
||||
{
|
||||
/*
|
||||
* Test that out-of-range physical addresses are
|
||||
* ignored
|
||||
*/
|
||||
|
||||
/* SINGLE chunk type */
|
||||
.raid_type = 0,
|
||||
.physical_start = SZ_4G,
|
||||
.data_stripe_size = SZ_256M,
|
||||
.num_data_stripes = 1,
|
||||
.num_stripes = 1,
|
||||
.data_stripe_phys_start = {SZ_256M},
|
||||
.expected_mapped_addr = false,
|
||||
.mapped_logical = {0}
|
||||
}
|
||||
};
|
||||
|
||||
test_msg("running extent_map tests");
|
||||
|
||||
@ -474,6 +619,13 @@ int btrfs_test_extent_map(void)
|
||||
goto out;
|
||||
ret = test_case_4(fs_info, em_tree);
|
||||
|
||||
test_msg("running rmap tests");
|
||||
for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
|
||||
ret = test_rmap_block(fs_info, &rmap_tests[i]);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(em_tree);
|
||||
btrfs_free_dummy_fs_info(fs_info);
|
||||
|
@ -263,7 +263,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
|
||||
/* First with no extents */
|
||||
BTRFS_I(inode)->root = root;
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
em = NULL;
|
||||
test_err("got an error when we shouldn't have");
|
||||
@ -283,7 +283,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
*/
|
||||
setup_file_extents(root, sectorsize);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, (u64)-1);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -305,7 +305,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -333,7 +333,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -356,7 +356,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* Regular extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -384,7 +384,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* The next 3 are split extents */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -413,7 +413,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -435,7 +435,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -469,7 +469,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* Prealloc extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -498,7 +498,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* The next 3 are a half written prealloc extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -528,7 +528,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -561,7 +561,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -596,7 +596,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* Now for the compressed extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -630,7 +630,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* Split compressed extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -665,7 +665,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -692,7 +692,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -727,8 +727,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
free_extent_map(em);
|
||||
|
||||
/* A hole between regular extents but no hole extent */
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6,
|
||||
sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset + 6, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -755,7 +754,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -788,7 +787,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -872,7 +871,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
|
||||
insert_inode_item_key(root);
|
||||
insert_extent(root, sectorsize, sectorsize, sectorsize, 0, sectorsize,
|
||||
sectorsize, BTRFS_FILE_EXTENT_REG, 0, 1);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, 0, 2 * sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
@ -894,8 +893,7 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize,
|
||||
2 * sectorsize, 0);
|
||||
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, sectorsize, 2 * sectorsize);
|
||||
if (IS_ERR(em)) {
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
|
@ -147,13 +147,14 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
|
||||
}
|
||||
}
|
||||
|
||||
static noinline void switch_commit_roots(struct btrfs_transaction *trans)
|
||||
static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
|
||||
{
|
||||
struct btrfs_transaction *cur_trans = trans->transaction;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_root *root, *tmp;
|
||||
|
||||
down_write(&fs_info->commit_root_sem);
|
||||
list_for_each_entry_safe(root, tmp, &trans->switch_commits,
|
||||
list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
|
||||
dirty_list) {
|
||||
list_del_init(&root->dirty_list);
|
||||
free_extent_buffer(root->commit_root);
|
||||
@ -165,16 +166,17 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans)
|
||||
}
|
||||
|
||||
/* We can free old roots now. */
|
||||
spin_lock(&trans->dropped_roots_lock);
|
||||
while (!list_empty(&trans->dropped_roots)) {
|
||||
root = list_first_entry(&trans->dropped_roots,
|
||||
spin_lock(&cur_trans->dropped_roots_lock);
|
||||
while (!list_empty(&cur_trans->dropped_roots)) {
|
||||
root = list_first_entry(&cur_trans->dropped_roots,
|
||||
struct btrfs_root, root_list);
|
||||
list_del_init(&root->root_list);
|
||||
spin_unlock(&trans->dropped_roots_lock);
|
||||
spin_unlock(&cur_trans->dropped_roots_lock);
|
||||
btrfs_free_log(trans, root);
|
||||
btrfs_drop_and_free_fs_root(fs_info, root);
|
||||
spin_lock(&trans->dropped_roots_lock);
|
||||
spin_lock(&cur_trans->dropped_roots_lock);
|
||||
}
|
||||
spin_unlock(&trans->dropped_roots_lock);
|
||||
spin_unlock(&cur_trans->dropped_roots_lock);
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
}
|
||||
|
||||
@ -1421,7 +1423,7 @@ static int qgroup_account_snapshot(struct btrfs_trans_handle *trans,
|
||||
ret = commit_cowonly_roots(trans);
|
||||
if (ret)
|
||||
goto out;
|
||||
switch_commit_roots(trans->transaction);
|
||||
switch_commit_roots(trans);
|
||||
ret = btrfs_write_and_wait_transaction(trans);
|
||||
if (ret)
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
@ -2013,6 +2015,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
|
||||
ASSERT(refcount_read(&trans->use_count) == 1);
|
||||
|
||||
/*
|
||||
* Some places just start a transaction to commit it. We need to make
|
||||
* sure that if this commit fails that the abort code actually marks the
|
||||
* transaction as failed, so set trans->dirty to make the abort code do
|
||||
* the right thing.
|
||||
*/
|
||||
trans->dirty = true;
|
||||
|
||||
/* Stop the commit early if ->aborted is set */
|
||||
if (unlikely(READ_ONCE(cur_trans->aborted))) {
|
||||
ret = cur_trans->aborted;
|
||||
@ -2301,7 +2311,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
|
||||
list_add_tail(&fs_info->chunk_root->dirty_list,
|
||||
&cur_trans->switch_commits);
|
||||
|
||||
switch_commit_roots(cur_trans);
|
||||
switch_commit_roots(trans);
|
||||
|
||||
ASSERT(list_empty(&cur_trans->dirty_bgs));
|
||||
ASSERT(list_empty(&cur_trans->io_bgs));
|
||||
|
@ -373,6 +373,104 @@ static int check_csum_item(struct extent_buffer *leaf, struct btrfs_key *key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Inode item error output has the same format as dir_item_err() */
|
||||
#define inode_item_err(eb, slot, fmt, ...) \
|
||||
dir_item_err(eb, slot, fmt, __VA_ARGS__)
|
||||
|
||||
static int check_inode_key(struct extent_buffer *leaf, struct btrfs_key *key,
|
||||
int slot)
|
||||
{
|
||||
struct btrfs_key item_key;
|
||||
bool is_inode_item;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &item_key, slot);
|
||||
is_inode_item = (item_key.type == BTRFS_INODE_ITEM_KEY);
|
||||
|
||||
/* For XATTR_ITEM, location key should be all 0 */
|
||||
if (item_key.type == BTRFS_XATTR_ITEM_KEY) {
|
||||
if (key->type != 0 || key->objectid != 0 || key->offset != 0)
|
||||
return -EUCLEAN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
|
||||
key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
|
||||
key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
|
||||
key->objectid != BTRFS_FREE_INO_OBJECTID) {
|
||||
if (is_inode_item) {
|
||||
generic_err(leaf, slot,
|
||||
"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
|
||||
key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
|
||||
BTRFS_FIRST_FREE_OBJECTID,
|
||||
BTRFS_LAST_FREE_OBJECTID,
|
||||
BTRFS_FREE_INO_OBJECTID);
|
||||
} else {
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid location key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
|
||||
key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
|
||||
BTRFS_FIRST_FREE_OBJECTID,
|
||||
BTRFS_LAST_FREE_OBJECTID,
|
||||
BTRFS_FREE_INO_OBJECTID);
|
||||
}
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (key->offset != 0) {
|
||||
if (is_inode_item)
|
||||
inode_item_err(leaf, slot,
|
||||
"invalid key offset: has %llu expect 0",
|
||||
key->offset);
|
||||
else
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid location key offset:has %llu expect 0",
|
||||
key->offset);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
|
||||
int slot)
|
||||
{
|
||||
struct btrfs_key item_key;
|
||||
bool is_root_item;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &item_key, slot);
|
||||
is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
|
||||
|
||||
/* No such tree id */
|
||||
if (key->objectid == 0) {
|
||||
if (is_root_item)
|
||||
generic_err(leaf, slot, "invalid root id 0");
|
||||
else
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid location key root id 0");
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* DIR_ITEM/INDEX/INODE_REF is not allowed to point to non-fs trees */
|
||||
if (!is_fstree(key->objectid) && !is_root_item) {
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid location key objectid, have %llu expect [%llu, %llu]",
|
||||
key->objectid, BTRFS_FIRST_FREE_OBJECTID,
|
||||
BTRFS_LAST_FREE_OBJECTID);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* ROOT_ITEM with non-zero offset means this is a snapshot, created at
|
||||
* @offset transid.
|
||||
* Furthermore, for location key in DIR_ITEM, its offset is always -1.
|
||||
*
|
||||
* So here we only check offset for reloc tree whose key->offset must
|
||||
* be a valid tree.
|
||||
*/
|
||||
if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
|
||||
generic_err(leaf, slot, "invalid root id 0 for reloc tree");
|
||||
return -EUCLEAN;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int check_dir_item(struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, struct btrfs_key *prev_key,
|
||||
int slot)
|
||||
@ -386,12 +484,14 @@ static int check_dir_item(struct extent_buffer *leaf,
|
||||
return -EUCLEAN;
|
||||
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
|
||||
while (cur < item_size) {
|
||||
struct btrfs_key location_key;
|
||||
u32 name_len;
|
||||
u32 data_len;
|
||||
u32 max_name_len;
|
||||
u32 total_size;
|
||||
u32 name_hash;
|
||||
u8 dir_type;
|
||||
int ret;
|
||||
|
||||
/* header itself should not cross item boundary */
|
||||
if (cur + sizeof(*di) > item_size) {
|
||||
@ -401,6 +501,25 @@ static int check_dir_item(struct extent_buffer *leaf,
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* Location key check */
|
||||
btrfs_dir_item_key_to_cpu(leaf, di, &location_key);
|
||||
if (location_key.type == BTRFS_ROOT_ITEM_KEY) {
|
||||
ret = check_root_key(leaf, &location_key, slot);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
} else if (location_key.type == BTRFS_INODE_ITEM_KEY ||
|
||||
location_key.type == 0) {
|
||||
ret = check_inode_key(leaf, &location_key, slot);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
} else {
|
||||
dir_item_err(leaf, slot,
|
||||
"invalid location key type, have %u, expect %u or %u",
|
||||
location_key.type, BTRFS_ROOT_ITEM_KEY,
|
||||
BTRFS_INODE_ITEM_KEY);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/* dir type check */
|
||||
dir_type = btrfs_dir_type(leaf, di);
|
||||
if (dir_type >= BTRFS_FT_MAX) {
|
||||
@ -738,6 +857,44 @@ int btrfs_check_chunk_valid(struct extent_buffer *leaf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enhanced version of chunk item checker.
|
||||
*
|
||||
* The common btrfs_check_chunk_valid() doesn't check item size since it needs
|
||||
* to work on super block sys_chunk_array which doesn't have full item ptr.
|
||||
*/
|
||||
static int check_leaf_chunk_item(struct extent_buffer *leaf,
|
||||
struct btrfs_chunk *chunk,
|
||||
struct btrfs_key *key, int slot)
|
||||
{
|
||||
int num_stripes;
|
||||
|
||||
if (btrfs_item_size_nr(leaf, slot) < sizeof(struct btrfs_chunk)) {
|
||||
chunk_err(leaf, chunk, key->offset,
|
||||
"invalid chunk item size: have %u expect [%zu, %u)",
|
||||
btrfs_item_size_nr(leaf, slot),
|
||||
sizeof(struct btrfs_chunk),
|
||||
BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
|
||||
/* Let btrfs_check_chunk_valid() handle this error type */
|
||||
if (num_stripes == 0)
|
||||
goto out;
|
||||
|
||||
if (btrfs_chunk_item_size(num_stripes) !=
|
||||
btrfs_item_size_nr(leaf, slot)) {
|
||||
chunk_err(leaf, chunk, key->offset,
|
||||
"invalid chunk item size: have %u expect %lu",
|
||||
btrfs_item_size_nr(leaf, slot),
|
||||
btrfs_chunk_item_size(num_stripes));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
out:
|
||||
return btrfs_check_chunk_valid(leaf, chunk, key->offset);
|
||||
}
|
||||
|
||||
__printf(3, 4)
|
||||
__cold
|
||||
static void dev_item_err(const struct extent_buffer *eb, int slot,
|
||||
@ -801,7 +958,7 @@ static int check_dev_item(struct extent_buffer *leaf,
|
||||
}
|
||||
|
||||
/* Inode item error output has the same format as dir_item_err() */
|
||||
#define inode_item_err(fs_info, eb, slot, fmt, ...) \
|
||||
#define inode_item_err(eb, slot, fmt, ...) \
|
||||
dir_item_err(eb, slot, fmt, __VA_ARGS__)
|
||||
|
||||
static int check_inode_item(struct extent_buffer *leaf,
|
||||
@ -812,30 +969,17 @@ static int check_inode_item(struct extent_buffer *leaf,
|
||||
u64 super_gen = btrfs_super_generation(fs_info->super_copy);
|
||||
u32 valid_mask = (S_IFMT | S_ISUID | S_ISGID | S_ISVTX | 0777);
|
||||
u32 mode;
|
||||
int ret;
|
||||
|
||||
ret = check_inode_key(leaf, key, slot);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if ((key->objectid < BTRFS_FIRST_FREE_OBJECTID ||
|
||||
key->objectid > BTRFS_LAST_FREE_OBJECTID) &&
|
||||
key->objectid != BTRFS_ROOT_TREE_DIR_OBJECTID &&
|
||||
key->objectid != BTRFS_FREE_INO_OBJECTID) {
|
||||
generic_err(leaf, slot,
|
||||
"invalid key objectid: has %llu expect %llu or [%llu, %llu] or %llu",
|
||||
key->objectid, BTRFS_ROOT_TREE_DIR_OBJECTID,
|
||||
BTRFS_FIRST_FREE_OBJECTID,
|
||||
BTRFS_LAST_FREE_OBJECTID,
|
||||
BTRFS_FREE_INO_OBJECTID);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (key->offset != 0) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
"invalid key offset: has %llu expect 0",
|
||||
key->offset);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
iitem = btrfs_item_ptr(leaf, slot, struct btrfs_inode_item);
|
||||
|
||||
/* Here we use super block generation + 1 to handle log tree */
|
||||
if (btrfs_inode_generation(leaf, iitem) > super_gen + 1) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"invalid inode generation: has %llu expect (0, %llu]",
|
||||
btrfs_inode_generation(leaf, iitem),
|
||||
super_gen + 1);
|
||||
@ -843,7 +987,7 @@ static int check_inode_item(struct extent_buffer *leaf,
|
||||
}
|
||||
/* Note for ROOT_TREE_DIR_ITEM, mkfs could set its transid 0 */
|
||||
if (btrfs_inode_transid(leaf, iitem) > super_gen + 1) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"invalid inode generation: has %llu expect [0, %llu]",
|
||||
btrfs_inode_transid(leaf, iitem), super_gen + 1);
|
||||
return -EUCLEAN;
|
||||
@ -856,7 +1000,7 @@ static int check_inode_item(struct extent_buffer *leaf,
|
||||
*/
|
||||
mode = btrfs_inode_mode(leaf, iitem);
|
||||
if (mode & ~valid_mask) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"unknown mode bit detected: 0x%x",
|
||||
mode & ~valid_mask);
|
||||
return -EUCLEAN;
|
||||
@ -869,20 +1013,20 @@ static int check_inode_item(struct extent_buffer *leaf,
|
||||
*/
|
||||
if (!has_single_bit_set(mode & S_IFMT)) {
|
||||
if (!S_ISLNK(mode) && !S_ISBLK(mode) && !S_ISSOCK(mode)) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"invalid mode: has 0%o expect valid S_IF* bit(s)",
|
||||
mode & S_IFMT);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
}
|
||||
if (S_ISDIR(mode) && btrfs_inode_nlink(leaf, iitem) > 1) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"invalid nlink: has %u expect no more than 1 for dir",
|
||||
btrfs_inode_nlink(leaf, iitem));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
if (btrfs_inode_flags(leaf, iitem) & ~BTRFS_INODE_FLAG_MASK) {
|
||||
inode_item_err(fs_info, leaf, slot,
|
||||
inode_item_err(leaf, slot,
|
||||
"unknown flags detected: 0x%llx",
|
||||
btrfs_inode_flags(leaf, iitem) &
|
||||
~BTRFS_INODE_FLAG_MASK);
|
||||
@ -898,22 +1042,11 @@ static int check_root_item(struct extent_buffer *leaf, struct btrfs_key *key,
|
||||
struct btrfs_root_item ri;
|
||||
const u64 valid_root_flags = BTRFS_ROOT_SUBVOL_RDONLY |
|
||||
BTRFS_ROOT_SUBVOL_DEAD;
|
||||
int ret;
|
||||
|
||||
/* No such tree id */
|
||||
if (key->objectid == 0) {
|
||||
generic_err(leaf, slot, "invalid root id 0");
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some older kernel may create ROOT_ITEM with non-zero offset, so here
|
||||
* we only check offset for reloc tree whose key->offset must be a
|
||||
* valid tree.
|
||||
*/
|
||||
if (key->objectid == BTRFS_TREE_RELOC_OBJECTID && key->offset == 0) {
|
||||
generic_err(leaf, slot, "invalid root id 0 for reloc tree");
|
||||
return -EUCLEAN;
|
||||
}
|
||||
ret = check_root_key(leaf, key, slot);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (btrfs_item_size_nr(leaf, slot) != sizeof(ri)) {
|
||||
generic_err(leaf, slot,
|
||||
@ -1302,8 +1435,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define inode_ref_err(fs_info, eb, slot, fmt, args...) \
|
||||
inode_item_err(fs_info, eb, slot, fmt, ##args)
|
||||
#define inode_ref_err(eb, slot, fmt, args...) \
|
||||
inode_item_err(eb, slot, fmt, ##args)
|
||||
static int check_inode_ref(struct extent_buffer *leaf,
|
||||
struct btrfs_key *key, struct btrfs_key *prev_key,
|
||||
int slot)
|
||||
@ -1316,7 +1449,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
|
||||
return -EUCLEAN;
|
||||
/* namelen can't be 0, so item_size == sizeof() is also invalid */
|
||||
if (btrfs_item_size_nr(leaf, slot) <= sizeof(*iref)) {
|
||||
inode_ref_err(fs_info, leaf, slot,
|
||||
inode_ref_err(leaf, slot,
|
||||
"invalid item size, have %u expect (%zu, %u)",
|
||||
btrfs_item_size_nr(leaf, slot),
|
||||
sizeof(*iref), BTRFS_LEAF_DATA_SIZE(leaf->fs_info));
|
||||
@ -1329,7 +1462,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
|
||||
u16 namelen;
|
||||
|
||||
if (ptr + sizeof(iref) > end) {
|
||||
inode_ref_err(fs_info, leaf, slot,
|
||||
inode_ref_err(leaf, slot,
|
||||
"inode ref overflow, ptr %lu end %lu inode_ref_size %zu",
|
||||
ptr, end, sizeof(iref));
|
||||
return -EUCLEAN;
|
||||
@ -1338,7 +1471,7 @@ static int check_inode_ref(struct extent_buffer *leaf,
|
||||
iref = (struct btrfs_inode_ref *)ptr;
|
||||
namelen = btrfs_inode_ref_name_len(leaf, iref);
|
||||
if (ptr + sizeof(*iref) + namelen > end) {
|
||||
inode_ref_err(fs_info, leaf, slot,
|
||||
inode_ref_err(leaf, slot,
|
||||
"inode ref overflow, ptr %lu end %lu namelen %u",
|
||||
ptr, end, namelen);
|
||||
return -EUCLEAN;
|
||||
@ -1384,7 +1517,7 @@ static int check_leaf_item(struct extent_buffer *leaf,
|
||||
break;
|
||||
case BTRFS_CHUNK_ITEM_KEY:
|
||||
chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
|
||||
ret = btrfs_check_chunk_valid(leaf, chunk, key->offset);
|
||||
ret = check_leaf_chunk_item(leaf, chunk, key, slot);
|
||||
break;
|
||||
case BTRFS_DEV_ITEM_KEY:
|
||||
ret = check_dev_item(leaf, key, slot);
|
||||
|
@ -2674,14 +2674,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
u32 blocksize;
|
||||
int ret = 0;
|
||||
|
||||
WARN_ON(*level < 0);
|
||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
||||
|
||||
while (*level > 0) {
|
||||
struct btrfs_key first_key;
|
||||
|
||||
WARN_ON(*level < 0);
|
||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
||||
cur = path->nodes[*level];
|
||||
|
||||
WARN_ON(btrfs_header_level(cur) != *level);
|
||||
@ -2732,9 +2727,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
WARN_ON(root_owner !=
|
||||
BTRFS_TREE_LOG_OBJECTID);
|
||||
ret = btrfs_free_and_pin_reserved_extent(
|
||||
fs_info, bytenr,
|
||||
blocksize);
|
||||
ret = btrfs_pin_reserved_extent(fs_info,
|
||||
bytenr, blocksize);
|
||||
if (ret) {
|
||||
free_extent_buffer(next);
|
||||
return ret;
|
||||
@ -2749,7 +2743,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
WARN_ON(*level <= 0);
|
||||
if (path->nodes[*level-1])
|
||||
free_extent_buffer(path->nodes[*level-1]);
|
||||
path->nodes[*level-1] = next;
|
||||
@ -2757,9 +2750,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
|
||||
path->slots[*level] = 0;
|
||||
cond_resched();
|
||||
}
|
||||
WARN_ON(*level < 0);
|
||||
WARN_ON(*level >= BTRFS_MAX_LEVEL);
|
||||
|
||||
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
|
||||
|
||||
cond_resched();
|
||||
@ -2815,8 +2805,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
|
||||
ret = btrfs_free_and_pin_reserved_extent(
|
||||
fs_info,
|
||||
ret = btrfs_pin_reserved_extent(fs_info,
|
||||
path->nodes[*level]->start,
|
||||
path->nodes[*level]->len);
|
||||
if (ret)
|
||||
@ -2896,10 +2885,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
|
||||
clear_extent_buffer_dirty(next);
|
||||
}
|
||||
|
||||
WARN_ON(log->root_key.objectid !=
|
||||
BTRFS_TREE_LOG_OBJECTID);
|
||||
ret = btrfs_free_and_pin_reserved_extent(fs_info,
|
||||
next->start, next->len);
|
||||
ret = btrfs_pin_reserved_extent(fs_info, next->start,
|
||||
next->len);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@ -3935,7 +3922,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
|
||||
static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode,
|
||||
struct btrfs_path *dst_path,
|
||||
struct btrfs_path *src_path, u64 *last_extent,
|
||||
struct btrfs_path *src_path,
|
||||
int start_slot, int nr, int inode_only,
|
||||
u64 logged_isize)
|
||||
{
|
||||
@ -3946,7 +3933,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_file_extent_item *extent;
|
||||
struct btrfs_inode_item *inode_item;
|
||||
struct extent_buffer *src = src_path->nodes[0];
|
||||
struct btrfs_key first_key, last_key, key;
|
||||
int ret;
|
||||
struct btrfs_key *ins_keys;
|
||||
u32 *ins_sizes;
|
||||
@ -3954,9 +3940,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
int i;
|
||||
struct list_head ordered_sums;
|
||||
int skip_csum = inode->flags & BTRFS_INODE_NODATASUM;
|
||||
bool has_extents = false;
|
||||
bool need_find_last_extent = true;
|
||||
bool done = false;
|
||||
|
||||
INIT_LIST_HEAD(&ordered_sums);
|
||||
|
||||
@ -3965,8 +3948,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
if (!ins_data)
|
||||
return -ENOMEM;
|
||||
|
||||
first_key.objectid = (u64)-1;
|
||||
|
||||
ins_sizes = (u32 *)ins_data;
|
||||
ins_keys = (struct btrfs_key *)(ins_data + nr * sizeof(u32));
|
||||
|
||||
@ -3987,9 +3968,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
|
||||
src_offset = btrfs_item_ptr_offset(src, start_slot + i);
|
||||
|
||||
if (i == nr - 1)
|
||||
last_key = ins_keys[i];
|
||||
|
||||
if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) {
|
||||
inode_item = btrfs_item_ptr(dst_path->nodes[0],
|
||||
dst_path->slots[0],
|
||||
@ -4003,20 +3981,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
src_offset, ins_sizes[i]);
|
||||
}
|
||||
|
||||
/*
|
||||
* We set need_find_last_extent here in case we know we were
|
||||
* processing other items and then walk into the first extent in
|
||||
* the inode. If we don't hit an extent then nothing changes,
|
||||
* we'll do the last search the next time around.
|
||||
*/
|
||||
if (ins_keys[i].type == BTRFS_EXTENT_DATA_KEY) {
|
||||
has_extents = true;
|
||||
if (first_key.objectid == (u64)-1)
|
||||
first_key = ins_keys[i];
|
||||
} else {
|
||||
need_find_last_extent = false;
|
||||
}
|
||||
|
||||
/* take a reference on file data extents so that truncates
|
||||
* or deletes of this inode don't have to relog the inode
|
||||
* again
|
||||
@ -4082,167 +4046,6 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
|
||||
kfree(sums);
|
||||
}
|
||||
|
||||
if (!has_extents)
|
||||
return ret;
|
||||
|
||||
if (need_find_last_extent && *last_extent == first_key.offset) {
|
||||
/*
|
||||
* We don't have any leafs between our current one and the one
|
||||
* we processed before that can have file extent items for our
|
||||
* inode (and have a generation number smaller than our current
|
||||
* transaction id).
|
||||
*/
|
||||
need_find_last_extent = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Because we use btrfs_search_forward we could skip leaves that were
|
||||
* not modified and then assume *last_extent is valid when it really
|
||||
* isn't. So back up to the previous leaf and read the end of the last
|
||||
* extent before we go and fill in holes.
|
||||
*/
|
||||
if (need_find_last_extent) {
|
||||
u64 len;
|
||||
|
||||
ret = btrfs_prev_leaf(inode->root, src_path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret)
|
||||
goto fill_holes;
|
||||
if (src_path->slots[0])
|
||||
src_path->slots[0]--;
|
||||
src = src_path->nodes[0];
|
||||
btrfs_item_key_to_cpu(src, &key, src_path->slots[0]);
|
||||
if (key.objectid != btrfs_ino(inode) ||
|
||||
key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
goto fill_holes;
|
||||
extent = btrfs_item_ptr(src, src_path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(src, extent) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
len = btrfs_file_extent_ram_bytes(src, extent);
|
||||
*last_extent = ALIGN(key.offset + len,
|
||||
fs_info->sectorsize);
|
||||
} else {
|
||||
len = btrfs_file_extent_num_bytes(src, extent);
|
||||
*last_extent = key.offset + len;
|
||||
}
|
||||
}
|
||||
fill_holes:
|
||||
/* So we did prev_leaf, now we need to move to the next leaf, but a few
|
||||
* things could have happened
|
||||
*
|
||||
* 1) A merge could have happened, so we could currently be on a leaf
|
||||
* that holds what we were copying in the first place.
|
||||
* 2) A split could have happened, and now not all of the items we want
|
||||
* are on the same leaf.
|
||||
*
|
||||
* So we need to adjust how we search for holes, we need to drop the
|
||||
* path and re-search for the first extent key we found, and then walk
|
||||
* forward until we hit the last one we copied.
|
||||
*/
|
||||
if (need_find_last_extent) {
|
||||
/* btrfs_prev_leaf could return 1 without releasing the path */
|
||||
btrfs_release_path(src_path);
|
||||
ret = btrfs_search_slot(NULL, inode->root, &first_key,
|
||||
src_path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ASSERT(ret == 0);
|
||||
src = src_path->nodes[0];
|
||||
i = src_path->slots[0];
|
||||
} else {
|
||||
i = start_slot;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ok so here we need to go through and fill in any holes we may have
|
||||
* to make sure that holes are punched for those areas in case they had
|
||||
* extents previously.
|
||||
*/
|
||||
while (!done) {
|
||||
u64 offset, len;
|
||||
u64 extent_end;
|
||||
|
||||
if (i >= btrfs_header_nritems(src_path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(inode->root, src_path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ASSERT(ret == 0);
|
||||
src = src_path->nodes[0];
|
||||
i = 0;
|
||||
need_find_last_extent = true;
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(src, &key, i);
|
||||
if (!btrfs_comp_cpu_keys(&key, &last_key))
|
||||
done = true;
|
||||
if (key.objectid != btrfs_ino(inode) ||
|
||||
key.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
extent = btrfs_item_ptr(src, i, struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(src, extent) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
len = btrfs_file_extent_ram_bytes(src, extent);
|
||||
extent_end = ALIGN(key.offset + len,
|
||||
fs_info->sectorsize);
|
||||
} else {
|
||||
len = btrfs_file_extent_num_bytes(src, extent);
|
||||
extent_end = key.offset + len;
|
||||
}
|
||||
i++;
|
||||
|
||||
if (*last_extent == key.offset) {
|
||||
*last_extent = extent_end;
|
||||
continue;
|
||||
}
|
||||
offset = *last_extent;
|
||||
len = key.offset - *last_extent;
|
||||
ret = btrfs_insert_file_extent(trans, log, btrfs_ino(inode),
|
||||
offset, 0, 0, len, 0, len, 0, 0, 0);
|
||||
if (ret)
|
||||
break;
|
||||
*last_extent = extent_end;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if there is a hole between the last extent found in our leaf
|
||||
* and the first extent in the next leaf. If there is one, we need to
|
||||
* log an explicit hole so that at replay time we can punch the hole.
|
||||
*/
|
||||
if (ret == 0 &&
|
||||
key.objectid == btrfs_ino(inode) &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY &&
|
||||
i == btrfs_header_nritems(src_path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(inode->root, src_path);
|
||||
need_find_last_extent = true;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
} else if (ret == 0) {
|
||||
btrfs_item_key_to_cpu(src_path->nodes[0], &key,
|
||||
src_path->slots[0]);
|
||||
if (key.objectid == btrfs_ino(inode) &&
|
||||
key.type == BTRFS_EXTENT_DATA_KEY &&
|
||||
*last_extent < key.offset) {
|
||||
const u64 len = key.offset - *last_extent;
|
||||
|
||||
ret = btrfs_insert_file_extent(trans, log,
|
||||
btrfs_ino(inode),
|
||||
*last_extent, 0,
|
||||
0, len, 0, len,
|
||||
0, 0, 0);
|
||||
*last_extent += len;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Need to let the callers know we dropped the path so they should
|
||||
* re-search.
|
||||
*/
|
||||
if (!ret && need_find_last_extent)
|
||||
ret = 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -4407,7 +4210,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
|
||||
const u64 i_size = i_size_read(&inode->vfs_inode);
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
struct btrfs_path *dst_path = NULL;
|
||||
u64 last_extent = (u64)-1;
|
||||
bool dropped_extents = false;
|
||||
int ins_nr = 0;
|
||||
int start_slot;
|
||||
int ret;
|
||||
@ -4429,8 +4232,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
|
||||
if (slot >= btrfs_header_nritems(leaf)) {
|
||||
if (ins_nr > 0) {
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, start_slot,
|
||||
ins_nr, 1, 0);
|
||||
start_slot, ins_nr, 1, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ins_nr = 0;
|
||||
@ -4454,8 +4256,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
|
||||
path->slots[0]++;
|
||||
continue;
|
||||
}
|
||||
if (last_extent == (u64)-1) {
|
||||
last_extent = key.offset;
|
||||
if (!dropped_extents) {
|
||||
/*
|
||||
* Avoid logging extent items logged in past fsync calls
|
||||
* and leading to duplicate keys in the log tree.
|
||||
@ -4469,6 +4270,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
|
||||
} while (ret == -EAGAIN);
|
||||
if (ret)
|
||||
goto out;
|
||||
dropped_extents = true;
|
||||
}
|
||||
if (ins_nr == 0)
|
||||
start_slot = slot;
|
||||
@ -4483,7 +4285,7 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
}
|
||||
if (ins_nr > 0) {
|
||||
ret = copy_items(trans, inode, dst_path, path, &last_extent,
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
start_slot, ins_nr, 1, 0);
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
@ -4670,13 +4472,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (slot >= nritems) {
|
||||
if (ins_nr > 0) {
|
||||
u64 last_extent = 0;
|
||||
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, start_slot,
|
||||
ins_nr, 1, 0);
|
||||
/* can't be 1, extent items aren't processed */
|
||||
ASSERT(ret <= 0);
|
||||
start_slot, ins_nr, 1, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ins_nr = 0;
|
||||
@ -4700,13 +4497,8 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
|
||||
cond_resched();
|
||||
}
|
||||
if (ins_nr > 0) {
|
||||
u64 last_extent = 0;
|
||||
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, start_slot,
|
||||
ins_nr, 1, 0);
|
||||
/* can't be 1, extent items aren't processed */
|
||||
ASSERT(ret <= 0);
|
||||
start_slot, ins_nr, 1, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
@ -4715,100 +4507,119 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
/*
 * If the no holes feature is enabled we need to make sure any hole between the
 * last extent and the i_size of our inode is explicitly marked in the log. This
 * is to make sure that doing something like:
 *
 * 1) create file with 128Kb of data
 * 2) truncate file to 64Kb
 * 3) truncate file to 256Kb
 * 4) fsync file
 * 5) <crash/power failure>
 * 6) mount fs and trigger log replay
 *
 * Will give us a file with a size of 256Kb, the first 64Kb of data match what
 * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
 * file correspond to a hole. The presence of explicit holes in a log tree is
 * what guarantees that log replay will remove/adjust file extent items in the
 * fs/subvol tree.
 *
 * Here we do not need to care about holes between extents, that is already done
 * by copy_items(). We also only need to do this in the full sync path, where we
 * lookup for extents from the fs/subvol tree only. In the fast path case, we
 * lookup the list of modified extent maps and if any represents a hole, we
 * insert a corresponding extent representing a hole in the log tree.
 * When using the NO_HOLES feature if we punched a hole that causes the
 * deletion of entire leafs or all the extent items of the first leaf (the one
 * that contains the inode item and references) we may end up not processing
 * any extents, because there are no leafs with a generation matching the
 * current transaction that have extent items for our inode. So we need to find
 * if any holes exist and then log them. We also need to log holes after any
 * truncate operation that changes the inode's size.
 */
||||
static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_inode *inode,
|
||||
struct btrfs_path *path)
|
||||
static int btrfs_log_holes(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_inode *inode,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
int ret;
|
||||
struct btrfs_key key;
|
||||
u64 hole_start;
|
||||
u64 hole_size;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_root *log = root->log_root;
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
const u64 i_size = i_size_read(&inode->vfs_inode);
|
||||
u64 prev_extent_end = 0;
|
||||
int ret;
|
||||
|
||||
if (!btrfs_fs_incompat(fs_info, NO_HOLES))
|
||||
if (!btrfs_fs_incompat(fs_info, NO_HOLES) || i_size == 0)
|
||||
return 0;
|
||||
|
||||
key.objectid = ino;
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = (u64)-1;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
ASSERT(ret != 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ASSERT(path->slots[0] > 0);
|
||||
path->slots[0]--;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
/* inode does not have any extents */
|
||||
hole_start = 0;
|
||||
hole_size = i_size;
|
||||
} else {
|
||||
while (true) {
|
||||
struct btrfs_file_extent_item *extent;
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
u64 len;
|
||||
|
||||
/*
|
||||
* If there's an extent beyond i_size, an explicit hole was
|
||||
* already inserted by copy_items().
|
||||
*/
|
||||
if (key.offset >= i_size)
|
||||
return 0;
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
}
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
break;
|
||||
|
||||
/* We have a hole, log it. */
|
||||
if (prev_extent_end < key.offset) {
|
||||
const u64 hole_len = key.offset - prev_extent_end;
|
||||
|
||||
/*
|
||||
* Release the path to avoid deadlocks with other code
|
||||
* paths that search the root while holding locks on
|
||||
* leafs from the log root.
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
ret = btrfs_insert_file_extent(trans, root->log_root,
|
||||
ino, prev_extent_end, 0,
|
||||
0, hole_len, 0, hole_len,
|
||||
0, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* Search for the same key again in the root. Since it's
|
||||
* an extent item and we are holding the inode lock, the
|
||||
* key must still exist. If it doesn't just emit warning
|
||||
* and return an error to fall back to a transaction
|
||||
* commit.
|
||||
*/
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (WARN_ON(ret > 0))
|
||||
return -ENOENT;
|
||||
leaf = path->nodes[0];
|
||||
}
|
||||
|
||||
extent = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
|
||||
if (btrfs_file_extent_type(leaf, extent) ==
|
||||
BTRFS_FILE_EXTENT_INLINE)
|
||||
return 0;
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
len = btrfs_file_extent_ram_bytes(leaf, extent);
|
||||
prev_extent_end = ALIGN(key.offset + len,
|
||||
fs_info->sectorsize);
|
||||
} else {
|
||||
len = btrfs_file_extent_num_bytes(leaf, extent);
|
||||
prev_extent_end = key.offset + len;
|
||||
}
|
||||
|
||||
len = btrfs_file_extent_num_bytes(leaf, extent);
|
||||
/* Last extent goes beyond i_size, no need to log a hole. */
|
||||
if (key.offset + len > i_size)
|
||||
return 0;
|
||||
hole_start = key.offset + len;
|
||||
hole_size = i_size - hole_start;
|
||||
path->slots[0]++;
|
||||
cond_resched();
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
|
||||
/* Last extent ends at i_size. */
|
||||
if (hole_size == 0)
|
||||
return 0;
|
||||
if (prev_extent_end < i_size) {
|
||||
u64 hole_len;
|
||||
|
||||
hole_size = ALIGN(hole_size, fs_info->sectorsize);
|
||||
ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
|
||||
hole_size, 0, hole_size, 0, 0, 0);
|
||||
return ret;
|
||||
btrfs_release_path(path);
|
||||
hole_len = ALIGN(i_size - prev_extent_end, fs_info->sectorsize);
|
||||
ret = btrfs_insert_file_extent(trans, root->log_root,
|
||||
ino, prev_extent_end, 0, 0,
|
||||
hole_len, 0, hole_len,
|
||||
0, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
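As a hedged illustration of the scenario handled by the hole-logging code above (not part of the patch; the mount path and sizes are made up), the user-space sequence from the NO_HOLES comment earlier in this hunk looks roughly like this:

/* Steps 1-4 of the truncate/fsync scenario described in the comment above. */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	static char buf[128 * 1024];
	int fd = open("/mnt/btrfs/foo", O_CREAT | O_RDWR, 0644); /* hypothetical mount */

	if (fd < 0)
		return 1;
	memset(buf, 0xaa, sizeof(buf));
	if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))  /* 1) 128Kb of data */
		return 1;
	ftruncate(fd, 64 * 1024);	/* 2) truncate to 64Kb */
	ftruncate(fd, 256 * 1024);	/* 3) truncate to 256Kb, leaving a 192Kb hole */
	fsync(fd);			/* 4) fsync: the hole must end up in the log */
	close(fd);
	return 0;			/* 5) a crash after this point is what log replay handles */
}

After a power failure at step 5, log replay must leave the file at 256Kb with the range 64Kb-256Kb as a hole, which is what the explicit hole items logged by the function above guarantee.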
|
||||
/*
|
||||
@@ -5011,6 +4822,50 @@ static int log_conflicting_inodes(struct btrfs_trans_handle *trans,
			}
			continue;
		}
		/*
		 * If the inode was already logged skip it - otherwise we can
		 * hit an infinite loop. Example:
		 *
		 * From the commit root (previous transaction) we have the
		 * following inodes:
		 *
		 * inode 257 a directory
		 * inode 258 with references "zz" and "zz_link" on inode 257
		 * inode 259 with reference "a" on inode 257
		 *
		 * And in the current (uncommitted) transaction we have:
		 *
		 * inode 257 a directory, unchanged
		 * inode 258 with references "a" and "a2" on inode 257
		 * inode 259 with reference "zz_link" on inode 257
		 * inode 261 with reference "zz" on inode 257
		 *
		 * When logging inode 261 the following infinite loop could
		 * happen if we don't skip already logged inodes:
		 *
		 * - we detect inode 258 as a conflicting inode, with inode 261
		 *   on reference "zz", and log it;
		 *
		 * - we detect inode 259 as a conflicting inode, with inode 258
		 *   on reference "a", and log it;
		 *
		 * - we detect inode 258 as a conflicting inode, with inode 259
		 *   on reference "zz_link", and log it - again! After this we
		 *   repeat the above steps forever.
		 */
		spin_lock(&BTRFS_I(inode)->lock);
		/*
		 * Check the inode's logged_trans only instead of
		 * btrfs_inode_in_log(). This is because the last_log_commit of
		 * the inode is not updated when we only log that it exists and
		 * and it has the full sync bit set (see btrfs_log_inode()).
		 */
		if (BTRFS_I(inode)->logged_trans == trans->transid) {
			spin_unlock(&BTRFS_I(inode)->lock);
			btrfs_add_delayed_iput(inode);
			continue;
		}
		spin_unlock(&BTRFS_I(inode)->lock);
		/*
		 * We are safe logging the other inode without acquiring its
		 * lock as long as we log with the LOG_INODE_EXISTS mode. We
||||
@ -5110,7 +4965,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_key min_key;
|
||||
struct btrfs_key max_key;
|
||||
struct btrfs_root *log = root->log_root;
|
||||
u64 last_extent = 0;
|
||||
int err = 0;
|
||||
int ret;
|
||||
int nritems;
|
||||
@ -5288,7 +5142,7 @@ again:
|
||||
ins_start_slot = path->slots[0];
|
||||
}
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, ins_start_slot,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
@ -5311,17 +5165,13 @@ again:
|
||||
if (ins_nr == 0)
|
||||
goto next_slot;
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, ins_start_slot,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ins_nr = 0;
|
||||
if (ret) {
|
||||
btrfs_release_path(path);
|
||||
continue;
|
||||
}
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
@ -5334,18 +5184,13 @@ again:
|
||||
goto next_slot;
|
||||
}
|
||||
|
||||
ret = copy_items(trans, inode, dst_path, path, &last_extent,
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
if (ret) {
|
||||
ins_nr = 0;
|
||||
btrfs_release_path(path);
|
||||
continue;
|
||||
}
|
||||
ins_nr = 1;
|
||||
ins_start_slot = path->slots[0];
|
||||
next_slot:
|
||||
@ -5359,13 +5204,12 @@ next_slot:
|
||||
}
|
||||
if (ins_nr) {
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
&last_extent, ins_start_slot,
|
||||
ins_start_slot,
|
||||
ins_nr, inode_only, logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = 0;
|
||||
ins_nr = 0;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
@ -5380,14 +5224,13 @@ next_key:
|
||||
}
|
||||
}
|
||||
if (ins_nr) {
|
||||
ret = copy_items(trans, inode, dst_path, path, &last_extent,
|
||||
ret = copy_items(trans, inode, dst_path, path,
|
||||
ins_start_slot, ins_nr, inode_only,
|
||||
logged_isize);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
goto out_unlock;
|
||||
}
|
||||
ret = 0;
|
||||
ins_nr = 0;
|
||||
}
|
||||
|
||||
@ -5400,7 +5243,7 @@ next_key:
|
||||
if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
|
||||
btrfs_release_path(path);
|
||||
btrfs_release_path(dst_path);
|
||||
err = btrfs_log_trailing_hole(trans, root, inode, path);
|
||||
err = btrfs_log_holes(trans, root, inode, path);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
@@ -30,6 +30,7 @@
#include "tree-checker.h"
#include "space-info.h"
#include "block-group.h"
#include "discard.h"

const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
@@ -66,6 +67,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
		.tolerated_failures = 2,
		.devs_increment = 3,
		.ncopies = 3,
		.nparity = 0,
		.raid_name = "raid1c3",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C3,
		.mindev_error = BTRFS_ERROR_DEV_RAID1C3_MIN_NOT_MET,
@@ -78,6 +80,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
		.tolerated_failures = 3,
		.devs_increment = 4,
		.ncopies = 4,
		.nparity = 0,
		.raid_name = "raid1c4",
		.bg_flag = BTRFS_BLOCK_GROUP_RAID1C4,
		.mindev_error = BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET,
||||
@ -438,39 +441,6 @@ static noinline struct btrfs_fs_devices *find_fsid(
|
||||
|
||||
ASSERT(fsid);
|
||||
|
||||
if (metadata_fsid) {
|
||||
/*
|
||||
* Handle scanned device having completed its fsid change but
|
||||
* belonging to a fs_devices that was created by first scanning
|
||||
* a device which didn't have its fsid/metadata_uuid changed
|
||||
* at all and the CHANGING_FSID_V2 flag set.
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (fs_devices->fsid_change &&
|
||||
memcmp(metadata_fsid, fs_devices->fsid,
|
||||
BTRFS_FSID_SIZE) == 0 &&
|
||||
memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0) {
|
||||
return fs_devices;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Handle scanned device having completed its fsid change but
|
||||
* belonging to a fs_devices that was created by a device that
|
||||
* has an outdated pair of fsid/metadata_uuid and
|
||||
* CHANGING_FSID_V2 flag set.
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (fs_devices->fsid_change &&
|
||||
memcmp(fs_devices->metadata_uuid,
|
||||
fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
|
||||
memcmp(metadata_fsid, fs_devices->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0) {
|
||||
return fs_devices;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle non-split brain cases */
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (metadata_fsid) {
|
||||
@ -486,6 +456,47 @@ static noinline struct btrfs_fs_devices *find_fsid(
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
|
||||
struct btrfs_super_block *disk_super)
|
||||
{
|
||||
|
||||
struct btrfs_fs_devices *fs_devices;
|
||||
|
||||
/*
|
||||
* Handle scanned device having completed its fsid change but
|
||||
* belonging to a fs_devices that was created by first scanning
|
||||
* a device which didn't have its fsid/metadata_uuid changed
|
||||
* at all and the CHANGING_FSID_V2 flag set.
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (fs_devices->fsid_change &&
|
||||
memcmp(disk_super->metadata_uuid, fs_devices->fsid,
|
||||
BTRFS_FSID_SIZE) == 0 &&
|
||||
memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0) {
|
||||
return fs_devices;
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Handle scanned device having completed its fsid change but
|
||||
* belonging to a fs_devices that was created by a device that
|
||||
* has an outdated pair of fsid/metadata_uuid and
|
||||
* CHANGING_FSID_V2 flag set.
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (fs_devices->fsid_change &&
|
||||
memcmp(fs_devices->metadata_uuid,
|
||||
fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
|
||||
memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0) {
|
||||
return fs_devices;
|
||||
}
|
||||
}
|
||||
|
||||
return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
|
||||
int flush, struct block_device **bdev,
|
||||
@ -669,7 +680,9 @@ error_brelse:
|
||||
|
||||
/*
|
||||
* Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
|
||||
* being created with a disk that has already completed its fsid change.
|
||||
* being created with a disk that has already completed its fsid change. Such
|
||||
* disk can belong to an fs which has its FSID changed or to one which doesn't.
|
||||
* Handle both cases here.
|
||||
*/
|
||||
static struct btrfs_fs_devices *find_fsid_inprogress(
|
||||
struct btrfs_super_block *disk_super)
|
||||
@ -685,7 +698,7 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return find_fsid(disk_super->fsid, NULL);
|
||||
}
|
||||
|
||||
|
||||
@ -697,17 +710,54 @@ static struct btrfs_fs_devices *find_fsid_changed(
|
||||
/*
|
||||
* Handles the case where scanned device is part of an fs that had
|
||||
* multiple successful changes of FSID but curently device didn't
|
||||
* observe it. Meaning our fsid will be different than theirs.
|
||||
* observe it. Meaning our fsid will be different than theirs. We need
|
||||
* to handle two subcases :
|
||||
* 1 - The fs still continues to have different METADATA/FSID uuids.
|
||||
* 2 - The fs is switched back to its original FSID (METADATA/FSID
|
||||
* are equal).
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
/* Changed UUIDs */
|
||||
if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
|
||||
BTRFS_FSID_SIZE) != 0 &&
|
||||
memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0 &&
|
||||
memcmp(fs_devices->fsid, disk_super->fsid,
|
||||
BTRFS_FSID_SIZE) != 0) {
|
||||
BTRFS_FSID_SIZE) != 0)
|
||||
return fs_devices;
|
||||
|
||||
/* Unchanged UUIDs */
|
||||
if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
|
||||
BTRFS_FSID_SIZE) == 0 &&
|
||||
memcmp(fs_devices->fsid, disk_super->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) == 0)
|
||||
return fs_devices;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct btrfs_fs_devices *find_fsid_reverted_metadata(
|
||||
struct btrfs_super_block *disk_super)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices;
|
||||
|
||||
/*
|
||||
* Handle the case where the scanned device is part of an fs whose last
|
||||
* metadata UUID change reverted it to the original FSID. At the same
|
||||
* time * fs_devices was first created by another constitutent device
|
||||
* which didn't fully observe the operation. This results in an
|
||||
* btrfs_fs_devices created with metadata/fsid different AND
|
||||
* btrfs_fs_devices::fsid_change set AND the metadata_uuid of the
|
||||
* fs_devices equal to the FSID of the disk.
|
||||
*/
|
||||
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
|
||||
if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
|
||||
BTRFS_FSID_SIZE) != 0 &&
|
||||
memcmp(fs_devices->metadata_uuid, disk_super->fsid,
|
||||
BTRFS_FSID_SIZE) == 0 &&
|
||||
fs_devices->fsid_change)
|
||||
return fs_devices;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
@ -734,24 +784,16 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
|
||||
|
||||
if (fsid_change_in_progress) {
|
||||
if (!has_metadata_uuid) {
|
||||
/*
|
||||
* When we have an image which has CHANGING_FSID_V2 set
|
||||
* it might belong to either a filesystem which has
|
||||
* disks with completed fsid change or it might belong
|
||||
* to fs with no UUID changes in effect, handle both.
|
||||
*/
|
||||
if (!has_metadata_uuid)
|
||||
fs_devices = find_fsid_inprogress(disk_super);
|
||||
if (!fs_devices)
|
||||
fs_devices = find_fsid(disk_super->fsid, NULL);
|
||||
} else {
|
||||
else
|
||||
fs_devices = find_fsid_changed(disk_super);
|
||||
}
|
||||
} else if (has_metadata_uuid) {
|
||||
fs_devices = find_fsid(disk_super->fsid,
|
||||
disk_super->metadata_uuid);
|
||||
fs_devices = find_fsid_with_metadata_uuid(disk_super);
|
||||
} else {
|
||||
fs_devices = find_fsid(disk_super->fsid, NULL);
|
||||
fs_devices = find_fsid_reverted_metadata(disk_super);
|
||||
if (!fs_devices)
|
||||
fs_devices = find_fsid(disk_super->fsid, NULL);
|
||||
}
|
||||
|
||||
|
||||
@ -781,12 +823,18 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
* a device which had the CHANGING_FSID_V2 flag then replace the
|
||||
* metadata_uuid/fsid values of the fs_devices.
|
||||
*/
|
||||
if (has_metadata_uuid && fs_devices->fsid_change &&
|
||||
if (fs_devices->fsid_change &&
|
||||
found_transid > fs_devices->latest_generation) {
|
||||
memcpy(fs_devices->fsid, disk_super->fsid,
|
||||
BTRFS_FSID_SIZE);
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
disk_super->metadata_uuid, BTRFS_FSID_SIZE);
|
||||
|
||||
if (has_metadata_uuid)
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
disk_super->metadata_uuid,
|
||||
BTRFS_FSID_SIZE);
|
||||
else
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
disk_super->fsid, BTRFS_FSID_SIZE);
|
||||
|
||||
fs_devices->fsid_change = false;
|
||||
}
|
||||
@ -1064,11 +1112,6 @@ static void btrfs_close_bdev(struct btrfs_device *device)
|
||||
static void btrfs_close_one_device(struct btrfs_device *device)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = device->fs_devices;
|
||||
struct btrfs_device *new_device;
|
||||
struct rcu_string *name;
|
||||
|
||||
if (device->bdev)
|
||||
fs_devices->open_devices--;
|
||||
|
||||
if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
|
||||
device->devid != BTRFS_DEV_REPLACE_DEVID) {
|
||||
@ -1080,23 +1123,22 @@ static void btrfs_close_one_device(struct btrfs_device *device)
|
||||
fs_devices->missing_devices--;
|
||||
|
||||
btrfs_close_bdev(device);
|
||||
|
||||
new_device = btrfs_alloc_device(NULL, &device->devid,
|
||||
device->uuid);
|
||||
BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
|
||||
|
||||
/* Safe because we are under uuid_mutex */
|
||||
if (device->name) {
|
||||
name = rcu_string_strdup(device->name->str, GFP_NOFS);
|
||||
BUG_ON(!name); /* -ENOMEM */
|
||||
rcu_assign_pointer(new_device->name, name);
|
||||
if (device->bdev) {
|
||||
fs_devices->open_devices--;
|
||||
device->bdev = NULL;
|
||||
}
|
||||
clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
|
||||
|
||||
list_replace_rcu(&device->dev_list, &new_device->dev_list);
|
||||
new_device->fs_devices = device->fs_devices;
|
||||
device->fs_info = NULL;
|
||||
atomic_set(&device->dev_stats_ccnt, 0);
|
||||
extent_io_tree_release(&device->alloc_state);
|
||||
|
||||
synchronize_rcu();
|
||||
btrfs_free_device(device);
|
||||
/* Verify the device is back in a pristine state */
|
||||
ASSERT(!test_bit(BTRFS_DEV_STATE_FLUSH_SENT, &device->dev_state));
|
||||
ASSERT(!test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
|
||||
ASSERT(list_empty(&device->dev_alloc_list));
|
||||
ASSERT(list_empty(&device->post_commit_list));
|
||||
ASSERT(atomic_read(&device->reada_in_flight) == 0);
|
||||
}
|
||||
|
||||
static int close_fs_devices(struct btrfs_fs_devices *fs_devices)
|
||||
@ -2130,7 +2172,6 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = tgtdev->fs_info->fs_devices;
|
||||
|
||||
WARN_ON(!tgtdev);
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
|
||||
btrfs_sysfs_rm_device_link(fs_devices, tgtdev);
|
||||
@ -2875,6 +2916,7 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->chunk_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_block_group *block_group;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -2898,6 +2940,12 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
block_group = btrfs_lookup_block_group(fs_info, chunk_offset);
|
||||
if (!block_group)
|
||||
return -ENOENT;
|
||||
btrfs_discard_cancel_work(&fs_info->discard_ctl, block_group);
|
||||
btrfs_put_block_group(block_group);
|
||||
|
||||
trans = btrfs_start_trans_remove_block_group(root->fs_info,
|
||||
chunk_offset);
|
||||
if (IS_ERR(trans)) {
|
||||
@ -6111,75 +6159,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
|
||||
}
|
||||
|
||||
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
|
||||
u64 physical, u64 **logical, int *naddrs, int *stripe_len)
|
||||
{
|
||||
struct extent_map *em;
|
||||
struct map_lookup *map;
|
||||
u64 *buf;
|
||||
u64 bytenr;
|
||||
u64 length;
|
||||
u64 stripe_nr;
|
||||
u64 rmap_len;
|
||||
int i, j, nr = 0;
|
||||
|
||||
em = btrfs_get_chunk_map(fs_info, chunk_start, 1);
|
||||
if (IS_ERR(em))
|
||||
return -EIO;
|
||||
|
||||
map = em->map_lookup;
|
||||
length = em->len;
|
||||
rmap_len = map->stripe_len;
|
||||
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID10)
|
||||
length = div_u64(length, map->num_stripes / map->sub_stripes);
|
||||
else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
|
||||
length = div_u64(length, map->num_stripes);
|
||||
else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
|
||||
length = div_u64(length, nr_data_stripes(map));
|
||||
rmap_len = map->stripe_len * nr_data_stripes(map);
|
||||
}
|
||||
|
||||
buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
|
||||
BUG_ON(!buf); /* -ENOMEM */
|
||||
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
if (map->stripes[i].physical > physical ||
|
||||
map->stripes[i].physical + length <= physical)
|
||||
continue;
|
||||
|
||||
stripe_nr = physical - map->stripes[i].physical;
|
||||
stripe_nr = div64_u64(stripe_nr, map->stripe_len);
|
||||
|
||||
if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
|
||||
stripe_nr = stripe_nr * map->num_stripes + i;
|
||||
stripe_nr = div_u64(stripe_nr, map->sub_stripes);
|
||||
} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
|
||||
stripe_nr = stripe_nr * map->num_stripes + i;
|
||||
} /* else if RAID[56], multiply by nr_data_stripes().
|
||||
* Alternatively, just use rmap_len below instead of
|
||||
* map->stripe_len */
|
||||
|
||||
bytenr = chunk_start + stripe_nr * rmap_len;
|
||||
WARN_ON(nr >= map->num_stripes);
|
||||
for (j = 0; j < nr; j++) {
|
||||
if (buf[j] == bytenr)
|
||||
break;
|
||||
}
|
||||
if (j == nr) {
|
||||
WARN_ON(nr >= map->num_stripes);
|
||||
buf[nr++] = bytenr;
|
||||
}
|
||||
}
|
||||
|
||||
*logical = buf;
|
||||
*naddrs = nr;
|
||||
*stripe_len = rmap_len;
|
||||
|
||||
free_extent_map(em);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
|
||||
{
|
||||
bio->bi_private = bbio->private;
|
||||
@@ -6480,19 +6459,14 @@ static u64 calc_stripe_length(u64 type, u64 chunk_len, int num_stripes)
{
	int index = btrfs_bg_flags_to_raid_index(type);
	int ncopies = btrfs_raid_array[index].ncopies;
	const int nparity = btrfs_raid_array[index].nparity;
	int data_stripes;

	switch (type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
	case BTRFS_BLOCK_GROUP_RAID5:
		data_stripes = num_stripes - 1;
		break;
	case BTRFS_BLOCK_GROUP_RAID6:
		data_stripes = num_stripes - 2;
		break;
	default:
		if (nparity)
			data_stripes = num_stripes - nparity;
		else
			data_stripes = num_stripes / ncopies;
		break;
	}

	return div_u64(chunk_len, data_stripes);
}
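For reference, a stand-alone sketch of the data_stripes arithmetic used above (values are illustrative and the helper name is made up; this is not a btrfs function):

/* Illustrative only: mirrors the nparity/ncopies logic of calc_stripe_length(). */
#include <stdio.h>

static unsigned long long example_stripe_length(unsigned long long chunk_len,
						int num_stripes, int ncopies,
						int nparity)
{
	int data_stripes = nparity ? num_stripes - nparity
				   : num_stripes / ncopies;

	return chunk_len / data_stripes;
}

int main(void)
{
	unsigned long long gib = 1024ULL * 1024 * 1024;

	/* RAID6: 6 stripes, 2 parity -> 4 data stripes -> 256 MiB per stripe */
	printf("%llu\n", example_stripe_length(gib, 6, 1, 2));
	/* RAID1: 2 stripes, 2 copies, no parity -> 1 data stripe -> 1 GiB */
	printf("%llu\n", example_stripe_length(gib, 2, 2, 0));
	return 0;
}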
|
||||
@@ -7331,6 +7305,8 @@ int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
			else
				btrfs_dev_stat_set(dev, i, 0);
		}
		btrfs_info(fs_info, "device stats zeroed by %s (%d)",
			   current->comm, task_pid_nr(current));
	} else {
		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
			if (stats->nr_items > i)
||||
|
@@ -120,8 +120,6 @@ struct btrfs_device {
	/* per-device scrub information */
	struct scrub_ctx *scrub_ctx;

	struct btrfs_work work;

	/* readahead state */
	atomic_t reada_in_flight;
	u64 reada_next;
@@ -138,6 +136,10 @@ struct btrfs_device {
	atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];

	struct extent_io_tree alloc_state;

	struct completion kobj_unregister;
	/* For sysfs/FSID/devinfo/devid/ */
	struct kobject devid_kobj;
};

/*
@@ -255,7 +257,7 @@ struct btrfs_fs_devices {
	struct btrfs_fs_info *fs_info;
	/* sysfs kobjects */
	struct kobject fsid_kobj;
	struct kobject *device_dir_kobj;
	struct kobject *devices_kobj;
	struct completion kobj_unregister;
};

@@ -417,8 +419,6 @@ int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
			struct btrfs_bio **bbio_ret);
int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
			u64 logical, u64 len, struct btrfs_io_geometry *io_geom);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
			u64 physical, u64 **logical, int *naddrs, int *stripe_len);
int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, u64 type);
||||
|
@@ -456,6 +456,41 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen,
	return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits);
}

static inline void bitmap_next_clear_region(unsigned long *bitmap,
					    unsigned int *rs, unsigned int *re,
					    unsigned int end)
{
	*rs = find_next_zero_bit(bitmap, end, *rs);
	*re = find_next_bit(bitmap, end, *rs + 1);
}

static inline void bitmap_next_set_region(unsigned long *bitmap,
					  unsigned int *rs, unsigned int *re,
					  unsigned int end)
{
	*rs = find_next_bit(bitmap, end, *rs);
	*re = find_next_zero_bit(bitmap, end, *rs + 1);
}

/*
 * Bitmap region iterators. Iterates over the bitmap between [@start, @end).
 * @rs and @re should be integer variables and will be set to start and end
 * index of the current clear or set region.
 */
#define bitmap_for_each_clear_region(bitmap, rs, re, start, end)	\
	for ((rs) = (start),						\
	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end));	\
	     (rs) < (re);						\
	     (rs) = (re) + 1,						\
	     bitmap_next_clear_region((bitmap), &(rs), &(re), (end)))

#define bitmap_for_each_set_region(bitmap, rs, re, start, end)		\
	for ((rs) = (start),						\
	     bitmap_next_set_region((bitmap), &(rs), &(re), (end));	\
	     (rs) < (re);						\
	     (rs) = (re) + 1,						\
	     bitmap_next_set_region((bitmap), &(rs), &(re), (end)))

/**
 * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap.
 * @n: u64 value
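A minimal kernel-style usage sketch for the two region iterators added above (the bitmap contents are invented for illustration; this is not code from the patch):

/* Illustration: walk the set and clear regions of a small bitmap. */
static void bitmap_region_demo(void)
{
	DECLARE_BITMAP(map, 16);
	unsigned int rs, re;

	bitmap_zero(map, 16);
	bitmap_set(map, 2, 3);		/* bits 2-4 */
	bitmap_set(map, 9, 4);		/* bits 9-12 */

	/* prints [2, 5) and [9, 13) */
	bitmap_for_each_set_region(map, rs, re, 0, 16)
		pr_info("set region [%u, %u)\n", rs, re);

	/* prints [0, 2), [5, 9) and [13, 16) */
	bitmap_for_each_clear_region(map, rs, re, 0, 16)
		pr_info("clear region [%u, %u)\n", rs, re);
}

The mm/percpu.c changes below use exactly these iterators to replace the file-private pcpu_for_each_unpop_region()/pcpu_for_each_pop_region() helpers.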
||||
|
@@ -496,9 +496,9 @@ DECLARE_EVENT_CLASS(btrfs__ordered_extent,
	TP_fast_assign_btrfs(btrfs_sb(inode->i_sb),
		__entry->ino = btrfs_ino(BTRFS_I(inode));
		__entry->file_offset = ordered->file_offset;
		__entry->start = ordered->start;
		__entry->len = ordered->len;
		__entry->disk_len = ordered->disk_len;
		__entry->start = ordered->disk_bytenr;
		__entry->len = ordered->num_bytes;
		__entry->disk_len = ordered->disk_num_bytes;
		__entry->bytes_left = ordered->bytes_left;
		__entry->flags = ordered->flags;
		__entry->compress_type = ordered->compress_type;
||||
|
mm/percpu.c (61 lines changed)
@@ -270,33 +270,6 @@ static unsigned long pcpu_chunk_addr(struct pcpu_chunk *chunk,
	       pcpu_unit_page_offset(cpu, page_idx);
}

static void pcpu_next_unpop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_zero_bit(bitmap, end, *rs);
	*re = find_next_bit(bitmap, end, *rs + 1);
}

static void pcpu_next_pop(unsigned long *bitmap, int *rs, int *re, int end)
{
	*rs = find_next_bit(bitmap, end, *rs);
	*re = find_next_zero_bit(bitmap, end, *rs + 1);
}

/*
 * Bitmap region iterators. Iterates over the bitmap between
 * [@start, @end) in @chunk. @rs and @re should be integer variables
 * and will be set to start and end index of the current free region.
 */
#define pcpu_for_each_unpop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_unpop((bitmap), &(rs), &(re), (end)); \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_unpop((bitmap), &(rs), &(re), (end)))

#define pcpu_for_each_pop_region(bitmap, rs, re, start, end)		     \
	for ((rs) = (start), pcpu_next_pop((bitmap), &(rs), &(re), (end));   \
	     (rs) < (re);						     \
	     (rs) = (re) + 1, pcpu_next_pop((bitmap), &(rs), &(re), (end)))

/*
 * The following are helper functions to help access bitmaps and convert
 * between bitmap offsets to address offsets.
||||
@ -732,9 +705,8 @@ static void pcpu_chunk_refresh_hint(struct pcpu_chunk *chunk, bool full_scan)
|
||||
}
|
||||
|
||||
bits = 0;
|
||||
pcpu_for_each_md_free_region(chunk, bit_off, bits) {
|
||||
pcpu_for_each_md_free_region(chunk, bit_off, bits)
|
||||
pcpu_block_update(chunk_md, bit_off, bit_off + bits);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -749,7 +721,7 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
|
||||
{
|
||||
struct pcpu_block_md *block = chunk->md_blocks + index;
|
||||
unsigned long *alloc_map = pcpu_index_alloc_map(chunk, index);
|
||||
int rs, re, start; /* region start, region end */
|
||||
unsigned int rs, re, start; /* region start, region end */
|
||||
|
||||
/* promote scan_hint to contig_hint */
|
||||
if (block->scan_hint) {
|
||||
@ -765,10 +737,9 @@ static void pcpu_block_refresh_hint(struct pcpu_chunk *chunk, int index)
|
||||
block->right_free = 0;
|
||||
|
||||
/* iterate over free areas and update the contig hints */
|
||||
pcpu_for_each_unpop_region(alloc_map, rs, re, start,
|
||||
PCPU_BITMAP_BLOCK_BITS) {
|
||||
bitmap_for_each_clear_region(alloc_map, rs, re, start,
|
||||
PCPU_BITMAP_BLOCK_BITS)
|
||||
pcpu_block_update(block, rs, re);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1041,13 +1012,13 @@ static void pcpu_block_update_hint_free(struct pcpu_chunk *chunk, int bit_off,
|
||||
static bool pcpu_is_populated(struct pcpu_chunk *chunk, int bit_off, int bits,
|
||||
int *next_off)
|
||||
{
|
||||
int page_start, page_end, rs, re;
|
||||
unsigned int page_start, page_end, rs, re;
|
||||
|
||||
page_start = PFN_DOWN(bit_off * PCPU_MIN_ALLOC_SIZE);
|
||||
page_end = PFN_UP((bit_off + bits) * PCPU_MIN_ALLOC_SIZE);
|
||||
|
||||
rs = page_start;
|
||||
pcpu_next_unpop(chunk->populated, &rs, &re, page_end);
|
||||
bitmap_next_clear_region(chunk->populated, &rs, &re, page_end);
|
||||
if (rs >= page_end)
|
||||
return true;
|
||||
|
||||
@ -1702,13 +1673,13 @@ area_found:
|
||||
|
||||
/* populate if not all pages are already there */
|
||||
if (!is_atomic) {
|
||||
int page_start, page_end, rs, re;
|
||||
unsigned int page_start, page_end, rs, re;
|
||||
|
||||
page_start = PFN_DOWN(off);
|
||||
page_end = PFN_UP(off + size);
|
||||
|
||||
pcpu_for_each_unpop_region(chunk->populated, rs, re,
|
||||
page_start, page_end) {
|
||||
bitmap_for_each_clear_region(chunk->populated, rs, re,
|
||||
page_start, page_end) {
|
||||
WARN_ON(chunk->immutable);
|
||||
|
||||
ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
|
||||
@ -1858,10 +1829,10 @@ static void pcpu_balance_workfn(struct work_struct *work)
|
||||
spin_unlock_irq(&pcpu_lock);
|
||||
|
||||
list_for_each_entry_safe(chunk, next, &to_free, list) {
|
||||
int rs, re;
|
||||
unsigned int rs, re;
|
||||
|
||||
pcpu_for_each_pop_region(chunk->populated, rs, re, 0,
|
||||
chunk->nr_pages) {
|
||||
bitmap_for_each_set_region(chunk->populated, rs, re, 0,
|
||||
chunk->nr_pages) {
|
||||
pcpu_depopulate_chunk(chunk, rs, re);
|
||||
spin_lock_irq(&pcpu_lock);
|
||||
pcpu_chunk_depopulated(chunk, rs, re);
|
||||
@ -1893,7 +1864,7 @@ retry_pop:
|
||||
}
|
||||
|
||||
for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) {
|
||||
int nr_unpop = 0, rs, re;
|
||||
unsigned int nr_unpop = 0, rs, re;
|
||||
|
||||
if (!nr_to_pop)
|
||||
break;
|
||||
@ -1910,9 +1881,9 @@ retry_pop:
|
||||
continue;
|
||||
|
||||
/* @chunk can't go away while pcpu_alloc_mutex is held */
|
||||
pcpu_for_each_unpop_region(chunk->populated, rs, re, 0,
|
||||
chunk->nr_pages) {
|
||||
int nr = min(re - rs, nr_to_pop);
|
||||
bitmap_for_each_clear_region(chunk->populated, rs, re, 0,
|
||||
chunk->nr_pages) {
|
||||
int nr = min_t(int, re - rs, nr_to_pop);
|
||||
|
||||
ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
|
||||
if (!ret) {
|
||||
|