btrfs: fiemap: Cache and merge fiemap extent before submit it to user
[BUG] Cycle mount btrfs can cause fiemap to return different result. Like: # mount /dev/vdb5 /mnt/btrfs # dd if=/dev/zero bs=16K count=4 oflag=dsync of=/mnt/btrfs/file # xfs_io -c "fiemap -v" /mnt/btrfs/file /mnt/test/file: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 25088..25215 128 0x1 # umount /mnt/btrfs # mount /dev/vdb5 /mnt/btrfs # xfs_io -c "fiemap -v" /mnt/btrfs/file /mnt/test/file: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..31]: 25088..25119 32 0x0 1: [32..63]: 25120..25151 32 0x0 2: [64..95]: 25152..25183 32 0x0 3: [96..127]: 25184..25215 32 0x1 But after above fiemap, we get correct merged result if we call fiemap again. # xfs_io -c "fiemap -v" /mnt/btrfs/file /mnt/test/file: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 25088..25215 128 0x1 [REASON] Btrfs will try to merge extent map when inserting new extent map. btrfs_fiemap(start=0 len=(u64)-1) |- extent_fiemap(start=0 len=(u64)-1) |- get_extent_skip_holes(start=0 len=64k) | |- btrfs_get_extent_fiemap(start=0 len=64k) | |- btrfs_get_extent(start=0 len=64k) | | Found on-disk (ino, EXTENT_DATA, 0) | |- add_extent_mapping() | |- Return (em->start=0, len=16k) | |- fiemap_fill_next_extent(logic=0 phys=X len=16k) | |- get_extent_skip_holes(start=0 len=64k) | |- btrfs_get_extent_fiemap(start=0 len=64k) | |- btrfs_get_extent(start=16k len=48k) | | Found on-disk (ino, EXTENT_DATA, 16k) | |- add_extent_mapping() | | |- try_merge_map() | | Merge with previous em start=0 len=16k | | resulting em start=0 len=32k | |- Return (em->start=0, len=32K) << Merged result |- Stripe off the unrelated range (0~16K) of return em |- fiemap_fill_next_extent(logic=16K phys=X+16K len=16K) ^^^ Causing split fiemap extent. And since in add_extent_mapping(), em is already merged, in next fiemap() call, we will get merged result. [FIX] Here we introduce a new structure, fiemap_cache, which records previous fiemap extent. And will always try to merge current fiemap_cache result before calling fiemap_fill_next_extent(). Only when we failed to merge current fiemap extent with cached one, we will call fiemap_fill_next_extent() to submit cached one. So by this method, we can merge all fiemap extents. It can also be done in fs/ioctl.c, however the problem is if fieinfo->fi_extents_max == 0, we have no space to cache previous fiemap extent. So I choose to merge it in btrfs. Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Reviewed-by: Liu Bo <bo.li.liu@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
9bcaaea741
commit
4751832da9
@ -4377,6 +4377,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* To cache previous fiemap extent
|
||||||
|
*
|
||||||
|
* Will be used for merging fiemap extent
|
||||||
|
*/
|
||||||
|
struct fiemap_cache {
|
||||||
|
u64 offset;
|
||||||
|
u64 phys;
|
||||||
|
u64 len;
|
||||||
|
u32 flags;
|
||||||
|
bool cached;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper to submit fiemap extent.
|
||||||
|
*
|
||||||
|
* Will try to merge current fiemap extent specified by @offset, @phys,
|
||||||
|
* @len and @flags with cached one.
|
||||||
|
* And only when we fails to merge, cached one will be submitted as
|
||||||
|
* fiemap extent.
|
||||||
|
*
|
||||||
|
* Return value is the same as fiemap_fill_next_extent().
|
||||||
|
*/
|
||||||
|
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
|
||||||
|
struct fiemap_cache *cache,
|
||||||
|
u64 offset, u64 phys, u64 len, u32 flags)
|
||||||
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
|
if (!cache->cached)
|
||||||
|
goto assign;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity check, extent_fiemap() should have ensured that new
|
||||||
|
* fiemap extent won't overlap with cahced one.
|
||||||
|
* Not recoverable.
|
||||||
|
*
|
||||||
|
* NOTE: Physical address can overlap, due to compression
|
||||||
|
*/
|
||||||
|
if (cache->offset + cache->len > offset) {
|
||||||
|
WARN_ON(1);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Only merges fiemap extents if
|
||||||
|
* 1) Their logical addresses are continuous
|
||||||
|
*
|
||||||
|
* 2) Their physical addresses are continuous
|
||||||
|
* So truly compressed (physical size smaller than logical size)
|
||||||
|
* extents won't get merged with each other
|
||||||
|
*
|
||||||
|
* 3) Share same flags except FIEMAP_EXTENT_LAST
|
||||||
|
* So regular extent won't get merged with prealloc extent
|
||||||
|
*/
|
||||||
|
if (cache->offset + cache->len == offset &&
|
||||||
|
cache->phys + cache->len == phys &&
|
||||||
|
(cache->flags & ~FIEMAP_EXTENT_LAST) ==
|
||||||
|
(flags & ~FIEMAP_EXTENT_LAST)) {
|
||||||
|
cache->len += len;
|
||||||
|
cache->flags |= flags;
|
||||||
|
goto try_submit_last;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Not mergeable, need to submit cached one */
|
||||||
|
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
|
||||||
|
cache->len, cache->flags);
|
||||||
|
cache->cached = false;
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
assign:
|
||||||
|
cache->cached = true;
|
||||||
|
cache->offset = offset;
|
||||||
|
cache->phys = phys;
|
||||||
|
cache->len = len;
|
||||||
|
cache->flags = flags;
|
||||||
|
try_submit_last:
|
||||||
|
if (cache->flags & FIEMAP_EXTENT_LAST) {
|
||||||
|
ret = fiemap_fill_next_extent(fieinfo, cache->offset,
|
||||||
|
cache->phys, cache->len, cache->flags);
|
||||||
|
cache->cached = false;
|
||||||
|
}
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Sanity check for fiemap cache
|
||||||
|
*
|
||||||
|
* All fiemap cache should be submitted by emit_fiemap_extent()
|
||||||
|
* Iteration should be terminated either by last fiemap extent or
|
||||||
|
* fieinfo->fi_extents_max.
|
||||||
|
* So no cached fiemap should exist.
|
||||||
|
*/
|
||||||
|
static int check_fiemap_cache(struct btrfs_fs_info *fs_info,
|
||||||
|
struct fiemap_extent_info *fieinfo,
|
||||||
|
struct fiemap_cache *cache)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (!cache->cached)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Small and recoverbale problem, only to info developer */
|
||||||
|
#ifdef CONFIG_BTRFS_DEBUG
|
||||||
|
WARN_ON(1);
|
||||||
|
#endif
|
||||||
|
btrfs_warn(fs_info,
|
||||||
|
"unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x",
|
||||||
|
cache->offset, cache->phys, cache->len, cache->flags);
|
||||||
|
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
|
||||||
|
cache->len, cache->flags);
|
||||||
|
cache->cached = false;
|
||||||
|
if (ret > 0)
|
||||||
|
ret = 0;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||||
__u64 start, __u64 len, get_extent_t *get_extent)
|
__u64 start, __u64 len, get_extent_t *get_extent)
|
||||||
{
|
{
|
||||||
@ -4394,6 +4511,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||||||
struct extent_state *cached_state = NULL;
|
struct extent_state *cached_state = NULL;
|
||||||
struct btrfs_path *path;
|
struct btrfs_path *path;
|
||||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||||
|
struct fiemap_cache cache = { 0 };
|
||||||
int end = 0;
|
int end = 0;
|
||||||
u64 em_start = 0;
|
u64 em_start = 0;
|
||||||
u64 em_len = 0;
|
u64 em_len = 0;
|
||||||
@ -4573,8 +4691,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||||||
flags |= FIEMAP_EXTENT_LAST;
|
flags |= FIEMAP_EXTENT_LAST;
|
||||||
end = 1;
|
end = 1;
|
||||||
}
|
}
|
||||||
ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
|
ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko,
|
||||||
em_len, flags);
|
em_len, flags);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
if (ret == 1)
|
if (ret == 1)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
@ -4582,6 +4700,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
out_free:
|
out_free:
|
||||||
|
if (!ret)
|
||||||
|
ret = check_fiemap_cache(root->fs_info, fieinfo, &cache);
|
||||||
free_extent_map(em);
|
free_extent_map(em);
|
||||||
out:
|
out:
|
||||||
btrfs_free_path(path);
|
btrfs_free_path(path);
|
||||||
|
Loading…
Reference in New Issue
Block a user