btrfs: fiemap: Cache and merge fiemap extent before submit it to user
[BUG]
Cycle mount btrfs can cause fiemap to return different result.
Like:
 # mount /dev/vdb5 /mnt/btrfs
 # dd if=/dev/zero bs=16K count=4 oflag=dsync of=/mnt/btrfs/file
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1
 # umount /mnt/btrfs
 # mount /dev/vdb5 /mnt/btrfs
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..31]:         25088..25119        32   0x0
   1: [32..63]:        25120..25151        32   0x0
   2: [64..95]:        25152..25183        32   0x0
   3: [96..127]:       25184..25215        32   0x1
But after above fiemap, we get correct merged result if we call fiemap
again.
 # xfs_io -c "fiemap -v" /mnt/btrfs/file
 /mnt/test/file:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..127]:        25088..25215       128   0x1
[REASON]
Btrfs will try to merge extent map when inserting new extent map.
btrfs_fiemap(start=0 len=(u64)-1)
|- extent_fiemap(start=0 len=(u64)-1)
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=0 len=64k)
   |        |  Found on-disk (ino, EXTENT_DATA, 0)
   |        |- add_extent_mapping()
   |        |- Return (em->start=0, len=16k)
   |
   |- fiemap_fill_next_extent(logic=0 phys=X len=16k)
   |
   |- get_extent_skip_holes(start=0 len=64k)
   |  |- btrfs_get_extent_fiemap(start=0 len=64k)
   |     |- btrfs_get_extent(start=16k len=48k)
   |        |  Found on-disk (ino, EXTENT_DATA, 16k)
   |        |- add_extent_mapping()
   |        |  |- try_merge_map()
   |        |     Merge with previous em start=0 len=16k
   |        |     resulting em start=0 len=32k
   |        |- Return (em->start=0, len=32K)    << Merged result
   |- Stripe off the unrelated range (0~16K) of return em
   |- fiemap_fill_next_extent(logic=16K phys=X+16K len=16K)
      ^^^ Causing split fiemap extent.
And since in add_extent_mapping(), em is already merged, in next
fiemap() call, we will get merged result.
[FIX]
Here we introduce a new structure, fiemap_cache, which records previous
fiemap extent.
And will always try to merge current fiemap_cache result before calling
fiemap_fill_next_extent().
Only when we failed to merge current fiemap extent with cached one, we
will call fiemap_fill_next_extent() to submit cached one.
So by this method, we can merge all fiemap extents.
It can also be done in fs/ioctl.c, however the problem is if
fieinfo->fi_extents_max == 0, we have no space to cache previous fiemap
extent.
So I choose to merge it in btrfs.
Signed-off-by: Qu Wenruo <quwenruo@cn.fujitsu.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
			
			
This commit is contained in:
		
							parent
							
								
									9bcaaea741
								
							
						
					
					
						commit
						4751832da9
					
				| @ -4377,6 +4377,123 @@ static struct extent_map *get_extent_skip_holes(struct inode *inode, | |||||||
| 	return NULL; | 	return NULL; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /*
 | ||||||
|  |  * To cache previous fiemap extent | ||||||
|  |  * | ||||||
|  |  * Will be used for merging fiemap extent | ||||||
|  |  */ | ||||||
|  | struct fiemap_cache { | ||||||
|  | 	u64 offset; | ||||||
|  | 	u64 phys; | ||||||
|  | 	u64 len; | ||||||
|  | 	u32 flags; | ||||||
|  | 	bool cached; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Helper to submit fiemap extent. | ||||||
|  |  * | ||||||
|  |  * Will try to merge current fiemap extent specified by @offset, @phys, | ||||||
|  |  * @len and @flags with cached one. | ||||||
|  |  * And only when we fails to merge, cached one will be submitted as | ||||||
|  |  * fiemap extent. | ||||||
|  |  * | ||||||
|  |  * Return value is the same as fiemap_fill_next_extent(). | ||||||
|  |  */ | ||||||
|  | static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, | ||||||
|  | 				struct fiemap_cache *cache, | ||||||
|  | 				u64 offset, u64 phys, u64 len, u32 flags) | ||||||
|  | { | ||||||
|  | 	int ret = 0; | ||||||
|  | 
 | ||||||
|  | 	if (!cache->cached) | ||||||
|  | 		goto assign; | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Sanity check, extent_fiemap() should have ensured that new | ||||||
|  | 	 * fiemap extent won't overlap with cahced one. | ||||||
|  | 	 * Not recoverable. | ||||||
|  | 	 * | ||||||
|  | 	 * NOTE: Physical address can overlap, due to compression | ||||||
|  | 	 */ | ||||||
|  | 	if (cache->offset + cache->len > offset) { | ||||||
|  | 		WARN_ON(1); | ||||||
|  | 		return -EINVAL; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/*
 | ||||||
|  | 	 * Only merges fiemap extents if | ||||||
|  | 	 * 1) Their logical addresses are continuous | ||||||
|  | 	 * | ||||||
|  | 	 * 2) Their physical addresses are continuous | ||||||
|  | 	 *    So truly compressed (physical size smaller than logical size) | ||||||
|  | 	 *    extents won't get merged with each other | ||||||
|  | 	 * | ||||||
|  | 	 * 3) Share same flags except FIEMAP_EXTENT_LAST | ||||||
|  | 	 *    So regular extent won't get merged with prealloc extent | ||||||
|  | 	 */ | ||||||
|  | 	if (cache->offset + cache->len  == offset && | ||||||
|  | 	    cache->phys + cache->len == phys  && | ||||||
|  | 	    (cache->flags & ~FIEMAP_EXTENT_LAST) == | ||||||
|  | 			(flags & ~FIEMAP_EXTENT_LAST)) { | ||||||
|  | 		cache->len += len; | ||||||
|  | 		cache->flags |= flags; | ||||||
|  | 		goto try_submit_last; | ||||||
|  | 	} | ||||||
|  | 
 | ||||||
|  | 	/* Not mergeable, need to submit cached one */ | ||||||
|  | 	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, | ||||||
|  | 				      cache->len, cache->flags); | ||||||
|  | 	cache->cached = false; | ||||||
|  | 	if (ret) | ||||||
|  | 		return ret; | ||||||
|  | assign: | ||||||
|  | 	cache->cached = true; | ||||||
|  | 	cache->offset = offset; | ||||||
|  | 	cache->phys = phys; | ||||||
|  | 	cache->len = len; | ||||||
|  | 	cache->flags = flags; | ||||||
|  | try_submit_last: | ||||||
|  | 	if (cache->flags & FIEMAP_EXTENT_LAST) { | ||||||
|  | 		ret = fiemap_fill_next_extent(fieinfo, cache->offset, | ||||||
|  | 				cache->phys, cache->len, cache->flags); | ||||||
|  | 		cache->cached = false; | ||||||
|  | 	} | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /*
 | ||||||
|  |  * Sanity check for fiemap cache | ||||||
|  |  * | ||||||
|  |  * All fiemap cache should be submitted by emit_fiemap_extent() | ||||||
|  |  * Iteration should be terminated either by last fiemap extent or | ||||||
|  |  * fieinfo->fi_extents_max. | ||||||
|  |  * So no cached fiemap should exist. | ||||||
|  |  */ | ||||||
|  | static int check_fiemap_cache(struct btrfs_fs_info *fs_info, | ||||||
|  | 			       struct fiemap_extent_info *fieinfo, | ||||||
|  | 			       struct fiemap_cache *cache) | ||||||
|  | { | ||||||
|  | 	int ret; | ||||||
|  | 
 | ||||||
|  | 	if (!cache->cached) | ||||||
|  | 		return 0; | ||||||
|  | 
 | ||||||
|  | 	/* Small and recoverbale problem, only to info developer */ | ||||||
|  | #ifdef CONFIG_BTRFS_DEBUG | ||||||
|  | 	WARN_ON(1); | ||||||
|  | #endif | ||||||
|  | 	btrfs_warn(fs_info, | ||||||
|  | 		   "unhandled fiemap cache detected: offset=%llu phys=%llu len=%llu flags=0x%x", | ||||||
|  | 		   cache->offset, cache->phys, cache->len, cache->flags); | ||||||
|  | 	ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, | ||||||
|  | 				      cache->len, cache->flags); | ||||||
|  | 	cache->cached = false; | ||||||
|  | 	if (ret > 0) | ||||||
|  | 		ret = 0; | ||||||
|  | 	return ret; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||||||
| 		__u64 start, __u64 len, get_extent_t *get_extent) | 		__u64 start, __u64 len, get_extent_t *get_extent) | ||||||
| { | { | ||||||
| @ -4394,6 +4511,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||||||
| 	struct extent_state *cached_state = NULL; | 	struct extent_state *cached_state = NULL; | ||||||
| 	struct btrfs_path *path; | 	struct btrfs_path *path; | ||||||
| 	struct btrfs_root *root = BTRFS_I(inode)->root; | 	struct btrfs_root *root = BTRFS_I(inode)->root; | ||||||
|  | 	struct fiemap_cache cache = { 0 }; | ||||||
| 	int end = 0; | 	int end = 0; | ||||||
| 	u64 em_start = 0; | 	u64 em_start = 0; | ||||||
| 	u64 em_len = 0; | 	u64 em_len = 0; | ||||||
| @ -4573,7 +4691,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||||||
| 			flags |= FIEMAP_EXTENT_LAST; | 			flags |= FIEMAP_EXTENT_LAST; | ||||||
| 			end = 1; | 			end = 1; | ||||||
| 		} | 		} | ||||||
| 		ret = fiemap_fill_next_extent(fieinfo, em_start, disko, | 		ret = emit_fiemap_extent(fieinfo, &cache, em_start, disko, | ||||||
| 					   em_len, flags); | 					   em_len, flags); | ||||||
| 		if (ret) { | 		if (ret) { | ||||||
| 			if (ret == 1) | 			if (ret == 1) | ||||||
| @ -4582,6 +4700,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| out_free: | out_free: | ||||||
|  | 	if (!ret) | ||||||
|  | 		ret = check_fiemap_cache(root->fs_info, fieinfo, &cache); | ||||||
| 	free_extent_map(em); | 	free_extent_map(em); | ||||||
| out: | out: | ||||||
| 	btrfs_free_path(path); | 	btrfs_free_path(path); | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user