Btrfs: add hole punching

This patch adds hole punching via fallocate.  Thanks,

Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Author: Josef Bacik
Date:   2012-08-29 14:27:18 -04:00
Committed by: Chris Mason
parent 2671485d39
commit 2aaa665581
5 changed files with 355 additions and 13 deletions
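
The new mode is reached from userspace through fallocate(2).  Below is a
minimal sketch of a caller (not part of this patch); the file path and byte
range are made up for illustration, and FALLOC_FL_PUNCH_HOLE has to be
combined with FALLOC_FL_KEEP_SIZE:

	/*
	 * Hypothetical userspace example: punch a hole in an existing file.
	 * Path and offsets are arbitrary.
	 */
	#define _GNU_SOURCE
	#include <fcntl.h>		/* open(), fallocate() */
	#include <linux/falloc.h>	/* FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE */
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		int fd = open("/mnt/btrfs/testfile", O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}

		/* Deallocate bytes [64K, 64K + 128K) without changing i_size. */
		if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			      64 * 1024, 128 * 1024) < 0) {
			perror("fallocate");
			close(fd);
			return 1;
		}

		close(fd);
		return 0;
	}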

@@ -3250,6 +3250,8 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct inode *dir, u64 objectid,
 			const char *name, int name_len);
+int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+			int front);
 int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *root,
 			       struct inode *inode, u64 new_size,
@@ -3323,7 +3325,7 @@ extern const struct file_operations btrfs_file_operations;
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root, struct inode *inode,
 			 struct btrfs_path *path, u64 start, u64 end,
-			 int drop_cache);
+			 u64 *drop_end, int drop_cache);
 int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct inode *inode, u64 start,
 		       u64 end, int drop_cache);

@@ -4132,6 +4132,8 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 void btrfs_free_block_rsv(struct btrfs_root *root,
 			  struct btrfs_block_rsv *rsv)
 {
+	if (!rsv)
+		return;
 	btrfs_block_rsv_release(root, rsv, (u64)-1);
 	kfree(rsv);
 }

@@ -39,6 +39,7 @@
 #include "tree-log.h"
 #include "locking.h"
 #include "compat.h"
+#include "volumes.h"
 
 /*
  * when auto defrag is enabled we
@@ -584,7 +585,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
 int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root, struct inode *inode,
 			 struct btrfs_path *path, u64 start, u64 end,
-			 int drop_cache)
+			 u64 *drop_end, int drop_cache)
 {
 	struct extent_buffer *leaf;
 	struct btrfs_file_extent_item *fi;
@@ -822,6 +823,8 @@ next_slot:
 			btrfs_abort_transaction(trans, root, ret);
 	}
 
+	if (drop_end)
+		*drop_end = min(end, extent_end);
 	btrfs_release_path(path);
 	return ret;
 }
@@ -836,7 +839,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
-	ret = __btrfs_drop_extents(trans, root, inode, path, start, end,
+	ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
 				   drop_cache);
 	btrfs_free_path(path);
 	return ret;
@@ -1645,6 +1648,324 @@ static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma)
 	return 0;
 }
 
+static int hole_mergeable(struct inode *inode, struct extent_buffer *leaf,
+			  int slot, u64 start, u64 end)
+{
+	struct btrfs_file_extent_item *fi;
+	struct btrfs_key key;
+
+	if (slot < 0 || slot >= btrfs_header_nritems(leaf))
+		return 0;
+
+	btrfs_item_key_to_cpu(leaf, &key, slot);
+	if (key.objectid != btrfs_ino(inode) ||
+	    key.type != BTRFS_EXTENT_DATA_KEY)
+		return 0;
+
+	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+
+	if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
+		return 0;
+
+	if (btrfs_file_extent_disk_bytenr(leaf, fi))
+		return 0;
+
+	if (key.offset == end)
+		return 1;
+
+	if (key.offset + btrfs_file_extent_num_bytes(leaf, fi) == start)
+		return 1;
+
+	return 0;
+}
+
+static int fill_holes(struct btrfs_trans_handle *trans, struct inode *inode,
+		      struct btrfs_path *path, u64 offset, u64 end)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_buffer *leaf;
+	struct btrfs_file_extent_item *fi;
+	struct extent_map *hole_em;
+	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
+	struct btrfs_key key;
+	int ret;
+
+	key.objectid = btrfs_ino(inode);
+	key.type = BTRFS_EXTENT_DATA_KEY;
+	key.offset = offset;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+	if (ret < 0)
+		return ret;
+	BUG_ON(!ret);
+
+	leaf = path->nodes[0];
+	if (hole_mergeable(inode, leaf, path->slots[0]-1, offset, end)) {
+		u64 num_bytes;
+
+		path->slots[0]--;
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) +
+			end - offset;
+		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_mark_buffer_dirty(leaf);
+		goto out;
+	}
+
+	if (hole_mergeable(inode, leaf, path->slots[0]+1, offset, end)) {
+		u64 num_bytes;
+
+		path->slots[0]++;
+		key.offset = offset;
+		btrfs_set_item_key_safe(trans, root, path, &key);
+		fi = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_file_extent_item);
+		num_bytes = btrfs_file_extent_num_bytes(leaf, fi) + end -
+			offset;
+		btrfs_set_file_extent_num_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+		btrfs_set_file_extent_offset(leaf, fi, 0);
+		btrfs_mark_buffer_dirty(leaf);
+		goto out;
+	}
+	btrfs_release_path(path);
+
+	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset,
+				       0, 0, end - offset, 0, end - offset,
+				       0, 0, 0);
+	if (ret)
+		return ret;
+
+out:
+	btrfs_release_path(path);
+
+	hole_em = alloc_extent_map();
+	if (!hole_em) {
+		btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+			&BTRFS_I(inode)->runtime_flags);
+	} else {
+		hole_em->start = offset;
+		hole_em->len = end - offset;
+		hole_em->orig_start = offset;
+
+		hole_em->block_start = EXTENT_MAP_HOLE;
+		hole_em->block_len = 0;
+		hole_em->bdev = root->fs_info->fs_devices->latest_bdev;
+		hole_em->compress_type = BTRFS_COMPRESS_NONE;
+		hole_em->generation = trans->transid;
+
+		do {
+			btrfs_drop_extent_cache(inode, offset, end - 1, 0);
+			write_lock(&em_tree->lock);
+			ret = add_extent_mapping(em_tree, hole_em);
+			if (!ret)
+				list_move(&hole_em->list,
+					  &em_tree->modified_extents);
+			write_unlock(&em_tree->lock);
+		} while (ret == -EEXIST);
+		free_extent_map(hole_em);
+		if (ret)
+			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+				&BTRFS_I(inode)->runtime_flags);
+	}
+
+	return 0;
+}
+
+static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
+{
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_block_rsv *rsv;
+	struct btrfs_trans_handle *trans;
+	u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+	u64 lockstart = (offset + mask) & ~mask;
+	u64 lockend = ((offset + len) & ~mask) - 1;
+	u64 cur_offset = lockstart;
+	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
+	u64 drop_end;
+	unsigned long nr;
+	int ret = 0;
+	int err = 0;
+	bool same_page = (offset >> PAGE_CACHE_SHIFT) ==
+		((offset + len) >> PAGE_CACHE_SHIFT);
+
+	btrfs_wait_ordered_range(inode, offset, len);
+
+	mutex_lock(&inode->i_mutex);
+	if (offset >= inode->i_size) {
+		mutex_unlock(&inode->i_mutex);
+		return 0;
+	}
+
+	/*
+	 * Only do this if we are in the same page and we aren't doing the
+	 * entire page.
+	 */
+	if (same_page && len < PAGE_CACHE_SIZE) {
+		ret = btrfs_truncate_page(inode, offset, len, 0);
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	/* zero back part of the first page */
+	ret = btrfs_truncate_page(inode, offset, 0, 0);
+	if (ret) {
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	/* zero the front end of the last page */
+	ret = btrfs_truncate_page(inode, offset + len, 0, 1);
+	if (ret) {
+		mutex_unlock(&inode->i_mutex);
+		return ret;
+	}
+
+	if (lockend < lockstart) {
+		mutex_unlock(&inode->i_mutex);
+		return 0;
+	}
+
+	while (1) {
+		struct btrfs_ordered_extent *ordered;
+
+		truncate_pagecache_range(inode, lockstart, lockend);
+
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, &cached_state);
+		ordered = btrfs_lookup_first_ordered_extent(inode, lockend);
+
+		/*
+		 * We need to make sure we have no ordered extents in this range
+		 * and nobody raced in and read a page in this range, if we did
+		 * we need to try again.
+		 */
+		if ((!ordered ||
+		    (ordered->file_offset + ordered->len < lockstart ||
+		     ordered->file_offset > lockend)) &&
+		     !test_range_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_UPTODATE, 0,
+				     cached_state)) {
+			if (ordered)
+				btrfs_put_ordered_extent(ordered);
+			break;
+		}
+		if (ordered)
+			btrfs_put_ordered_extent(ordered);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, &cached_state, GFP_NOFS);
+		btrfs_wait_ordered_range(inode, lockstart,
+					 lockend - lockstart + 1);
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	rsv = btrfs_alloc_block_rsv(root);
+	if (!rsv) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+	rsv->size = btrfs_calc_trunc_metadata_size(root, 1);
+	rsv->failfast = 1;
+
+	/*
+	 * 1 - update the inode
+	 * 1 - removing the extents in the range
+	 * 1 - adding the hole extent
+	 */
+	trans = btrfs_start_transaction(root, 3);
+	if (IS_ERR(trans)) {
+		err = PTR_ERR(trans);
+		goto out_free;
+	}
+
+	ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+				      min_size);
+	BUG_ON(ret);
+	trans->block_rsv = rsv;
+
+	while (cur_offset < lockend) {
+		ret = __btrfs_drop_extents(trans, root, inode, path,
+					   cur_offset, lockend + 1,
+					   &drop_end, 1);
+		if (ret != -ENOSPC)
+			break;
+
+		trans->block_rsv = &root->fs_info->trans_block_rsv;
+
+		ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+		if (ret) {
+			err = ret;
+			break;
+		}
+
+		cur_offset = drop_end;
+
+		ret = btrfs_update_inode(trans, root, inode);
+		if (ret) {
+			err = ret;
+			break;
+		}
+
+		nr = trans->blocks_used;
+		btrfs_end_transaction(trans, root);
+		btrfs_btree_balance_dirty(root, nr);
+
+		trans = btrfs_start_transaction(root, 3);
+		if (IS_ERR(trans)) {
+			ret = PTR_ERR(trans);
+			trans = NULL;
+			break;
+		}
+
+		ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv,
+					      rsv, min_size);
+		BUG_ON(ret);	/* shouldn't happen */
+		trans->block_rsv = rsv;
+	}
+
+	if (ret) {
+		err = ret;
+		goto out_trans;
+	}
+
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+	ret = fill_holes(trans, inode, path, cur_offset, drop_end);
+	if (ret) {
+		err = ret;
+		goto out_trans;
+	}
+
+out_trans:
+	if (!trans)
+		goto out_free;
+
+	trans->block_rsv = &root->fs_info->trans_block_rsv;
+	ret = btrfs_update_inode(trans, root, inode);
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+	btrfs_btree_balance_dirty(root, nr);
+out_free:
+	btrfs_free_path(path);
+	btrfs_free_block_rsv(root, rsv);
+out:
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			     &cached_state, GFP_NOFS);
+	mutex_unlock(&inode->i_mutex);
+	if (ret && !err)
+		err = ret;
+	return err;
+}
+
 static long btrfs_fallocate(struct file *file, int mode,
 			    loff_t offset, loff_t len)
 {
@@ -1663,10 +1984,13 @@ static long btrfs_fallocate(struct file *file, int mode,
 	alloc_start = offset & ~mask;
 	alloc_end = (offset + len + mask) & ~mask;
 
-	/* We only support the FALLOC_FL_KEEP_SIZE mode */
-	if (mode & ~FALLOC_FL_KEEP_SIZE)
+	/* Make sure we aren't being give some crap mode */
+	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
 		return -EOPNOTSUPP;
 
+	if (mode & FALLOC_FL_PUNCH_HOLE)
+		return btrfs_punch_hole(inode, offset, len);
+
 	/*
 	 * Make sure we have enough space before we do the
 	 * allocation.

@@ -3475,12 +3475,20 @@ error:
 }
 
 /*
- * taken from block_truncate_page, but does cow as it zeros out
- * any bytes left in the last page in the file.
+ * btrfs_truncate_page - read, zero a chunk and write a page
+ * @inode - inode that we're zeroing
+ * @from - the offset to start zeroing
+ * @len - the length to zero, 0 to zero the entire range respective to the
+ *	offset
+ * @front - zero up to the offset instead of from the offset on
+ *
+ * This will find the page for the "from" offset and cow the page and zero the
+ * part we want to zero.  This is used with truncate and hole punching.
  */
-static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
+int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+			int front)
 {
-	struct inode *inode = mapping->host;
+	struct address_space *mapping = inode->i_mapping;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct btrfs_ordered_extent *ordered;
@@ -3495,7 +3503,8 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
 	u64 page_start;
 	u64 page_end;
 
-	if ((offset & (blocksize - 1)) == 0)
+	if ((offset & (blocksize - 1)) == 0 &&
+	    (!len || ((len & (blocksize - 1)) == 0)))
 		goto out;
 	ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
 	if (ret)
@@ -3555,8 +3564,13 @@ again:
 
 	ret = 0;
 	if (offset != PAGE_CACHE_SIZE) {
+		if (!len)
+			len = PAGE_CACHE_SIZE - offset;
 		kaddr = kmap(page);
-		memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
+		if (front)
+			memset(kaddr, 0, offset);
+		else
+			memset(kaddr + offset, 0, len);
 		flush_dcache_page(page);
 		kunmap(page);
 	}
@@ -6796,7 +6810,7 @@ static int btrfs_truncate(struct inode *inode)
 	u64 mask = root->sectorsize - 1;
 	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
-	ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+	ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
 	if (ret)
 		return ret;

@@ -2842,7 +2842,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
 	if (BTRFS_I(inode)->logged_trans == trans->transid) {
 		ret = __btrfs_drop_extents(trans, log, inode, dst_path, start,
-					   start + len, 0);
+					   start + len, NULL, 0);
 		if (ret)
 			return ret;
 	}