Btrfs: Make truncate(2) more ENOSPC friendly

truncating and deleting regular files are unbound operations,
so it's not good to do them in a single transaction. This
patch makes btrfs_truncate and btrfs_delete_inode start a
new transaction after all items in a tree leaf are deleted.

Signed-off-by: Yan Zheng <zheng.yan@oracle.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Yan, Zheng 2009-11-12 09:35:36 +00:00 committed by Chris Mason
parent 5a303d5d4b
commit 8082510e71
2 changed files with 214 additions and 139 deletions

View File

@ -2848,37 +2848,40 @@ out:
* min_type is the minimum key type to truncate down to. If set to 0, this
* will kill all the items on this inode, including the INODE_ITEM_KEY.
*/
noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode,
u64 new_size, u32 min_type)
{
int ret;
struct btrfs_path *path;
struct btrfs_key key;
struct btrfs_key found_key;
u32 found_type = (u8)-1;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
struct btrfs_key found_key;
u64 extent_start = 0;
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
u64 mask = root->sectorsize - 1;
u32 found_type = (u8)-1;
int found_extent;
int del_item;
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
int encoding;
u64 mask = root->sectorsize - 1;
int ret;
int err = 0;
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
if (root->ref_cows)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
path = btrfs_alloc_path();
BUG_ON(!path);
path->reada = -1;
/* FIXME, add redo link to tree so we don't leak on crash */
key.objectid = inode->i_ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@ -2886,17 +2889,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
search_again:
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret < 0)
goto error;
if (ret < 0) {
err = ret;
goto out;
}
if (ret > 0) {
/* there are no items in the tree for us to truncate, we're
* done
*/
if (path->slots[0] == 0) {
ret = 0;
goto error;
}
if (path->slots[0] == 0)
goto out;
path->slots[0]--;
}
@ -2931,28 +2934,17 @@ search_again:
}
item_end--;
}
if (item_end < new_size) {
if (found_type == BTRFS_DIR_ITEM_KEY)
found_type = BTRFS_INODE_ITEM_KEY;
else if (found_type == BTRFS_EXTENT_ITEM_KEY)
found_type = BTRFS_EXTENT_DATA_KEY;
else if (found_type == BTRFS_EXTENT_DATA_KEY)
found_type = BTRFS_XATTR_ITEM_KEY;
else if (found_type == BTRFS_XATTR_ITEM_KEY)
found_type = BTRFS_INODE_REF_KEY;
else if (found_type)
found_type--;
else
if (found_type > min_type) {
del_item = 1;
} else {
if (item_end < new_size)
break;
btrfs_set_key_type(&key, found_type);
goto next;
}
if (found_key.offset >= new_size)
del_item = 1;
else
del_item = 0;
}
found_extent = 0;
/* FIXME, shrink the extent if the ref count is only 1 */
if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete;
@ -3039,42 +3031,36 @@ delete:
inode->i_ino, extent_offset);
BUG_ON(ret);
}
next:
if (path->slots[0] == 0) {
if (pending_del_nr)
goto del_pending;
btrfs_release_path(root, path);
if (found_type == BTRFS_INODE_ITEM_KEY)
break;
goto search_again;
}
path->slots[0]--;
if (pending_del_nr &&
path->slots[0] + 1 != pending_del_slot) {
struct btrfs_key debug;
del_pending:
btrfs_item_key_to_cpu(path->nodes[0], &debug,
pending_del_slot);
if (path->slots[0] == 0 ||
path->slots[0] != pending_del_slot) {
if (root->ref_cows) {
err = -EAGAIN;
goto out;
}
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path,
pending_del_slot,
pending_del_nr);
BUG_ON(ret);
pending_del_nr = 0;
}
btrfs_release_path(root, path);
if (found_type == BTRFS_INODE_ITEM_KEY)
break;
goto search_again;
} else {
path->slots[0]--;
}
}
ret = 0;
error:
out:
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
}
btrfs_free_path(path);
return ret;
return err;
}
/*
@ -3194,10 +3180,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
if (size <= hole_start)
return 0;
err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (err)
return err;
while (1) {
struct btrfs_ordered_extent *ordered;
btrfs_wait_ordered_range(inode, hole_start,
@ -3210,9 +3192,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
btrfs_put_ordered_extent(ordered);
}
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@ -3220,38 +3199,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
BUG_ON(IS_ERR(em) || !em);
last_byte = min(extent_map_end(em), block_end);
last_byte = (last_byte + mask) & ~mask;
if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
err = btrfs_drop_extents(trans, inode, cur_offset,
cur_offset + hole_size,
&hint_byte, 1);
err = btrfs_reserve_metadata_space(root, 2);
if (err)
break;
err = btrfs_reserve_metadata_space(root, 1);
if (err)
break;
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
err = btrfs_drop_extents(trans, inode, cur_offset,
cur_offset + hole_size,
&hint_byte, 1);
BUG_ON(err);
err = btrfs_insert_file_extent(trans, root,
inode->i_ino, cur_offset, 0,
0, hole_size, 0, hole_size,
0, 0, 0);
BUG_ON(err);
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
btrfs_unreserve_metadata_space(root, 1);
btrfs_end_transaction(trans, root);
btrfs_unreserve_metadata_space(root, 2);
}
free_extent_map(em);
cur_offset = last_byte;
if (err || cur_offset >= block_end)
if (cur_offset >= block_end)
break;
}
btrfs_end_transaction(trans, root);
unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
return err;
}
static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
unsigned long nr;
int ret;
if (attr->ia_size == inode->i_size)
return 0;
if (attr->ia_size > inode->i_size) {
unsigned long limit;
limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
if (attr->ia_size > inode->i_sb->s_maxbytes)
return -EFBIG;
if (limit != RLIM_INFINITY && attr->ia_size > limit) {
send_sig(SIGXFSZ, current, 0);
return -EFBIG;
}
}
ret = btrfs_reserve_metadata_space(root, 1);
if (ret)
return ret;
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_orphan_add(trans, inode);
BUG_ON(ret);
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_unreserve_metadata_space(root, 1);
btrfs_btree_balance_dirty(root, nr);
if (attr->ia_size > inode->i_size) {
ret = btrfs_cont_expand(inode, attr->ia_size);
if (ret) {
btrfs_truncate(inode);
return ret;
}
i_size_write(inode, attr->ia_size);
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
if (inode->i_nlink > 0) {
ret = btrfs_orphan_del(trans, inode);
BUG_ON(ret);
}
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
return 0;
}
/*
* We're truncating a file that used to have good data down to
* zero. Make sure it gets into the ordered flush list so that
* any new writes get down to disk quickly.
*/
if (attr->ia_size == 0)
BTRFS_I(inode)->ordered_data_close = 1;
/* we don't support swapfiles, so vmtruncate shouldn't fail */
ret = vmtruncate(inode, attr->ia_size);
BUG_ON(ret);
return 0;
}
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
@ -3262,22 +3323,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
if (attr->ia_size > inode->i_size) {
err = btrfs_cont_expand(inode, attr->ia_size);
err = btrfs_setattr_size(inode, attr);
if (err)
return err;
} else if (inode->i_size > 0 &&
attr->ia_size == 0) {
/* we're truncating a file that used to have good
* data down to zero. Make sure it gets into
* the ordered flush list so that any new writes
* get down to disk quickly.
*/
BTRFS_I(inode)->ordered_data_close = 1;
}
}
attr->ia_valid &= ~ATTR_SIZE;
if (attr->ia_valid)
err = inode_setattr(inode, attr);
if (!err && ((attr->ia_valid & ATTR_MODE)))
@ -3310,30 +3362,32 @@ void btrfs_delete_inode(struct inode *inode)
}
btrfs_i_size_write(inode, 0);
trans = btrfs_join_transaction(root, 1);
while (1) {
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
if (ret) {
btrfs_orphan_del(NULL, inode);
goto no_delete_lock;
}
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
btrfs_orphan_del(trans, inode);
if (ret != -EAGAIN)
break;
nr = trans->blocks_used;
clear_inode(inode);
btrfs_end_transaction(trans, root);
trans = NULL;
btrfs_btree_balance_dirty(root, nr);
return;
}
if (ret == 0) {
ret = btrfs_orphan_del(trans, inode);
BUG_ON(ret);
}
no_delete_lock:
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
no_delete:
clear_inode(inode);
return;
}
/*
@ -5097,17 +5151,20 @@ static void btrfs_truncate(struct inode *inode)
unsigned long nr;
u64 mask = root->sectorsize - 1;
if (!S_ISREG(inode->i_mode))
return;
if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
if (!S_ISREG(inode->i_mode)) {
WARN_ON(1);
return;
}
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
return;
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
/*
* setattr is responsible for setting the ordered_data_close flag,
@ -5129,21 +5186,32 @@ static void btrfs_truncate(struct inode *inode)
if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
btrfs_add_ordered_operation(trans, root, inode);
btrfs_set_trans_block_group(trans, inode);
btrfs_i_size_write(inode, inode->i_size);
ret = btrfs_orphan_add(trans, inode);
if (ret)
goto out;
/* FIXME, add redo link to tree so we don't leak on crash */
ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
while (1) {
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
btrfs_update_inode(trans, root, inode);
if (ret != -EAGAIN)
break;
ret = btrfs_orphan_del(trans, inode);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
trans = btrfs_start_transaction(root, 1);
btrfs_set_trans_block_group(trans, inode);
}
if (ret == 0 && inode->i_nlink > 0) {
ret = btrfs_orphan_del(trans, inode);
BUG_ON(ret);
}
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
out:
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
BUG_ON(ret);
@ -5240,9 +5308,9 @@ void btrfs_destroy_inode(struct inode *inode)
spin_lock(&root->list_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
" list\n", inode->i_ino);
dump_stack();
printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
inode->i_ino);
list_del_init(&BTRFS_I(inode)->i_orphan);
}
spin_unlock(&root->list_lock);

View File

@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
return 0;
}
static void put_inodes(struct list_head *list)
{
struct inodevec *ivec;
while (!list_empty(list)) {
ivec = list_entry(list->next, struct inodevec, list);
list_del(&ivec->list);
while (ivec->nr > 0) {
ivec->nr--;
iput(ivec->inode[ivec->nr]);
}
kfree(ivec);
}
}
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key)
@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
btrfs_btree_balance_dirty(root, nr);
/*
* put inodes outside transaction, otherwise we may deadlock.
*/
put_inodes(&inode_list);
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
}
@ -1752,19 +1771,7 @@ out:
btrfs_btree_balance_dirty(root, nr);
/*
* put inodes while we aren't holding the tree locks
*/
while (!list_empty(&inode_list)) {
struct inodevec *ivec;
ivec = list_entry(inode_list.next, struct inodevec, list);
list_del(&ivec->list);
while (ivec->nr > 0) {
ivec->nr--;
iput(ivec->inode[ivec->nr]);
}
kfree(ivec);
}
put_inodes(&inode_list);
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);