Btrfs: more allocator enhancements

Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
Chris Mason 2007-05-06 10:15:01 -04:00 committed by David Woodhouse
parent be08c1b9f8
commit be74417553
5 changed files with 249 additions and 85 deletions

View File

@ -26,8 +26,10 @@ int set_radix_bit(struct radix_tree_root *radix, unsigned long bit)
if (ret)
return ret;
}
set_bit(bit_slot, bits + 1);
return 0;
ret = test_and_set_bit(bit_slot, bits + 1);
if (ret < 0)
ret = 1;
return ret;
}
int test_radix_bit(struct radix_tree_root *radix, unsigned long bit)

View File

@ -257,6 +257,8 @@ struct btrfs_block_group_cache {
struct btrfs_block_group_item item;
u64 first_free;
u64 last_alloc;
u64 pinned;
int data;
};
struct crypto_hash;
@ -264,12 +266,12 @@ struct btrfs_fs_info {
struct btrfs_root *extent_root;
struct btrfs_root *tree_root;
struct btrfs_root *dev_root;
struct btrfs_block_group_cache *block_group_cache;
struct radix_tree_root fs_roots_radix;
struct radix_tree_root pending_del_radix;
struct radix_tree_root pinned_radix;
struct radix_tree_root dev_radix;
struct radix_tree_root block_group_radix;
struct radix_tree_root block_group_data_radix;
u64 extent_tree_insert[BTRFS_MAX_LEVEL * 3];
int extent_tree_insert_nr;
@ -1072,7 +1074,8 @@ static inline void btrfs_mark_buffer_dirty(struct buffer_head *bh)
/* extent-tree.c */
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache
*hint, int data);
*hint, u64 search_start,
int data);
int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,

View File

@ -554,6 +554,7 @@ struct btrfs_root *open_ctree(struct super_block *sb)
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->dev_radix, GFP_NOFS);
INIT_RADIX_TREE(&fs_info->block_group_radix, GFP_KERNEL);
INIT_RADIX_TREE(&fs_info->block_group_data_radix, GFP_KERNEL);
INIT_LIST_HEAD(&fs_info->trans_list);
sb_set_blocksize(sb, 4096);
fs_info->running_transaction = NULL;
@ -582,7 +583,6 @@ struct btrfs_root *open_ctree(struct super_block *sb)
}
mutex_init(&fs_info->trans_mutex);
mutex_init(&fs_info->fs_mutex);
fs_info->block_group_cache = NULL;
__setup_root(sb->s_blocksize, dev_root,
fs_info, BTRFS_DEV_TREE_OBJECTID);

View File

@ -12,36 +12,88 @@ static int finish_current_insert(struct btrfs_trans_handle *trans, struct
static int del_pending_extents(struct btrfs_trans_handle *trans, struct
btrfs_root *extent_root);
/*
 * Find the block group cache entry that contains blocknr.
 *
 * info->block_group_radix is probed first, then
 * info->block_group_data_radix.  Each probe asks the radix tree for a
 * single entry starting at index blocknr, and the entry is accepted only
 * when blocknr lies inside [key.objectid, key.objectid + key.offset).
 *
 * Returns the matching cache entry.  When neither tree yields a group
 * covering blocknr, a message is printed and NULL is returned, so callers
 * must be prepared for a NULL result.
 */
static struct btrfs_block_group_cache *lookup_block_group(struct
							  btrfs_fs_info *info,
							  u64 blocknr)
{
	struct radix_tree_root *trees[2];
	struct btrfs_block_group_cache *found;
	int which;

	/* probe order matters: plain radix first, data radix second */
	trees[0] = &info->block_group_radix;
	trees[1] = &info->block_group_data_radix;

	for (which = 0; which < 2; which++) {
		if (!radix_tree_gang_lookup(trees[which], (void **)&found,
					    blocknr, 1))
			continue;
		/* the lookup may hand back a later group; verify the range */
		if (blocknr >= found->key.objectid &&
		    blocknr < found->key.objectid + found->key.offset)
			return found;
	}

	printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
	return NULL;
}
struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
struct btrfs_block_group_cache
*hint, int data)
*hint, u64 search_start,
int data)
{
struct btrfs_block_group_cache *cache[8];
struct btrfs_block_group_cache *found_group = NULL;
struct btrfs_fs_info *info = root->fs_info;
struct radix_tree_root *radix;
u64 used;
u64 last = 0;
u64 hint_last;
int i;
int ret;
int full_search = 0;
if (!data && hint) {
if (data)
radix = &info->block_group_data_radix;
else
radix = &info->block_group_radix;
if (search_start) {
struct btrfs_block_group_cache *shint;
shint = lookup_block_group(info, search_start);
if (shint->data == data) {
used = btrfs_block_group_used(&shint->item);
if (used + shint->pinned <
(shint->key.offset * 8) / 10) {
return shint;
}
}
}
if (hint && hint->data == data) {
used = btrfs_block_group_used(&hint->item);
if (used < (hint->key.offset * 2) / 3) {
if (used + hint->pinned < (hint->key.offset * 8) / 10) {
return hint;
}
radix_tree_tag_clear(&info->block_group_radix,
hint->key.objectid + hint->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
last = hint->key.objectid + hint->key.offset;
if (used >= (hint->key.offset * 8) / 10) {
radix_tree_tag_clear(radix,
hint->key.objectid +
hint->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
}
last = hint->key.offset * 2;
if (hint->key.objectid >= last)
last = max(search_start, hint->key.objectid - last);
else
last = hint->key.objectid + hint->key.offset;
hint_last = last;
} else {
hint_last = 0;
last = 0;
hint_last = search_start;
last = search_start;
}
while(1) {
ret = radix_tree_gang_lookup_tag(&info->block_group_radix,
(void **)cache,
ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
last, ARRAY_SIZE(cache),
BTRFS_BLOCK_GROUP_AVAIL);
if (!ret)
@ -49,65 +101,54 @@ struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
for (i = 0; i < ret; i++) {
last = cache[i]->key.objectid +
cache[i]->key.offset;
if (!full_search && !data &&
(cache[i]->key.objectid & cache[i]->key.offset))
continue;
if (!full_search && data &&
(cache[i]->key.objectid & cache[i]->key.offset) == 0)
continue;
used = btrfs_block_group_used(&cache[i]->item);
if (used < (cache[i]->key.offset * 2) / 3) {
info->block_group_cache = cache[i];
if (used + cache[i]->pinned <
(cache[i]->key.offset * 8) / 10) {
found_group = cache[i];
goto found;
}
radix_tree_tag_clear(&info->block_group_radix,
cache[i]->key.objectid +
cache[i]->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
if (used >= (cache[i]->key.offset * 8) / 10) {
radix_tree_tag_clear(radix,
cache[i]->key.objectid +
cache[i]->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
}
}
}
last = hint_last;
again:
while(1) {
ret = radix_tree_gang_lookup(&info->block_group_radix,
(void **)cache,
last, ARRAY_SIZE(cache));
ret = radix_tree_gang_lookup(radix, (void **)cache,
last, ARRAY_SIZE(cache));
if (!ret)
break;
for (i = 0; i < ret; i++) {
last = cache[i]->key.objectid +
cache[i]->key.offset;
if (!full_search && !data &&
(cache[i]->key.objectid & cache[i]->key.offset))
continue;
if (!full_search && data &&
(cache[i]->key.objectid & cache[i]->key.offset) == 0)
continue;
used = btrfs_block_group_used(&cache[i]->item);
if (used < cache[i]->key.offset) {
info->block_group_cache = cache[i];
if (used + cache[i]->pinned < cache[i]->key.offset) {
found_group = cache[i];
goto found;
}
radix_tree_tag_clear(&info->block_group_radix,
cache[i]->key.objectid +
cache[i]->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
if (used >= cache[i]->key.offset) {
radix_tree_tag_clear(radix,
cache[i]->key.objectid +
cache[i]->key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
}
}
}
info->block_group_cache = NULL;
if (!full_search) {
last = 0;
last = search_start;
full_search = 1;
goto again;
}
found:
if (!found_group) {
ret = radix_tree_gang_lookup(&info->block_group_radix,
ret = radix_tree_gang_lookup(radix,
(void **)&found_group, 0, 1);
BUG_ON(ret != 1);
}
found:
return found_group;
}
@ -252,18 +293,20 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans,
return ret;
if (pending_ret)
return pending_ret;
if (cache->data)
cache->last_alloc = cache->first_free;
return 0;
}
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct radix_tree_root *radix)
{
struct btrfs_block_group_cache *cache[8];
int ret;
int err = 0;
int werr = 0;
struct radix_tree_root *radix = &root->fs_info->block_group_radix;
int i;
struct btrfs_path *path;
@ -285,35 +328,74 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
path, cache[i]);
if (err)
werr = err;
cache[i]->last_alloc = cache[i]->first_free;
}
}
btrfs_free_path(path);
return werr;
}
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
int ret;
int ret2;
ret = write_dirty_block_radix(trans, root,
&root->fs_info->block_group_radix);
ret2 = write_dirty_block_radix(trans, root,
&root->fs_info->block_group_data_radix);
if (ret)
return ret;
if (ret2)
return ret2;
return 0;
}
static int update_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 blocknr, u64 num, int alloc)
{
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info;
struct radix_tree_root *radix;
u64 total = num;
u64 old_val;
u64 block_in_group;
int ret;
if (num != 1)
radix = &info->block_group_data_radix;
else
radix = &info->block_group_radix;
while(total) {
ret = radix_tree_gang_lookup(&info->block_group_radix,
(void **)&cache, blocknr, 1);
ret = radix_tree_gang_lookup(radix, (void **)&cache,
blocknr, 1);
if (!ret) {
printk(KERN_CRIT "blocknr %Lu lookup failed\n",
blocknr);
return -1;
}
block_in_group = blocknr - cache->key.objectid;
if (block_in_group > cache->key.offset || cache->key.objectid >
blocknr) {
if (radix == &info->block_group_data_radix)
radix = &info->block_group_radix;
else
radix = &info->block_group_data_radix;
ret = radix_tree_gang_lookup(radix, (void **)&cache,
blocknr, 1);
if (!ret) {
printk(KERN_CRIT "blocknr %Lu lookup failed\n",
blocknr);
return -1;
}
block_in_group = blocknr - cache->key.objectid;
if (block_in_group > cache->key.offset ||
cache->key.objectid > blocknr) {
BUG();
}
}
WARN_ON(block_in_group > cache->key.offset);
radix_tree_tag_set(&info->block_group_radix,
cache->key.objectid + cache->key.offset - 1,
radix_tree_tag_set(radix, cache->key.objectid +
cache->key.offset - 1,
BTRFS_BLOCK_GROUP_DIRTY);
old_val = btrfs_block_group_used(&cache->item);
@ -346,6 +428,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
{
unsigned long gang[8];
struct inode *btree_inode = root->fs_info->btree_inode;
struct btrfs_block_group_cache *block_group;
u64 first = 0;
int ret;
int i;
@ -360,6 +443,14 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
first = gang[0];
for (i = 0; i < ret; i++) {
clear_radix_bit(pinned_radix, gang[i]);
block_group = lookup_block_group(root->fs_info,
gang[i]);
if (block_group) {
WARN_ON(block_group->pinned == 0);
block_group->pinned--;
if (gang[i] < block_group->last_alloc)
block_group->last_alloc = gang[i];
}
try_remove_page(btree_inode->i_mapping,
gang[i] << (PAGE_CACHE_SHIFT -
btree_inode->i_blkbits));
@ -420,10 +511,16 @@ static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
btrfs_block_release(root, bh);
}
err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
if (!err) {
struct btrfs_block_group_cache *cache;
cache = lookup_block_group(root->fs_info, blocknr);
if (cache)
cache->pinned++;
}
} else {
err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
}
BUG_ON(err);
BUG_ON(err < 0);
return 0;
}
@ -502,6 +599,7 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
int i;
struct radix_tree_root *pending_radix;
struct radix_tree_root *pinned_radix;
struct btrfs_block_group_cache *cache;
pending_radix = &extent_root->fs_info->pending_del_radix;
pinned_radix = &extent_root->fs_info->pinned_radix;
@ -513,7 +611,17 @@ static int del_pending_extents(struct btrfs_trans_handle *trans, struct
break;
for (i = 0; i < ret; i++) {
wret = set_radix_bit(pinned_radix, gang[i]);
BUG_ON(wret);
if (wret == 0) {
cache = lookup_block_group(extent_root->fs_info,
gang[i]);
if (cache)
cache->pinned++;
}
if (wret < 0) {
printk(KERN_CRIT "set_radix_bit, err %d\n",
wret);
BUG_ON(wret < 0);
}
wret = clear_radix_bit(pending_radix, gang[i]);
BUG_ON(wret);
wret = __free_extent(trans, extent_root,
@ -563,6 +671,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
int slot = 0;
u64 last_block = 0;
u64 test_block;
u64 orig_search_start = search_start;
int start_found;
struct btrfs_leaf *l;
struct btrfs_root * root = orig_root->fs_info->extent_root;
@ -572,6 +681,7 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
int fill_prealloc = 0;
int level;
struct btrfs_block_group_cache *block_group;
int full_scan = 0;
path = btrfs_alloc_path();
ins->flags = 0;
@ -583,10 +693,21 @@ static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
num_blocks = 1;
total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
}
block_group = btrfs_find_block_group(root, trans->block_group, data);
if (search_start) {
block_group = lookup_block_group(info, search_start);
block_group = btrfs_find_block_group(root, block_group,
search_start, data);
} else {
block_group = btrfs_find_block_group(root,
trans->block_group, 0,
data);
}
check_failed:
if (block_group->data != data)
WARN_ON(1);
if (block_group->last_alloc > search_start)
search_start = block_group->last_alloc;
check_failed:
btrfs_init_path(path);
ins->objectid = search_start;
ins->offset = 0;
@ -639,6 +760,13 @@ check_failed:
}
start_found = 1;
last_block = key.objectid + key.offset;
if (last_block >= block_group->key.objectid +
block_group->key.offset) {
btrfs_release_path(root, path);
search_start = block_group->key.objectid +
block_group->key.offset * 2;
goto new_group;
}
next:
path->slots[0]++;
}
@ -650,16 +778,17 @@ check_pending:
btrfs_release_path(root, path);
BUG_ON(ins->objectid < search_start);
if (ins->objectid >= btrfs_super_total_blocks(info->disk_super)) {
if (search_start == 0)
if (full_scan)
return -ENOSPC;
search_start = 0;
goto check_failed;
search_start = orig_search_start;
full_scan = 1;
goto new_group;
}
for (test_block = ins->objectid;
test_block < ins->objectid + num_blocks; test_block++) {
if (test_radix_bit(&info->pinned_radix, test_block)) {
search_start = test_block + 1;
goto check_failed;
goto new_group;
}
}
if (!fill_prealloc && info->extent_tree_insert_nr) {
@ -670,7 +799,7 @@ check_pending:
ins->objectid <= last) {
search_start = last + 1;
WARN_ON(1);
goto check_failed;
goto new_group;
}
}
if (!fill_prealloc && info->extent_tree_prealloc_nr) {
@ -680,7 +809,7 @@ check_pending:
ins->objectid <= info->extent_tree_prealloc[0]) {
search_start = info->extent_tree_prealloc[0] + 1;
WARN_ON(1);
goto check_failed;
goto new_group;
}
}
if (fill_prealloc) {
@ -696,14 +825,12 @@ check_pending:
}
if (total_found < total_needed) {
search_start = test_block;
goto check_failed;
goto new_group;
}
info->extent_tree_prealloc_nr = total_found;
}
ret = radix_tree_gang_lookup(&info->block_group_radix,
(void **)&block_group,
ins->objectid, 1);
if (ret) {
block_group = lookup_block_group(info, ins->objectid);
if (block_group) {
block_group->last_alloc = ins->objectid;
if (!data)
trans->block_group = block_group;
@ -711,6 +838,18 @@ check_pending:
ins->offset = num_blocks;
btrfs_free_path(path);
return 0;
new_group:
if (search_start >= btrfs_super_total_blocks(info->disk_super)) {
search_start = orig_search_start;
full_scan = 1;
}
block_group = lookup_block_group(info, search_start);
if (!full_scan)
block_group = btrfs_find_block_group(root, block_group,
search_start, data);
goto check_failed;
error:
btrfs_release_path(root, path);
btrfs_free_path(path);
@ -794,7 +933,7 @@ struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
struct buffer_head *buf;
ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1, 0, (unsigned long)-1, &ins, 0);
1, hint, (unsigned long)-1, &ins, 0);
if (ret) {
BUG();
return NULL;
@ -984,21 +1123,19 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
return ret;
}
int btrfs_free_block_groups(struct btrfs_fs_info *info)
static int free_block_group_radix(struct radix_tree_root *radix)
{
int ret;
struct btrfs_block_group_cache *cache[8];
int i;
while(1) {
ret = radix_tree_gang_lookup(&info->block_group_radix,
(void **)cache, 0,
ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
ARRAY_SIZE(cache));
if (!ret)
break;
for (i = 0; i < ret; i++) {
radix_tree_delete(&info->block_group_radix,
cache[i]->key.objectid +
radix_tree_delete(radix, cache[i]->key.objectid +
cache[i]->key.offset - 1);
kfree(cache[i]);
}
@ -1006,6 +1143,20 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
return 0;
}
/*
 * Run free_block_group_radix() on both block group radix trees of info.
 *
 * Both trees are always processed.  The return value is the result for
 * block_group_radix if it is non-zero, otherwise the result for
 * block_group_data_radix (0 when both calls returned 0).
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	int first_err = free_block_group_radix(&info->block_group_radix);
	int data_err = free_block_group_radix(&info->block_group_data_radix);

	/* the earlier call takes precedence when both report something */
	return first_err ? first_err : data_err;
}
int btrfs_read_block_groups(struct btrfs_root *root)
{
struct btrfs_path *path;
@ -1013,13 +1164,16 @@ int btrfs_read_block_groups(struct btrfs_root *root)
int err = 0;
struct btrfs_block_group_item *bi;
struct btrfs_block_group_cache *cache;
struct btrfs_fs_info *info = root->fs_info;
struct radix_tree_root *radix;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_leaf *leaf;
u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
u64 used;
u64 nr = 0;
root = root->fs_info->extent_root;
root = info->extent_root;
key.objectid = 0;
key.offset = group_size_blocks;
key.flags = 0;
@ -1030,7 +1184,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
return -ENOMEM;
while(1) {
ret = btrfs_search_slot(NULL, root->fs_info->extent_root,
ret = btrfs_search_slot(NULL, info->extent_root,
&key, path, 0, 0);
if (ret != 0) {
err = ret;
@ -1050,23 +1204,28 @@ int btrfs_read_block_groups(struct btrfs_root *root)
memcpy(&cache->key, &found_key, sizeof(found_key));
cache->last_alloc = cache->key.objectid;
cache->first_free = cache->key.objectid;
cache->pinned = 0;
cache->data = (nr & 1);
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(root, path);
ret = radix_tree_insert(&root->fs_info->block_group_radix,
found_key.objectid +
if (nr & 1)
radix = &info->block_group_data_radix;
else
radix = &info->block_group_radix;
ret = radix_tree_insert(radix, found_key.objectid +
found_key.offset - 1,
(void *)cache);
BUG_ON(ret);
used = btrfs_block_group_used(bi);
if (used < (key.offset * 2) / 3) {
radix_tree_tag_set(&root->fs_info->block_group_radix,
found_key.objectid +
if (used < (key.offset * 8) / 10) {
radix_tree_tag_set(radix, found_key.objectid +
found_key.offset - 1,
BTRFS_BLOCK_GROUP_AVAIL);
}
if (key.objectid >=
btrfs_super_total_blocks(root->fs_info->disk_super))
btrfs_super_total_blocks(info->disk_super))
break;
nr++;
}
btrfs_free_path(path);

View File

@ -811,7 +811,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
return ERR_PTR(-ENOMEM);
BTRFS_I(inode)->root = root;
group = btrfs_find_block_group(root, group, 0);
group = btrfs_find_block_group(root, group, 0, 0);
BTRFS_I(inode)->block_group = group;
inode->i_uid = current->fsuid;