Btrfs: Add balance ioctl to restripe the chunks
Signed-off-by: Chris Mason <chris.mason@oracle.com>
This commit is contained in:
parent
788f20eb5a
commit
ec44a35cbe
@ -1364,7 +1364,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
|
||||
u64 root_objectid, u64 ref_generation,
|
||||
u64 owner, u64 owner_offset,
|
||||
u64 empty_size, u64 hint_byte,
|
||||
u64 search_end, struct btrfs_key *ins, int data);
|
||||
u64 search_end, struct btrfs_key *ins, u64 data);
|
||||
int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
struct extent_buffer *buf);
|
||||
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
|
||||
|
@ -17,6 +17,7 @@
|
||||
*/
|
||||
#include <linux/sched.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/writeback.h>
|
||||
#include "hash.h"
|
||||
#include "crc32c.h"
|
||||
#include "ctree.h"
|
||||
@ -1058,6 +1059,26 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Collapse a set of block group profile bits so that redundant
 * profiles are dropped when a stronger one is also present.
 *
 * The checks run in order and each one sees the result of the previous:
 *   1. DUP is cleared when RAID1 or RAID10 is set.
 *   2. RAID1 is cleared when RAID10 is set.
 *   3. RAID0 is cleared when any of RAID1, RAID10 or DUP is still set.
 *
 * All other bits in @flags are returned untouched.
 */
static u64 reduce_alloc_profile(u64 flags)
{
	const u64 mirror_bits = BTRFS_BLOCK_GROUP_RAID1 |
				BTRFS_BLOCK_GROUP_RAID10;
	const u64 overrides_raid0 = mirror_bits | BTRFS_BLOCK_GROUP_DUP;

	/* mirroring across devices wins over duplication */
	if ((flags & BTRFS_BLOCK_GROUP_DUP) && (flags & mirror_bits))
		flags &= ~BTRFS_BLOCK_GROUP_DUP;

	/* raid10 wins over plain raid1 */
	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
	    (flags & BTRFS_BLOCK_GROUP_RAID10))
		flags &= ~BTRFS_BLOCK_GROUP_RAID1;

	/* any remaining redundant profile wins over raid0 */
	if ((flags & BTRFS_BLOCK_GROUP_RAID0) && (flags & overrides_raid0))
		flags &= ~BTRFS_BLOCK_GROUP_RAID0;

	return flags;
}
|
||||
|
||||
|
||||
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *extent_root, u64 alloc_bytes,
|
||||
u64 flags)
|
||||
@ -1068,6 +1089,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
|
||||
u64 num_bytes;
|
||||
int ret;
|
||||
|
||||
flags = reduce_alloc_profile(flags);
|
||||
|
||||
space_info = __find_space_info(extent_root->fs_info, flags);
|
||||
if (!space_info) {
|
||||
ret = update_space_info(extent_root->fs_info, flags,
|
||||
@ -1684,6 +1707,7 @@ enospc:
|
||||
error:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* finds a free extent and does all the dirty work required for allocation
|
||||
* returns the key for the extent through ins, and a tree buffer for
|
||||
@ -1697,7 +1721,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
|
||||
u64 root_objectid, u64 ref_generation,
|
||||
u64 owner, u64 owner_offset,
|
||||
u64 empty_size, u64 hint_byte,
|
||||
u64 search_end, struct btrfs_key *ins, int data)
|
||||
u64 search_end, struct btrfs_key *ins, u64 data)
|
||||
{
|
||||
int ret;
|
||||
int pending_ret;
|
||||
@ -1727,6 +1751,7 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
|
||||
data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
|
||||
}
|
||||
again:
|
||||
data = reduce_alloc_profile(data);
|
||||
if (root->ref_cows) {
|
||||
if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
|
||||
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
|
||||
@ -1752,6 +1777,9 @@ again:
|
||||
num_bytes = max(num_bytes, min_alloc_size);
|
||||
goto again;
|
||||
}
|
||||
if (ret) {
|
||||
printk("allocation failed flags %Lu\n", data);
|
||||
}
|
||||
BUG_ON(ret);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -2274,8 +2302,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
|
||||
{
|
||||
u64 page_start;
|
||||
u64 page_end;
|
||||
u64 delalloc_start;
|
||||
u64 existing_delalloc;
|
||||
unsigned long last_index;
|
||||
unsigned long i;
|
||||
struct page *page;
|
||||
@ -2293,7 +2319,6 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
|
||||
ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages;
|
||||
|
||||
file_ra_state_init(ra, inode->i_mapping);
|
||||
kfree(ra);
|
||||
|
||||
for (; i <= last_index; i++) {
|
||||
if (total_read % ra_pages == 0) {
|
||||
@ -2313,26 +2338,30 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start,
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
|
||||
ClearPageDirty(page);
|
||||
#else
|
||||
cancel_dirty_page(page, PAGE_CACHE_SIZE);
|
||||
#endif
|
||||
wait_on_page_writeback(page);
|
||||
set_page_extent_mapped(page);
|
||||
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
|
||||
page_end = page_start + PAGE_CACHE_SIZE - 1;
|
||||
|
||||
lock_extent(io_tree, page_start, page_end, GFP_NOFS);
|
||||
|
||||
delalloc_start = page_start;
|
||||
existing_delalloc = count_range_bits(io_tree,
|
||||
&delalloc_start, page_end,
|
||||
PAGE_CACHE_SIZE, EXTENT_DELALLOC);
|
||||
|
||||
set_page_dirty(page);
|
||||
set_extent_delalloc(io_tree, page_start,
|
||||
page_end, GFP_NOFS);
|
||||
|
||||
unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
|
||||
set_page_dirty(page);
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
kfree(ra);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
return 0;
|
||||
}
|
||||
@ -2397,8 +2426,6 @@ static int noinline relocate_one_reference(struct btrfs_root *extent_root,
|
||||
goto out;
|
||||
}
|
||||
relocate_inode_pages(inode, ref_offset, extent_key->offset);
|
||||
/* FIXME, data=ordered will help get rid of this */
|
||||
filemap_fdatawrite(inode->i_mapping);
|
||||
iput(inode);
|
||||
mutex_lock(&extent_root->fs_info->fs_mutex);
|
||||
} else {
|
||||
@ -2486,6 +2513,47 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Work out which profile bits a replacement chunk should be allocated
 * with, based on the number of devices recorded in the super block copy.
 *
 * With exactly one device:
 *   - RAID0 is turned into a single device chunk (all RAID/DUP bits cleared)
 *   - RAID1/RAID10 is turned into DUP
 *   - anything else is returned unchanged
 *
 * With any other device count:
 *   - flags that already carry RAID0/RAID1/RAID10 are returned unchanged
 *   - DUP is switched to RAID1
 *   - chunks with no profile bit are switched to RAID0
 *
 * Bits outside RAID0/RAID1/RAID10/DUP are always preserved.
 */
static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
{
	u64 num_devices;
	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

	num_devices = btrfs_super_num_devices(&root->fs_info->super_copy);
	if (num_devices == 1) {
		/* from here on, stripped is flags minus every profile bit */
		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
			return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
			     BTRFS_BLOCK_GROUP_RAID10))
			return stripped | BTRFS_BLOCK_GROUP_DUP;

		return flags;
	}

	/* they already had raid on here, just return */
	if (flags & stripped)
		return flags;

	/* from here on, stripped is flags minus every profile bit */
	stripped |= BTRFS_BLOCK_GROUP_DUP;
	stripped = flags & ~stripped;

	/* switch duplicated blocks with raid1 */
	if (flags & BTRFS_BLOCK_GROUP_DUP)
		return stripped | BTRFS_BLOCK_GROUP_RAID1;

	/* turn single device chunks into raid0 */
	return stripped | BTRFS_BLOCK_GROUP_RAID0;
}
|
||||
|
||||
int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
@ -2494,6 +2562,7 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
|
||||
u64 cur_byte;
|
||||
u64 total_found;
|
||||
u64 shrink_last_byte;
|
||||
u64 new_alloc_flags;
|
||||
struct btrfs_block_group_cache *shrink_block_group;
|
||||
struct btrfs_fs_info *info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
@ -2511,17 +2580,20 @@ int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 shrink_start)
|
||||
|
||||
shrink_block_group->space_info->total_bytes -=
|
||||
shrink_block_group->key.offset;
|
||||
printk("shrink_extent_tree %Lu -> %Lu type %Lu\n", shrink_start, shrink_last_byte, shrink_block_group->flags);
|
||||
path = btrfs_alloc_path();
|
||||
root = root->fs_info->extent_root;
|
||||
path->reada = 2;
|
||||
|
||||
again:
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
do_chunk_alloc(trans, root->fs_info->extent_root,
|
||||
if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
|
||||
trans = btrfs_start_transaction(root, 1);
|
||||
new_alloc_flags = update_block_group_flags(root,
|
||||
shrink_block_group->flags);
|
||||
do_chunk_alloc(trans, root->fs_info->extent_root,
|
||||
btrfs_block_group_used(&shrink_block_group->item) +
|
||||
2 * 1024 * 1024, shrink_block_group->flags);
|
||||
btrfs_end_transaction(trans, root);
|
||||
2 * 1024 * 1024, new_alloc_flags);
|
||||
btrfs_end_transaction(trans, root);
|
||||
}
|
||||
shrink_block_group->ro = 1;
|
||||
|
||||
total_found = 0;
|
||||
|
@ -2864,6 +2864,15 @@ int btrfs_defrag_file(struct file *file) {
|
||||
goto out_unlock;
|
||||
}
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
|
||||
ClearPageDirty(page);
|
||||
#else
|
||||
cancel_dirty_page(page, PAGE_CACHE_SIZE);
|
||||
#endif
|
||||
wait_on_page_writeback(page);
|
||||
set_page_extent_mapped(page);
|
||||
|
||||
page_start = (u64)page->index << PAGE_CACHE_SHIFT;
|
||||
page_end = page_start + PAGE_CACHE_SIZE - 1;
|
||||
|
||||
@ -3105,6 +3114,8 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_resize(root, (void __user *)arg);
|
||||
case BTRFS_IOC_ADD_DEV:
|
||||
return btrfs_ioctl_add_dev(root, (void __user *)arg);
|
||||
case BTRFS_IOC_BALANCE:
|
||||
return btrfs_balance(root->fs_info->dev_root);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
@ -869,6 +869,107 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scale @num by @factor tenths, i.e. return num * factor / 10.
 * A factor of 10 means "all of it" and returns @num without doing
 * any arithmetic.
 */
static u64 div_factor(u64 num, int factor)
{
	u64 scaled;

	if (factor != 10) {
		scaled = num * factor;
		/* do_div() divides the 64 bit value in place */
		do_div(scaled, 10);
		return scaled;
	}
	return num;
}
|
||||
|
||||
|
||||
int btrfs_balance(struct btrfs_root *dev_root)
|
||||
{
|
||||
int ret;
|
||||
struct list_head *cur;
|
||||
struct list_head *devices = &dev_root->fs_info->fs_devices->devices;
|
||||
struct btrfs_device *device;
|
||||
u64 old_size;
|
||||
u64 size_to_free;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_chunk *chunk;
|
||||
struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_key found_key;
|
||||
|
||||
|
||||
dev_root = dev_root->fs_info->dev_root;
|
||||
|
||||
mutex_lock(&dev_root->fs_info->fs_mutex);
|
||||
/* step one make some room on all the devices */
|
||||
list_for_each(cur, devices) {
|
||||
device = list_entry(cur, struct btrfs_device, dev_list);
|
||||
old_size = device->total_bytes;
|
||||
size_to_free = div_factor(old_size, 1);
|
||||
size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
|
||||
if (device->total_bytes - device->bytes_used > size_to_free)
|
||||
continue;
|
||||
|
||||
ret = btrfs_shrink_device(device, old_size - size_to_free);
|
||||
BUG_ON(ret);
|
||||
|
||||
trans = btrfs_start_transaction(dev_root, 1);
|
||||
BUG_ON(!trans);
|
||||
|
||||
ret = btrfs_grow_device(trans, device, old_size);
|
||||
BUG_ON(ret);
|
||||
|
||||
btrfs_end_transaction(trans, dev_root);
|
||||
}
|
||||
|
||||
/* step two, relocate all the chunks */
|
||||
path = btrfs_alloc_path();
|
||||
BUG_ON(!path);
|
||||
|
||||
key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
||||
key.offset = (u64)-1;
|
||||
key.type = BTRFS_CHUNK_ITEM_KEY;
|
||||
|
||||
while(1) {
|
||||
ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
/*
|
||||
* this shouldn't happen, it means the last relocate
|
||||
* failed
|
||||
*/
|
||||
if (ret == 0)
|
||||
break;
|
||||
|
||||
ret = btrfs_previous_item(chunk_root, path, 0,
|
||||
BTRFS_CHUNK_ITEM_KEY);
|
||||
if (ret) {
|
||||
break;
|
||||
}
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
|
||||
path->slots[0]);
|
||||
if (found_key.objectid != key.objectid)
|
||||
break;
|
||||
chunk = btrfs_item_ptr(path->nodes[0],
|
||||
path->slots[0],
|
||||
struct btrfs_chunk);
|
||||
key.offset = found_key.offset;
|
||||
/* chunk zero is special */
|
||||
if (key.offset == 0)
|
||||
break;
|
||||
|
||||
ret = btrfs_relocate_chunk(chunk_root,
|
||||
chunk_root->root_key.objectid,
|
||||
found_key.objectid,
|
||||
found_key.offset);
|
||||
BUG_ON(ret);
|
||||
btrfs_release_path(chunk_root, path);
|
||||
}
|
||||
ret = 0;
|
||||
error:
|
||||
btrfs_free_path(path);
|
||||
mutex_unlock(&dev_root->fs_info->fs_mutex);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* shrinking a device means finding all of the device extents past
|
||||
* the new size, and then following the back refs to the chunks.
|
||||
@ -985,15 +1086,6 @@ int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Scale @num by @factor tenths, i.e. return num * factor / 10.
 * A factor of 10 means "all of it" and returns @num without doing
 * any arithmetic.
 */
static u64 div_factor(u64 num, int factor)
{
	u64 scaled;

	if (factor != 10) {
		scaled = num * factor;
		/* do_div() divides the 64 bit value in place */
		do_div(scaled, 10);
		return scaled;
	}
	return num;
}
|
||||
|
||||
static u64 chunk_bytes_by_type(u64 type, u64 calc_size, int num_stripes,
|
||||
int sub_stripes)
|
||||
{
|
||||
@ -1040,6 +1132,11 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
|
||||
int stripe_len = 64 * 1024;
|
||||
struct btrfs_key key;
|
||||
|
||||
if ((type & BTRFS_BLOCK_GROUP_RAID1) &&
|
||||
(type & BTRFS_BLOCK_GROUP_DUP)) {
|
||||
WARN_ON(1);
|
||||
type &= ~BTRFS_BLOCK_GROUP_DUP;
|
||||
}
|
||||
dev_list = &extent_root->fs_info->fs_devices->alloc_list;
|
||||
if (list_empty(dev_list))
|
||||
return -ENOSPC;
|
||||
|
@ -134,4 +134,5 @@ struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid,
|
||||
u8 *uuid);
|
||||
int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
|
||||
int btrfs_init_new_device(struct btrfs_root *root, char *path);
|
||||
int btrfs_balance(struct btrfs_root *dev_root);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user