Merge branch 'jeffm-discard-4.3' into for-linus-4.3

This commit is contained in:
Chris Mason 2015-08-09 07:35:33 -07:00
commit 46cd28555f
9 changed files with 343 additions and 59 deletions

View File

@ -3431,6 +3431,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 group_start,
struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *cache);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data);
@ -4067,6 +4069,7 @@ __cold
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
const char *btrfs_decode_error(int errno);
__cold
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,

View File

@ -3761,6 +3761,15 @@ void close_ctree(struct btrfs_root *root)
cancel_work_sync(&fs_info->async_reclaim_work);
if (!(fs_info->sb->s_flags & MS_RDONLY)) {
/*
* If the cleaner thread is stopped and there are
* block groups queued for removal, the deletion will be
* skipped when we quit the cleaner thread.
*/
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(root->fs_info);
mutex_unlock(&root->fs_info->cleaner_mutex);
ret = btrfs_commit_super(root);
if (ret)
btrfs_err(fs_info, "commit super ret %d", ret);

View File

@ -1882,10 +1882,77 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
return ret;
}
static int btrfs_issue_discard(struct block_device *bdev,
u64 start, u64 len)
#define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
u64 *discarded_bytes)
{
return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
int j, ret = 0;
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << 9);
if (WARN_ON(start != aligned_start)) {
len -= aligned_start - start;
len = round_down(len, 1 << 9);
start = aligned_start;
}
*discarded_bytes = 0;
if (!len)
return 0;
end = start + len;
bytes_left = len;
/* Skip any superblocks on this device. */
for (j = 0; j < BTRFS_SUPER_MIRROR_MAX; j++) {
u64 sb_start = btrfs_sb_offset(j);
u64 sb_end = sb_start + BTRFS_SUPER_INFO_SIZE;
u64 size = sb_start - start;
if (!in_range(sb_start, start, bytes_left) &&
!in_range(sb_end, start, bytes_left) &&
!in_range(start, sb_start, BTRFS_SUPER_INFO_SIZE))
continue;
/*
* Superblock spans beginning of range. Adjust start and
* try again.
*/
if (sb_start <= start) {
start += sb_end - start;
if (start > end) {
bytes_left = 0;
break;
}
bytes_left = end - start;
continue;
}
if (size) {
ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
GFP_NOFS, 0);
if (!ret)
*discarded_bytes += size;
else if (ret != -EOPNOTSUPP)
return ret;
}
start = sb_end;
if (start > end) {
bytes_left = 0;
break;
}
bytes_left = end - start;
}
if (bytes_left) {
ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
GFP_NOFS, 0);
if (!ret)
*discarded_bytes += bytes_left;
}
return ret;
}
int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
@ -1906,14 +1973,16 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
for (i = 0; i < bbio->num_stripes; i++, stripe++) {
u64 bytes;
if (!stripe->dev->can_discard)
continue;
ret = btrfs_issue_discard(stripe->dev->bdev,
stripe->physical,
stripe->length);
stripe->length,
&bytes);
if (!ret)
discarded_bytes += stripe->length;
discarded_bytes += bytes;
else if (ret != -EOPNOTSUPP)
break; /* Logic errors or -ENOMEM, or -EIO but I don't know how that could happen JDM */
@ -6061,20 +6130,19 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *block_group, *tmp;
struct list_head *deleted_bgs;
struct extent_io_tree *unpin;
u64 start;
u64 end;
int ret;
if (trans->aborted)
return 0;
if (fs_info->pinned_extents == &fs_info->freed_extents[0])
unpin = &fs_info->freed_extents[1];
else
unpin = &fs_info->freed_extents[0];
while (1) {
while (!trans->aborted) {
mutex_lock(&fs_info->unused_bg_unpin_mutex);
ret = find_first_extent_bit(unpin, 0, &start, &end,
EXTENT_DIRTY, NULL);
@ -6093,6 +6161,34 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
cond_resched();
}
/*
* Transaction is finished. We don't need the lock anymore. We
* do need to clean up the block groups in case of a transaction
* abort.
*/
deleted_bgs = &trans->transaction->deleted_bgs;
list_for_each_entry_safe(block_group, tmp, deleted_bgs, bg_list) {
u64 trimmed = 0;
ret = -EROFS;
if (!trans->aborted)
ret = btrfs_discard_extent(root,
block_group->key.objectid,
block_group->key.offset,
&trimmed);
list_del_init(&block_group->bg_list);
btrfs_put_block_group_trimming(block_group);
btrfs_put_block_group(block_group);
if (ret) {
const char *errstr = btrfs_decode_error(ret);
btrfs_warn(fs_info,
"Discard failed while removing blockgroup: errno=%d %s\n",
ret, errstr);
}
}
return 0;
}
@ -9830,6 +9926,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
* currently running transaction might finish and a new one start,
* allowing for new block groups to be created that can reuse the same
* physical device locations unless we take this special care.
*
* There may also be an implicit trim operation if the file system
* is mounted with -odiscard. The same protections must remain
* in place until the extents have been discarded completely when
* the transaction commit has completed.
*/
remove_em = (atomic_read(&block_group->trimming) == 0);
/*
@ -9904,6 +10005,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_lock(&fs_info->unused_bgs_lock);
while (!list_empty(&fs_info->unused_bgs)) {
u64 start, end;
int trimming;
block_group = list_first_entry(&fs_info->unused_bgs,
struct btrfs_block_group_cache,
@ -10003,12 +10105,39 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info)
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
/* DISCARD can flip during remount */
trimming = btrfs_test_opt(root, DISCARD);
/* Implicit trim during transaction commit. */
if (trimming)
btrfs_get_block_group_trimming(block_group);
/*
* Btrfs_remove_chunk will abort the transaction if things go
* horribly wrong.
*/
ret = btrfs_remove_chunk(trans, root,
block_group->key.objectid);
if (ret) {
if (trimming)
btrfs_put_block_group_trimming(block_group);
goto end_trans;
}
/*
* If we're not mounted with -odiscard, we can just forget
* about this block group. Otherwise we'll need to wait
* until transaction commit to do the actual discard.
*/
if (trimming) {
WARN_ON(!list_empty(&block_group->bg_list));
spin_lock(&trans->transaction->deleted_bgs_lock);
list_move(&block_group->bg_list,
&trans->transaction->deleted_bgs);
spin_unlock(&trans->transaction->deleted_bgs_lock);
btrfs_get_block_group(block_group);
}
end_trans:
btrfs_end_transaction(trans, root);
next:
@ -10062,10 +10191,99 @@ int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
return unpin_extent_range(root, start, end, false);
}
/*
* It used to be that old block groups would be left around forever.
* Iterating over them would be enough to trim unused space. Since we
* now automatically remove them, we also need to iterate over unallocated
* space.
*
* We don't want a transaction for this since the discard may take a
* substantial amount of time. We don't require that a transaction be
* running, but we do need to take a running transaction into account
* to ensure that we're not discarding chunks that were released in
* the current transaction.
*
* Holding the chunks lock will prevent other threads from allocating
* or releasing chunks, but it won't prevent a running transaction
* from committing and releasing the memory that the pending chunks
* list head uses. For that, we need to take a reference to the
* transaction.
*/
static int btrfs_trim_free_extents(struct btrfs_device *device,
u64 minlen, u64 *trimmed)
{
u64 start = 0, len = 0;
int ret;
*trimmed = 0;
/* Not writeable = nothing to do. */
if (!device->writeable)
return 0;
/* No free space = nothing to do. */
if (device->total_bytes <= device->bytes_used)
return 0;
ret = 0;
while (1) {
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
struct btrfs_transaction *trans;
u64 bytes;
ret = mutex_lock_interruptible(&fs_info->chunk_mutex);
if (ret)
return ret;
down_read(&fs_info->commit_root_sem);
spin_lock(&fs_info->trans_lock);
trans = fs_info->running_transaction;
if (trans)
atomic_inc(&trans->use_count);
spin_unlock(&fs_info->trans_lock);
ret = find_free_dev_extent_start(trans, device, minlen, start,
&start, &len);
if (trans)
btrfs_put_transaction(trans);
if (ret) {
up_read(&fs_info->commit_root_sem);
mutex_unlock(&fs_info->chunk_mutex);
if (ret == -ENOSPC)
ret = 0;
break;
}
ret = btrfs_issue_discard(device->bdev, start, len, &bytes);
up_read(&fs_info->commit_root_sem);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)
break;
start += len;
*trimmed += bytes;
if (fatal_signal_pending(current)) {
ret = -ERESTARTSYS;
break;
}
cond_resched();
}
return ret;
}
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
struct btrfs_device *device;
struct list_head *devices;
u64 group_trimmed;
u64 start;
u64 end;
@ -10120,6 +10338,18 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
cache = next_block_group(fs_info->tree_root, cache);
}
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
devices = &root->fs_info->fs_devices->alloc_list;
list_for_each_entry(device, devices, dev_alloc_list) {
ret = btrfs_trim_free_extents(device, range->minlen,
&group_trimmed);
if (ret)
break;
trimmed += group_trimmed;
}
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
range->len = trimmed;
return ret;
}

View File

@ -3272,35 +3272,23 @@ next:
return ret;
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
void btrfs_get_block_group_trimming(struct btrfs_block_group_cache *cache)
{
int ret;
atomic_inc(&cache->trimming);
}
*trimmed = 0;
void btrfs_put_block_group_trimming(struct btrfs_block_group_cache *block_group)
{
struct extent_map_tree *em_tree;
struct extent_map *em;
bool cleanup;
spin_lock(&block_group->lock);
if (block_group->removed) {
spin_unlock(&block_group->lock);
return 0;
}
atomic_inc(&block_group->trimming);
cleanup = (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
spin_lock(&block_group->lock);
if (atomic_dec_and_test(&block_group->trimming) &&
block_group->removed) {
struct extent_map_tree *em_tree;
struct extent_map *em;
spin_unlock(&block_group->lock);
if (cleanup) {
lock_chunks(block_group->fs_info->chunk_root);
em_tree = &block_group->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
@ -3324,10 +3312,31 @@ out:
* this block group have left 1 entry each one. Free them.
*/
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
} else {
spin_unlock(&block_group->lock);
}
}
int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
u64 *trimmed, u64 start, u64 end, u64 minlen)
{
int ret;
*trimmed = 0;
spin_lock(&block_group->lock);
if (block_group->removed) {
spin_unlock(&block_group->lock);
return 0;
}
btrfs_get_block_group_trimming(block_group);
spin_unlock(&block_group->lock);
ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
if (ret)
goto out;
ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
out:
btrfs_put_block_group_trimming(block_group);
return ret;
}

View File

@ -69,7 +69,7 @@ static struct file_system_type btrfs_fs_type;
static int btrfs_remount(struct super_block *sb, int *flags, char *data);
static const char *btrfs_decode_error(int errno)
const char *btrfs_decode_error(int errno)
{
char *errstr = "unknown";
@ -1651,6 +1651,17 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
sb->s_flags |= MS_RDONLY;
/*
* Setting MS_RDONLY will put the cleaner thread to
* sleep at the next loop if it's already active.
* If it's already asleep, we'll leave unused block
* groups on disk until we're mounted read-write again
* unless we clean them up here.
*/
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_delete_unused_bgs(fs_info);
mutex_unlock(&root->fs_info->cleaner_mutex);
btrfs_dev_replace_suspend_for_unmount(fs_info);
btrfs_scrub_cancel(fs_info);
btrfs_pause_balance(fs_info);

View File

@ -258,6 +258,8 @@ loop:
mutex_init(&cur_trans->cache_write_mutex);
cur_trans->num_dirty_bgs = 0;
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->deleted_bgs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(&cur_trans->dirty_pages,
fs_info->btree_inode->i_mapping);

View File

@ -74,6 +74,8 @@ struct btrfs_transaction {
*/
struct mutex cache_write_mutex;
spinlock_t dirty_bgs_lock;
struct list_head deleted_bgs;
spinlock_t deleted_bgs_lock;
struct btrfs_delayed_ref_root delayed_refs;
int aborted;
int dirty_bg_run;

View File

@ -1116,15 +1116,18 @@ out:
return ret;
}
static int contains_pending_extent(struct btrfs_trans_handle *trans,
static int contains_pending_extent(struct btrfs_transaction *transaction,
struct btrfs_device *device,
u64 *start, u64 len)
{
struct btrfs_fs_info *fs_info = device->dev_root->fs_info;
struct extent_map *em;
struct list_head *search_list = &trans->transaction->pending_chunks;
struct list_head *search_list = &fs_info->pinned_chunks;
int ret = 0;
u64 physical_start = *start;
if (transaction)
search_list = &transaction->pending_chunks;
again:
list_for_each_entry(em, search_list, list) {
struct map_lookup *map;
@ -1159,8 +1162,8 @@ again:
}
}
}
if (search_list == &trans->transaction->pending_chunks) {
search_list = &trans->root->fs_info->pinned_chunks;
if (search_list != &fs_info->pinned_chunks) {
search_list = &fs_info->pinned_chunks;
goto again;
}
@ -1169,12 +1172,13 @@ again:
/*
* find_free_dev_extent - find free space in the specified device
* @device: the device which we search the free space in
* @num_bytes: the size of the free space that we need
* @start: store the start of the free space.
* @len: the size of the free space. that we find, or the size of the max
* free space if we don't find suitable free space
* find_free_dev_extent_start - find free space in the specified device
* @device: the device which we search the free space in
* @num_bytes: the size of the free space that we need
* @search_start: the position from which to begin the search
* @start: store the start of the free space.
* @len: the size of the free space. that we find, or the size
* of the max free space if we don't find suitable free space
*
* this uses a pretty simple search, the expectation is that it is
* called very infrequently and that a given device has a small number
@ -1188,9 +1192,9 @@ again:
* But if we don't find suitable free space, it is used to store the size of
* the max free space.
*/
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
struct btrfs_device *device, u64 num_bytes,
u64 search_start, u64 *start, u64 *len)
{
struct btrfs_key key;
struct btrfs_root *root = device->dev_root;
@ -1200,19 +1204,11 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
u64 max_hole_start;
u64 max_hole_size;
u64 extent_end;
u64 search_start;
u64 search_end = device->total_bytes;
int ret;
int slot;
struct extent_buffer *l;
/* FIXME use last free of some kind */
/* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@ -1273,7 +1269,7 @@ again:
* Have to check before we set max_hole_start, otherwise
* we could end up sending back this offset anyway.
*/
if (contains_pending_extent(trans, device,
if (contains_pending_extent(transaction, device,
&search_start,
hole_size)) {
if (key.offset >= search_start) {
@ -1322,7 +1318,7 @@ next:
if (search_end > search_start) {
hole_size = search_end - search_start;
if (contains_pending_extent(trans, device, &search_start,
if (contains_pending_extent(transaction, device, &search_start,
hole_size)) {
btrfs_release_path(path);
goto again;
@ -1348,6 +1344,24 @@ out:
return ret;
}
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *len)
{
struct btrfs_root *root = device->dev_root;
u64 search_start;
/* FIXME use last free of some kind */
/*
* we don't want to overwrite the superblock on the drive,
* so we make sure to start at an offset of at least 1MB
*/
search_start = max(root->fs_info->alloc_start, 1024ull * 1024);
return find_free_dev_extent_start(trans->transaction, device,
num_bytes, search_start, start, len);
}
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
u64 start, u64 *dev_extent_len)
@ -4196,7 +4210,8 @@ again:
u64 start = new_size;
u64 len = old_size - new_size;
if (contains_pending_extent(trans, device, &start, len)) {
if (contains_pending_extent(trans->transaction, device,
&start, len)) {
unlock_chunks(root);
checked_pending_chunks = true;
failed = 0;

View File

@ -453,6 +453,9 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
int find_free_dev_extent_start(struct btrfs_transaction *transaction,
struct btrfs_device *device, u64 num_bytes,
u64 search_start, u64 *start, u64 *max_avail);
int find_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 num_bytes,
u64 *start, u64 *max_avail);