Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "This has a few fixes from Filipe, along with a readdir fix from Dave that we've been testing for some time" * 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: btrfs: properly set the termination value of ctx->pos in readdir Btrfs: fix hang on extent buffer lock caused by the inode_paths ioctl Btrfs: remove no longer used function extent_read_full_page_nolock() Btrfs: fix page reading in extent_same ioctl leading to csum errors Btrfs: fix invalid page accesses in extent_same (dedup) ioctl
This commit is contained in:
commit
27c9d772e5
@ -1406,7 +1406,8 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
||||
read_extent_buffer(eb, dest + bytes_left,
|
||||
name_off, name_len);
|
||||
if (eb != eb_in) {
|
||||
btrfs_tree_read_unlock_blocking(eb);
|
||||
if (!path->skip_locking)
|
||||
btrfs_tree_read_unlock_blocking(eb);
|
||||
free_extent_buffer(eb);
|
||||
}
|
||||
ret = btrfs_find_item(fs_root, path, parent, 0,
|
||||
@ -1426,9 +1427,10 @@ char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
|
||||
eb = path->nodes[0];
|
||||
/* make sure we can use eb after releasing the path */
|
||||
if (eb != eb_in) {
|
||||
atomic_inc(&eb->refs);
|
||||
btrfs_tree_read_lock(eb);
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
if (!path->skip_locking)
|
||||
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
|
||||
path->nodes[0] = NULL;
|
||||
path->locks[0] = 0;
|
||||
}
|
||||
btrfs_release_path(path);
|
||||
iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
|
||||
|
@ -637,11 +637,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
|
||||
faili = nr_pages - 1;
|
||||
cb->nr_pages = nr_pages;
|
||||
|
||||
/* In the parent-locked case, we only locked the range we are
|
||||
* interested in. In all other cases, we can opportunistically
|
||||
* cache decompressed data that goes beyond the requested range. */
|
||||
if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
|
||||
add_ra_bio_pages(inode, em_start + em_len, cb);
|
||||
add_ra_bio_pages(inode, em_start + em_len, cb);
|
||||
|
||||
/* include any pages we added in add_ra-bio_pages */
|
||||
uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
|
||||
|
@ -1689,7 +1689,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
*
|
||||
*/
|
||||
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
struct list_head *ins_list)
|
||||
struct list_head *ins_list, bool *emitted)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_delayed_item *curr, *next;
|
||||
@ -1733,6 +1733,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
|
||||
if (over)
|
||||
return 1;
|
||||
*emitted = true;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list,
|
||||
int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
u64 index);
|
||||
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
struct list_head *ins_list);
|
||||
struct list_head *ins_list, bool *emitted);
|
||||
|
||||
/* for init */
|
||||
int __init btrfs_delayed_inode_init(void);
|
||||
|
@ -2897,12 +2897,11 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
struct block_device *bdev;
|
||||
int ret;
|
||||
int nr = 0;
|
||||
int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
|
||||
size_t pg_offset = 0;
|
||||
size_t iosize;
|
||||
size_t disk_io_size;
|
||||
size_t blocksize = inode->i_sb->s_blocksize;
|
||||
unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
|
||||
unsigned long this_bio_flag = 0;
|
||||
|
||||
set_page_extent_mapped(page);
|
||||
|
||||
@ -2942,18 +2941,16 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
kunmap_atomic(userpage);
|
||||
set_extent_uptodate(tree, cur, cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
if (!parent_locked)
|
||||
unlock_extent_cached(tree, cur,
|
||||
cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
unlock_extent_cached(tree, cur,
|
||||
cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
break;
|
||||
}
|
||||
em = __get_extent_map(inode, page, pg_offset, cur,
|
||||
end - cur + 1, get_extent, em_cached);
|
||||
if (IS_ERR_OR_NULL(em)) {
|
||||
SetPageError(page);
|
||||
if (!parent_locked)
|
||||
unlock_extent(tree, cur, end);
|
||||
unlock_extent(tree, cur, end);
|
||||
break;
|
||||
}
|
||||
extent_offset = cur - em->start;
|
||||
@ -3038,12 +3035,9 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
|
||||
set_extent_uptodate(tree, cur, cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
if (parent_locked)
|
||||
free_extent_state(cached);
|
||||
else
|
||||
unlock_extent_cached(tree, cur,
|
||||
cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
unlock_extent_cached(tree, cur,
|
||||
cur + iosize - 1,
|
||||
&cached, GFP_NOFS);
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
continue;
|
||||
@ -3052,8 +3046,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
if (test_range_bit(tree, cur, cur_end,
|
||||
EXTENT_UPTODATE, 1, NULL)) {
|
||||
check_page_uptodate(tree, page);
|
||||
if (!parent_locked)
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
continue;
|
||||
@ -3063,8 +3056,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
*/
|
||||
if (block_start == EXTENT_MAP_INLINE) {
|
||||
SetPageError(page);
|
||||
if (!parent_locked)
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
continue;
|
||||
@ -3083,8 +3075,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
*bio_flags = this_bio_flag;
|
||||
} else {
|
||||
SetPageError(page);
|
||||
if (!parent_locked)
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
unlock_extent(tree, cur, cur + iosize - 1);
|
||||
}
|
||||
cur = cur + iosize;
|
||||
pg_offset += iosize;
|
||||
@ -3213,20 +3204,6 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent, int mirror_num)
|
||||
{
|
||||
struct bio *bio = NULL;
|
||||
unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
|
||||
int ret;
|
||||
|
||||
ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
|
||||
&bio_flags, READ, NULL);
|
||||
if (bio)
|
||||
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline void update_nr_written(struct page *page,
|
||||
struct writeback_control *wbc,
|
||||
unsigned long nr_written)
|
||||
|
@ -29,7 +29,6 @@
|
||||
*/
|
||||
#define EXTENT_BIO_COMPRESSED 1
|
||||
#define EXTENT_BIO_TREE_LOG 2
|
||||
#define EXTENT_BIO_PARENT_LOCKED 4
|
||||
#define EXTENT_BIO_FLAG_SHIFT 16
|
||||
|
||||
/* these are bit numbers for test/set bit */
|
||||
@ -210,8 +209,6 @@ static inline int lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
|
||||
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
|
||||
int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent, int mirror_num);
|
||||
int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
|
||||
get_extent_t *get_extent, int mirror_num);
|
||||
int __init extent_io_init(void);
|
||||
void extent_io_exit(void);
|
||||
|
||||
|
@ -5717,6 +5717,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
|
||||
char *name_ptr;
|
||||
int name_len;
|
||||
int is_curr = 0; /* ctx->pos points to the current index? */
|
||||
bool emitted;
|
||||
|
||||
/* FIXME, use a real flag for deciding about the key type */
|
||||
if (root->fs_info->tree_root == root)
|
||||
@ -5745,6 +5746,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
|
||||
if (ret < 0)
|
||||
goto err;
|
||||
|
||||
emitted = false;
|
||||
while (1) {
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
@ -5824,6 +5826,7 @@ skip:
|
||||
|
||||
if (over)
|
||||
goto nopos;
|
||||
emitted = true;
|
||||
di_len = btrfs_dir_name_len(leaf, di) +
|
||||
btrfs_dir_data_len(leaf, di) + sizeof(*di);
|
||||
di_cur += di_len;
|
||||
@ -5836,11 +5839,20 @@ next:
|
||||
if (key_type == BTRFS_DIR_INDEX_KEY) {
|
||||
if (is_curr)
|
||||
ctx->pos++;
|
||||
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list);
|
||||
ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list, &emitted);
|
||||
if (ret)
|
||||
goto nopos;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we haven't emitted any dir entry, we must not touch ctx->pos as
|
||||
* it was was set to the termination value in previous call. We assume
|
||||
* that "." and ".." were emitted if we reach this point and set the
|
||||
* termination value as well for an empty directory.
|
||||
*/
|
||||
if (ctx->pos > 2 && !emitted)
|
||||
goto nopos;
|
||||
|
||||
/* Reached end of directory/root. Bump pos past the last item. */
|
||||
ctx->pos++;
|
||||
|
||||
|
119
fs/btrfs/ioctl.c
119
fs/btrfs/ioctl.c
@ -2794,24 +2794,29 @@ out:
|
||||
static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
|
||||
{
|
||||
struct page *page;
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
|
||||
page = grab_cache_page(inode->i_mapping, index);
|
||||
if (!page)
|
||||
return NULL;
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (!PageUptodate(page)) {
|
||||
if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
|
||||
0))
|
||||
return NULL;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_readpage(NULL, page);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
lock_page(page);
|
||||
if (!PageUptodate(page)) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
return NULL;
|
||||
return ERR_PTR(-EIO);
|
||||
}
|
||||
if (page->mapping != inode->i_mapping) {
|
||||
unlock_page(page);
|
||||
page_cache_release(page);
|
||||
return ERR_PTR(-EAGAIN);
|
||||
}
|
||||
}
|
||||
unlock_page(page);
|
||||
|
||||
return page;
|
||||
}
|
||||
@ -2823,17 +2828,31 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
|
||||
pgoff_t index = off >> PAGE_CACHE_SHIFT;
|
||||
|
||||
for (i = 0; i < num_pages; i++) {
|
||||
again:
|
||||
pages[i] = extent_same_get_page(inode, index + i);
|
||||
if (!pages[i])
|
||||
return -ENOMEM;
|
||||
if (IS_ERR(pages[i])) {
|
||||
int err = PTR_ERR(pages[i]);
|
||||
|
||||
if (err == -EAGAIN)
|
||||
goto again;
|
||||
pages[i] = NULL;
|
||||
return err;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
|
||||
static int lock_extent_range(struct inode *inode, u64 off, u64 len,
|
||||
bool retry_range_locking)
|
||||
{
|
||||
/* do any pending delalloc/csum calc on src, one way or
|
||||
another, and lock file content */
|
||||
/*
|
||||
* Do any pending delalloc/csum calculations on inode, one way or
|
||||
* another, and lock file content.
|
||||
* The locking order is:
|
||||
*
|
||||
* 1) pages
|
||||
* 2) range in the inode's io tree
|
||||
*/
|
||||
while (1) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
|
||||
@ -2851,8 +2870,11 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
|
||||
if (ordered)
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
if (!retry_range_locking)
|
||||
return -EAGAIN;
|
||||
btrfs_wait_ordered_range(inode, off, len);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2)
|
||||
@ -2877,15 +2899,24 @@ static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
|
||||
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
|
||||
}
|
||||
|
||||
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
|
||||
struct inode *inode2, u64 loff2, u64 len)
|
||||
static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
|
||||
struct inode *inode2, u64 loff2, u64 len,
|
||||
bool retry_range_locking)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (inode1 < inode2) {
|
||||
swap(inode1, inode2);
|
||||
swap(loff1, loff2);
|
||||
}
|
||||
lock_extent_range(inode1, loff1, len);
|
||||
lock_extent_range(inode2, loff2, len);
|
||||
ret = lock_extent_range(inode1, loff1, len, retry_range_locking);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = lock_extent_range(inode2, loff2, len, retry_range_locking);
|
||||
if (ret)
|
||||
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1,
|
||||
loff1 + len - 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct cmp_pages {
|
||||
@ -2901,11 +2932,15 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
|
||||
|
||||
for (i = 0; i < cmp->num_pages; i++) {
|
||||
pg = cmp->src_pages[i];
|
||||
if (pg)
|
||||
if (pg) {
|
||||
unlock_page(pg);
|
||||
page_cache_release(pg);
|
||||
}
|
||||
pg = cmp->dst_pages[i];
|
||||
if (pg)
|
||||
if (pg) {
|
||||
unlock_page(pg);
|
||||
page_cache_release(pg);
|
||||
}
|
||||
}
|
||||
kfree(cmp->src_pages);
|
||||
kfree(cmp->dst_pages);
|
||||
@ -2966,6 +3001,8 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
|
||||
|
||||
src_page = cmp->src_pages[i];
|
||||
dst_page = cmp->dst_pages[i];
|
||||
ASSERT(PageLocked(src_page));
|
||||
ASSERT(PageLocked(dst_page));
|
||||
|
||||
addr = kmap_atomic(src_page);
|
||||
dst_addr = kmap_atomic(dst_page);
|
||||
@ -3078,14 +3115,46 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
again:
|
||||
ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if (same_inode)
|
||||
lock_extent_range(src, same_lock_start, same_lock_len);
|
||||
ret = lock_extent_range(src, same_lock_start, same_lock_len,
|
||||
false);
|
||||
else
|
||||
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
|
||||
ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len,
|
||||
false);
|
||||
/*
|
||||
* If one of the inodes has dirty pages in the respective range or
|
||||
* ordered extents, we need to flush dellaloc and wait for all ordered
|
||||
* extents in the range. We must unlock the pages and the ranges in the
|
||||
* io trees to avoid deadlocks when flushing delalloc (requires locking
|
||||
* pages) and when waiting for ordered extents to complete (they require
|
||||
* range locking).
|
||||
*/
|
||||
if (ret == -EAGAIN) {
|
||||
/*
|
||||
* Ranges in the io trees already unlocked. Now unlock all
|
||||
* pages before waiting for all IO to complete.
|
||||
*/
|
||||
btrfs_cmp_data_free(&cmp);
|
||||
if (same_inode) {
|
||||
btrfs_wait_ordered_range(src, same_lock_start,
|
||||
same_lock_len);
|
||||
} else {
|
||||
btrfs_wait_ordered_range(src, loff, len);
|
||||
btrfs_wait_ordered_range(dst, dst_loff, len);
|
||||
}
|
||||
goto again;
|
||||
}
|
||||
ASSERT(ret == 0);
|
||||
if (WARN_ON(ret)) {
|
||||
/* ranges in the io trees already unlocked */
|
||||
btrfs_cmp_data_free(&cmp);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* pass original length for comparison so we stay within i_size */
|
||||
ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp);
|
||||
@ -3795,9 +3864,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
u64 lock_start = min_t(u64, off, destoff);
|
||||
u64 lock_len = max_t(u64, off, destoff) + len - lock_start;
|
||||
|
||||
lock_extent_range(src, lock_start, lock_len);
|
||||
ret = lock_extent_range(src, lock_start, lock_len, true);
|
||||
} else {
|
||||
btrfs_double_extent_lock(src, off, inode, destoff, len);
|
||||
ret = btrfs_double_extent_lock(src, off, inode, destoff, len,
|
||||
true);
|
||||
}
|
||||
ASSERT(ret == 0);
|
||||
if (WARN_ON(ret)) {
|
||||
/* ranges in the io trees already unlocked */
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
|
||||
|
Loading…
Reference in New Issue
Block a user