btrfs: fix RWF_NOWAIT writes blocking on extent locks and waiting for IO
A RWF_NOWAIT write is not supposed to wait on filesystem locks that can be
held for a long time or for ongoing IO to complete.
However, when calling check_can_nocow(), if the inode has prealloc extents
or has the NOCOW flag set, we can block on extent (file range) locks
through the call to btrfs_lock_and_flush_ordered_range(). Such a lock can
take a significant amount of time to be available. For example, a fiemap
task may be running, and iterating through the entire file range checking
all extents and doing backref walking to determine if they are shared,
or a readpage operation may be in progress.
Also at btrfs_lock_and_flush_ordered_range(), called by check_can_nocow(),
after locking the file range we wait for any existing ordered extent that
is in progress to complete. That is another operation that can take a
significant amount of time and defeat the purpose of RWF_NOWAIT.
So fix this by trying to lock the file range and if it's currently locked
return -EAGAIN to user space. If we are able to lock the file range without
waiting and there is an ordered extent in the range, return -EAGAIN as
well, instead of waiting for it to complete. Finally, don't bother trying
to lock the snapshot lock of the root when attempting a RWF_NOWAIT write,
as that is only important for buffered writes.
Fixes: edf064e7c6 ("btrfs: nowait aio support")
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
260a63395f
commit
5dbb75ed69
@ -1533,7 +1533,7 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
||||||
size_t *write_bytes)
|
size_t *write_bytes, bool nowait)
|
||||||
{
|
{
|
||||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||||
struct btrfs_root *root = inode->root;
|
struct btrfs_root *root = inode->root;
|
||||||
@ -1541,27 +1541,43 @@ static noinline int check_can_nocow(struct btrfs_inode *inode, loff_t pos,
|
|||||||
u64 num_bytes;
|
u64 num_bytes;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (!btrfs_drew_try_write_lock(&root->snapshot_lock))
|
if (!nowait && !btrfs_drew_try_write_lock(&root->snapshot_lock))
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
|
|
||||||
lockstart = round_down(pos, fs_info->sectorsize);
|
lockstart = round_down(pos, fs_info->sectorsize);
|
||||||
lockend = round_up(pos + *write_bytes,
|
lockend = round_up(pos + *write_bytes,
|
||||||
fs_info->sectorsize) - 1;
|
fs_info->sectorsize) - 1;
|
||||||
|
|
||||||
btrfs_lock_and_flush_ordered_range(inode, lockstart,
|
|
||||||
lockend, NULL);
|
|
||||||
|
|
||||||
num_bytes = lockend - lockstart + 1;
|
num_bytes = lockend - lockstart + 1;
|
||||||
|
|
||||||
|
if (nowait) {
|
||||||
|
struct btrfs_ordered_extent *ordered;
|
||||||
|
|
||||||
|
if (!try_lock_extent(&inode->io_tree, lockstart, lockend))
|
||||||
|
return -EAGAIN;
|
||||||
|
|
||||||
|
ordered = btrfs_lookup_ordered_range(inode, lockstart,
|
||||||
|
num_bytes);
|
||||||
|
if (ordered) {
|
||||||
|
btrfs_put_ordered_extent(ordered);
|
||||||
|
ret = -EAGAIN;
|
||||||
|
goto out_unlock;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
btrfs_lock_and_flush_ordered_range(inode, lockstart,
|
||||||
|
lockend, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
||||||
NULL, NULL, NULL);
|
NULL, NULL, NULL);
|
||||||
if (ret <= 0) {
|
if (ret <= 0) {
|
||||||
ret = 0;
|
ret = 0;
|
||||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
if (!nowait)
|
||||||
|
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||||
} else {
|
} else {
|
||||||
*write_bytes = min_t(size_t, *write_bytes ,
|
*write_bytes = min_t(size_t, *write_bytes ,
|
||||||
num_bytes - pos + lockstart);
|
num_bytes - pos + lockstart);
|
||||||
}
|
}
|
||||||
|
out_unlock:
|
||||||
unlock_extent(&inode->io_tree, lockstart, lockend);
|
unlock_extent(&inode->io_tree, lockstart, lockend);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -1633,7 +1649,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
|
|||||||
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
||||||
BTRFS_INODE_PREALLOC)) &&
|
BTRFS_INODE_PREALLOC)) &&
|
||||||
check_can_nocow(BTRFS_I(inode), pos,
|
check_can_nocow(BTRFS_I(inode), pos,
|
||||||
&write_bytes) > 0) {
|
&write_bytes, false) > 0) {
|
||||||
/*
|
/*
|
||||||
* For nodata cow case, no need to reserve
|
* For nodata cow case, no need to reserve
|
||||||
* data space.
|
* data space.
|
||||||
@ -1912,12 +1928,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
|
|||||||
*/
|
*/
|
||||||
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
if (!(BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
|
||||||
BTRFS_INODE_PREALLOC)) ||
|
BTRFS_INODE_PREALLOC)) ||
|
||||||
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes) <= 0) {
|
check_can_nocow(BTRFS_I(inode), pos, &nocow_bytes,
|
||||||
|
true) <= 0) {
|
||||||
inode_unlock(inode);
|
inode_unlock(inode);
|
||||||
return -EAGAIN;
|
return -EAGAIN;
|
||||||
}
|
}
|
||||||
/* check_can_nocow() locks the snapshot lock on success */
|
|
||||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
|
||||||
/*
|
/*
|
||||||
* There are holes in the range or parts of the range that must
|
* There are holes in the range or parts of the range that must
|
||||||
* be COWed (shared extents, RO block groups, etc), so just bail
|
* be COWed (shared extents, RO block groups, etc), so just bail
|
||||||
|
Loading…
Reference in New Issue
Block a user