ext4: don't take the i_mutex lock when doing DIO overwrites

Aligned and overwrite direct I/O can be parallelized.  In
ext4_file_dio_write, we first check whether these conditions are
satisfied or not.  If so, we take i_data_sem and release i_mutex lock
directly.  Meanwhile iocb->private is set to indicate that this is a
dio overwrite, and it will be handled in ext4_ext_direct_IO.

[ Added fix from Dan Carpenter to fix locking bug on the error path. ]

CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
This commit is contained in:
Zheng Liu 2012-07-22 20:19:31 -04:00 committed by Theodore Ts'o
parent 729f52c6be
commit 4bd809dbbf
2 changed files with 71 additions and 4 deletions

View File

@ -93,9 +93,13 @@ static ssize_t
ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct blk_plug plug;
int unaligned_aio = 0;
ssize_t ret;
int overwrite = 0;
size_t length = iov_length(iov, nr_segs);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb))
@ -115,7 +119,50 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
ext4_aiodio_wait(inode);
}
ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
BUG_ON(iocb->ki_pos != pos);
mutex_lock(&inode->i_mutex);
blk_start_plug(&plug);
iocb->private = &overwrite;
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
int err, len;
map.m_lblk = pos >> blkbits;
map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
- map.m_lblk;
len = map.m_len;
err = ext4_map_blocks(NULL, inode, &map, 0);
/*
* 'err==len' means that all of blocks has been preallocated no
* matter they are initialized or not. For excluding
* uninitialized extents, we need to check m_flags. There are
* two conditions that indicate for initialized extents.
* 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned;
* 2) If we do a real lookup, non-flags are returned.
* So we should check these two conditions.
*/
if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
overwrite = 1;
}
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0)
ret = err;
}
blk_finish_plug(&plug);
if (unaligned_aio)
mutex_unlock(ext4_aio_mutex(inode));

View File

@ -2996,6 +2996,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (rw == WRITE && final_size <= inode->i_size) {
int overwrite = 0;
BUG_ON(iocb->private == NULL);
/* If we do a overwrite dio, i_mutex locking can be released */
overwrite = *((int *)iocb->private);
if (overwrite) {
down_read(&EXT4_I(inode)->i_data_sem);
mutex_unlock(&inode->i_mutex);
}
/*
* We could direct write to holes and fallocate.
*
@ -3021,8 +3031,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
if (!is_sync_kiocb(iocb)) {
ext4_io_end_t *io_end =
ext4_init_io_end(inode, GFP_NOFS);
if (!io_end)
return -ENOMEM;
if (!io_end) {
ret = -ENOMEM;
goto retake_lock;
}
io_end->flag |= EXT4_IO_END_DIRECT;
iocb->private = io_end;
/*
@ -3083,6 +3095,14 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
ret = err;
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
}
retake_lock:
/* take i_mutex locking again if we do a ovewrite dio */
if (overwrite) {
up_read(&EXT4_I(inode)->i_data_sem);
mutex_lock(&inode->i_mutex);
}
return ret;
}