mirror of
https://github.com/torvalds/linux.git
synced 2024-11-12 23:23:03 +00:00
ext4: avoid deadlock in fs reclaim with page writeback
Ext4 has a filesystem wide lock protecting ext4_writepages() calls to
avoid races with switching of journalled data flag or inode format. This
lock can however cause a deadlock like:
CPU0                            CPU1

ext4_writepages()
  percpu_down_read(sbi->s_writepages_rwsem);
                                ext4_change_inode_journal_flag()
                                  percpu_down_write(sbi->s_writepages_rwsem);
                                    - blocks, all readers block from now on
  ext4_do_writepages()
    ext4_init_io_end()
      kmem_cache_zalloc(io_end_cachep, GFP_KERNEL)
        fs_reclaim frees dentry...
          dentry_unlink_inode()
            iput() - last ref =>
              iput_final() - inode dirty =>
                write_inode_now()...
                  ext4_writepages() tries to acquire sbi->s_writepages_rwsem
                    and blocks forever
Make sure we cannot recurse into filesystem reclaim from writeback code
to avoid the deadlock.
Reported-by: syzbot+6898da502aef574c5f8a@syzkaller.appspotmail.com
Link: https://lore.kernel.org/all/0000000000004c66b405fa108e27@google.com
Fixes: c8585c6fca ("ext4: fix races between changing inode journal mode and ext4_writepages")
CC: stable@vger.kernel.org
Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20230504124723.20205-1-jack@suse.cz
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
parent
b87c7cdf2b
commit
00d873c17e
@ -1684,6 +1684,30 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)
|
|||||||
return container_of(inode, struct ext4_inode_info, vfs_inode);
|
return container_of(inode, struct ext4_inode_info, vfs_inode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int ext4_writepages_down_read(struct super_block *sb)
|
||||||
|
{
|
||||||
|
percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
|
||||||
|
return memalloc_nofs_save();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ext4_writepages_up_read(struct super_block *sb, int ctx)
|
||||||
|
{
|
||||||
|
memalloc_nofs_restore(ctx);
|
||||||
|
percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int ext4_writepages_down_write(struct super_block *sb)
|
||||||
|
{
|
||||||
|
percpu_down_write(&EXT4_SB(sb)->s_writepages_rwsem);
|
||||||
|
return memalloc_nofs_save();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ext4_writepages_up_write(struct super_block *sb, int ctx)
|
||||||
|
{
|
||||||
|
memalloc_nofs_restore(ctx);
|
||||||
|
percpu_up_write(&EXT4_SB(sb)->s_writepages_rwsem);
|
||||||
|
}
|
||||||
|
|
||||||
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
|
||||||
{
|
{
|
||||||
return ino == EXT4_ROOT_INO ||
|
return ino == EXT4_ROOT_INO ||
|
||||||
|
@ -2783,11 +2783,12 @@ static int ext4_writepages(struct address_space *mapping,
|
|||||||
.can_map = 1,
|
.can_map = 1,
|
||||||
};
|
};
|
||||||
int ret;
|
int ret;
|
||||||
|
int alloc_ctx;
|
||||||
|
|
||||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
|
if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
percpu_down_read(&EXT4_SB(sb)->s_writepages_rwsem);
|
alloc_ctx = ext4_writepages_down_read(sb);
|
||||||
ret = ext4_do_writepages(&mpd);
|
ret = ext4_do_writepages(&mpd);
|
||||||
/*
|
/*
|
||||||
* For data=journal writeback we could have come across pages marked
|
* For data=journal writeback we could have come across pages marked
|
||||||
@ -2796,7 +2797,7 @@ static int ext4_writepages(struct address_space *mapping,
|
|||||||
*/
|
*/
|
||||||
if (!ret && mpd.journalled_more_data)
|
if (!ret && mpd.journalled_more_data)
|
||||||
ret = ext4_do_writepages(&mpd);
|
ret = ext4_do_writepages(&mpd);
|
||||||
percpu_up_read(&EXT4_SB(sb)->s_writepages_rwsem);
|
ext4_writepages_up_read(sb, alloc_ctx);
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -2824,17 +2825,18 @@ static int ext4_dax_writepages(struct address_space *mapping,
|
|||||||
long nr_to_write = wbc->nr_to_write;
|
long nr_to_write = wbc->nr_to_write;
|
||||||
struct inode *inode = mapping->host;
|
struct inode *inode = mapping->host;
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
|
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
|
||||||
|
int alloc_ctx;
|
||||||
|
|
||||||
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
|
||||||
percpu_down_read(&sbi->s_writepages_rwsem);
|
alloc_ctx = ext4_writepages_down_read(inode->i_sb);
|
||||||
trace_ext4_writepages(inode, wbc);
|
trace_ext4_writepages(inode, wbc);
|
||||||
|
|
||||||
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
|
ret = dax_writeback_mapping_range(mapping, sbi->s_daxdev, wbc);
|
||||||
trace_ext4_writepages_result(inode, wbc, ret,
|
trace_ext4_writepages_result(inode, wbc, ret,
|
||||||
nr_to_write - wbc->nr_to_write);
|
nr_to_write - wbc->nr_to_write);
|
||||||
percpu_up_read(&sbi->s_writepages_rwsem);
|
ext4_writepages_up_read(inode->i_sb, alloc_ctx);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5928,7 +5930,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
journal_t *journal;
|
journal_t *journal;
|
||||||
handle_t *handle;
|
handle_t *handle;
|
||||||
int err;
|
int err;
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
int alloc_ctx;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have to be very careful here: changing a data block's
|
* We have to be very careful here: changing a data block's
|
||||||
@ -5966,7 +5968,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
percpu_down_write(&sbi->s_writepages_rwsem);
|
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
|
||||||
jbd2_journal_lock_updates(journal);
|
jbd2_journal_lock_updates(journal);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -5983,7 +5985,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
err = jbd2_journal_flush(journal, 0);
|
err = jbd2_journal_flush(journal, 0);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
|
ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
|
||||||
@ -5991,7 +5993,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
|
|||||||
ext4_set_aops(inode);
|
ext4_set_aops(inode);
|
||||||
|
|
||||||
jbd2_journal_unlock_updates(journal);
|
jbd2_journal_unlock_updates(journal);
|
||||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
|
||||||
|
|
||||||
if (val)
|
if (val)
|
||||||
filemap_invalidate_unlock(inode->i_mapping);
|
filemap_invalidate_unlock(inode->i_mapping);
|
||||||
|
@ -408,7 +408,6 @@ static int free_ext_block(handle_t *handle, struct inode *inode)
|
|||||||
|
|
||||||
int ext4_ext_migrate(struct inode *inode)
|
int ext4_ext_migrate(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
|
|
||||||
handle_t *handle;
|
handle_t *handle;
|
||||||
int retval = 0, i;
|
int retval = 0, i;
|
||||||
__le32 *i_data;
|
__le32 *i_data;
|
||||||
@ -418,6 +417,7 @@ int ext4_ext_migrate(struct inode *inode)
|
|||||||
unsigned long max_entries;
|
unsigned long max_entries;
|
||||||
__u32 goal, tmp_csum_seed;
|
__u32 goal, tmp_csum_seed;
|
||||||
uid_t owner[2];
|
uid_t owner[2];
|
||||||
|
int alloc_ctx;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the filesystem does not support extents, or the inode
|
* If the filesystem does not support extents, or the inode
|
||||||
@ -434,7 +434,7 @@ int ext4_ext_migrate(struct inode *inode)
|
|||||||
*/
|
*/
|
||||||
return retval;
|
return retval;
|
||||||
|
|
||||||
percpu_down_write(&sbi->s_writepages_rwsem);
|
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Worst case we can touch the allocation bitmaps and a block
|
* Worst case we can touch the allocation bitmaps and a block
|
||||||
@ -586,7 +586,7 @@ out_tmp_inode:
|
|||||||
unlock_new_inode(tmp_inode);
|
unlock_new_inode(tmp_inode);
|
||||||
iput(tmp_inode);
|
iput(tmp_inode);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -605,6 +605,7 @@ int ext4_ind_migrate(struct inode *inode)
|
|||||||
ext4_fsblk_t blk;
|
ext4_fsblk_t blk;
|
||||||
handle_t *handle;
|
handle_t *handle;
|
||||||
int ret, ret2 = 0;
|
int ret, ret2 = 0;
|
||||||
|
int alloc_ctx;
|
||||||
|
|
||||||
if (!ext4_has_feature_extents(inode->i_sb) ||
|
if (!ext4_has_feature_extents(inode->i_sb) ||
|
||||||
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
|
||||||
@ -621,7 +622,7 @@ int ext4_ind_migrate(struct inode *inode)
|
|||||||
if (test_opt(inode->i_sb, DELALLOC))
|
if (test_opt(inode->i_sb, DELALLOC))
|
||||||
ext4_alloc_da_blocks(inode);
|
ext4_alloc_da_blocks(inode);
|
||||||
|
|
||||||
percpu_down_write(&sbi->s_writepages_rwsem);
|
alloc_ctx = ext4_writepages_down_write(inode->i_sb);
|
||||||
|
|
||||||
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
|
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1);
|
||||||
if (IS_ERR(handle)) {
|
if (IS_ERR(handle)) {
|
||||||
@ -665,6 +666,6 @@ errout:
|
|||||||
ext4_journal_stop(handle);
|
ext4_journal_stop(handle);
|
||||||
up_write(&EXT4_I(inode)->i_data_sem);
|
up_write(&EXT4_I(inode)->i_data_sem);
|
||||||
out_unlock:
|
out_unlock:
|
||||||
percpu_up_write(&sbi->s_writepages_rwsem);
|
ext4_writepages_up_write(inode->i_sb, alloc_ctx);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user