Bug fixes and regressions for ext4, the most serious of which is a

potential deadlock during directory renames that was introduced during
 the merge window discovered by a combination of syzbot and lockdep.
 -----BEGIN PGP SIGNATURE-----
 
 iQEzBAABCAAdFiEEK2m5VNv+CHkogTfJ8vlZVpUNgaMFAmQNVwIACgkQ8vlZVpUN
 gaMwmgf/ZAasXZEMV0zaQZa8zP4KvMKZjWe6azkcJg4sb/HG9Q7JzeJDCurhhWUj
 8+QnyUcuKTyWKYWjGf0f5CZaYEM5AZYij41UJzu2qMkz5hVXSqBVuY8KywxuiJv5
 kfuIvQh0Onv0Yrg2qAc52/kZkq1lu2sl/F5ertBWjdpTUXdBUdrCxkUk+1BgQWAj
 vNwi1/+gNuX7RxMboHqYmwXFP39vECd+wteNdsiK1hR8bLqL68duLLq8xQdHt4gS
 sbVmJKR4j2Giw4ZnlYi9RiwKIO0beqocanp+cfOPulyj5mTM8X1lr0uvaLZgx2AF
 lqrS3/5ksp45cRT70qCIz8je70hTSg==
 =nN3T
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 fixes from Ted Ts'o:
 "Bug fixes and regressions for ext4, the most serious of which is a
  potential deadlock during directory renames that was introduced during
  the merge window discovered by a combination of syzbot and lockdep"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: zero i_disksize when initializing the bootloader inode
  ext4: make sure fs error flag setted before clear journal error
  ext4: commit super block if fs record error when journal record without error
  ext4, jbd2: add an optimized bmap for the journal inode
  ext4: fix WARNING in ext4_update_inline_data
  ext4: move where set the MAY_INLINE_DATA flag is set
  ext4: Fix deadlock during directory rename
  ext4: Fix comment about the 64BIT feature
  docs: ext4: modify the group desc size to 64
  ext4: fix another off-by-one fsmap error on 1k block filesystems
  ext4: fix RENAME_WHITEOUT handling for inline directories
  ext4: make kobj_type structures constant
  ext4: fix cgroup writeback accounting with fs-layer encryption
This commit is contained in:
Linus Torvalds 2023-03-12 08:55:55 -07:00
commit 40d0c0901e
13 changed files with 98 additions and 33 deletions

View File

@ -105,9 +105,9 @@ descriptors. Instead, the superblock and a single block group descriptor
block is placed at the beginning of the first, second, and last block
groups in a meta-block group. A meta-block group is a collection of
block groups which can be described by a single block group descriptor
block. Since the size of the block group descriptor structure is 32
bytes, a meta-block group contains 32 block groups for filesystems with
a 1KB block size, and 128 block groups for filesystems with a 4KB
block. Since the size of the block group descriptor structure is 64
bytes, a meta-block group contains 16 block groups for filesystems with
a 1KB block size, and 64 block groups for filesystems with a 4KB
blocksize. Filesystems can either be created using this new block group
descriptor layout, or existing filesystems can be resized on-line, and
the field s_first_meta_bg in the superblock will indicate the first

View File

@ -1387,7 +1387,7 @@ struct ext4_super_block {
__le32 s_first_meta_bg; /* First metablock block group */
__le32 s_mkfs_time; /* When the filesystem was created */
__le32 s_jnl_blocks[17]; /* Backup of the journal inode */
/* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */
/* 64bit support valid if EXT4_FEATURE_INCOMPAT_64BIT */
/*150*/ __le32 s_blocks_count_hi; /* Blocks count */
__le32 s_r_blocks_count_hi; /* Reserved blocks count */
__le32 s_free_blocks_count_hi; /* Free blocks count */

View File

@ -486,6 +486,8 @@ static int ext4_getfsmap_datadev(struct super_block *sb,
keys[0].fmr_physical = bofs;
if (keys[1].fmr_physical >= eofs)
keys[1].fmr_physical = eofs - 1;
if (keys[1].fmr_physical < keys[0].fmr_physical)
return 0;
start_fsb = keys[0].fmr_physical;
end_fsb = keys[1].fmr_physical;

View File

@ -159,7 +159,6 @@ int ext4_find_inline_data_nolock(struct inode *inode)
(void *)ext4_raw_inode(&is.iloc));
EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
le32_to_cpu(is.s.here->e_value_size);
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}
out:
brelse(is.iloc.bh);

View File

@ -4797,8 +4797,13 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
if (EXT4_INODE_HAS_XATTR_SPACE(inode) &&
*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
int err;
ext4_set_inode_state(inode, EXT4_STATE_XATTR);
return ext4_find_inline_data_nolock(inode);
err = ext4_find_inline_data_nolock(inode);
if (!err && ext4_has_inline_data(inode))
ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
return err;
} else
EXT4_I(inode)->i_inline_off = 0;
return 0;

View File

@ -431,6 +431,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ei_bl->i_flags = 0;
inode_set_iversion(inode_bl, 1);
i_size_write(inode_bl, 0);
EXT4_I(inode_bl)->i_disksize = inode_bl->i_size;
inode_bl->i_mode = S_IFREG;
if (ext4_has_feature_extents(sb)) {
ext4_set_inode_flag(inode_bl, EXT4_INODE_EXTENTS);

View File

@ -1595,11 +1595,10 @@ static struct buffer_head *__ext4_find_entry(struct inode *dir,
int has_inline_data = 1;
ret = ext4_find_inline_entry(dir, fname, res_dir,
&has_inline_data);
if (has_inline_data) {
if (inlined)
*inlined = 1;
if (inlined)
*inlined = has_inline_data;
if (has_inline_data)
goto cleanup_and_exit;
}
}
if ((namelen <= 2) && (name[0] == '.') &&
@ -3646,7 +3645,8 @@ static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
* so the old->de may no longer valid and need to find it again
* before reset old inode info.
*/
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
&old.inlined);
if (IS_ERR(old.bh))
retval = PTR_ERR(old.bh);
if (!old.bh)
@ -3813,9 +3813,20 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return retval;
}
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
if (IS_ERR(old.bh))
return PTR_ERR(old.bh);
/*
* We need to protect against old.inode directory getting converted
* from inline directory format into a normal one.
*/
if (S_ISDIR(old.inode->i_mode))
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
&old.inlined);
if (IS_ERR(old.bh)) {
retval = PTR_ERR(old.bh);
goto unlock_moved_dir;
}
/*
* Check for inode number is _not_ due to possible IO errors.
* We might rmdir the source, keep it as pwd of some process
@ -3872,11 +3883,6 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
goto end_rename;
}
/*
* We need to protect against old.inode directory getting
* converted from inline directory format into a normal one.
*/
inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
retval = ext4_rename_dir_prepare(handle, &old);
if (retval) {
inode_unlock(old.inode);
@ -4013,12 +4019,15 @@ end_rename:
} else {
ext4_journal_stop(handle);
}
if (old.dir_bh)
inode_unlock(old.inode);
release_bh:
brelse(old.dir_bh);
brelse(old.bh);
brelse(new.bh);
unlock_moved_dir:
if (S_ISDIR(old.inode->i_mode))
inode_unlock(old.inode);
return retval;
}

View File

@ -409,7 +409,8 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
static void io_submit_add_bh(struct ext4_io_submit *io,
struct inode *inode,
struct page *page,
struct page *pagecache_page,
struct page *bounce_page,
struct buffer_head *bh)
{
int ret;
@ -421,10 +422,11 @@ submit_and_retry:
}
if (io->io_bio == NULL)
io_submit_init_bio(io, bh);
ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
ret = bio_add_page(io->io_bio, bounce_page ?: pagecache_page,
bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
wbc_account_cgroup_owner(io->io_wbc, page, bh->b_size);
wbc_account_cgroup_owner(io->io_wbc, pagecache_page, bh->b_size);
io->io_next_block++;
}
@ -561,8 +563,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
if (!buffer_async_write(bh))
continue;
io_submit_add_bh(io, inode,
bounce_page ? bounce_page : page, bh);
io_submit_add_bh(io, inode, page, bounce_page, bh);
} while ((bh = bh->b_this_page) != head);
unlock:
unlock_page(page);

View File

@ -5726,6 +5726,28 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
return journal_inode;
}
static int ext4_journal_bmap(journal_t *journal, sector_t *block)
{
struct ext4_map_blocks map;
int ret;
if (journal->j_inode == NULL)
return 0;
map.m_lblk = *block;
map.m_len = 1;
ret = ext4_map_blocks(NULL, journal->j_inode, &map, 0);
if (ret <= 0) {
ext4_msg(journal->j_inode->i_sb, KERN_CRIT,
"journal bmap failed: block %llu ret %d\n",
*block, ret);
jbd2_journal_abort(journal, ret ? ret : -EIO);
return ret;
}
*block = map.m_pblk;
return 0;
}
static journal_t *ext4_get_journal(struct super_block *sb,
unsigned int journal_inum)
{
@ -5746,6 +5768,7 @@ static journal_t *ext4_get_journal(struct super_block *sb,
return NULL;
}
journal->j_private = sb;
journal->j_bmap = ext4_journal_bmap;
ext4_init_journal_params(sb, journal);
return journal;
}
@ -5920,6 +5943,7 @@ static int ext4_load_journal(struct super_block *sb,
err = jbd2_journal_wipe(journal, !really_read_only);
if (!err) {
char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
if (save)
memcpy(save, ((char *) es) +
EXT4_S_ERR_START, EXT4_S_ERR_LEN);
@ -5928,6 +5952,14 @@ static int ext4_load_journal(struct super_block *sb,
memcpy(((char *) es) + EXT4_S_ERR_START,
save, EXT4_S_ERR_LEN);
kfree(save);
es->s_state |= cpu_to_le16(EXT4_SB(sb)->s_mount_state &
EXT4_ERROR_FS);
/* Write out restored error information to the superblock */
if (!bdev_read_only(sb->s_bdev)) {
int err2;
err2 = ext4_commit_super(sb);
err = err ? : err2;
}
}
if (err) {
@ -6157,11 +6189,13 @@ static int ext4_clear_journal_err(struct super_block *sb,
errstr = ext4_decode_error(sb, j_errno, nbuf);
ext4_warning(sb, "Filesystem error recorded "
"from previous mount: %s", errstr);
ext4_warning(sb, "Marking fs in need of filesystem check.");
EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
ext4_commit_super(sb);
j_errno = ext4_commit_super(sb);
if (j_errno)
return j_errno;
ext4_warning(sb, "Marked fs in need of filesystem check.");
jbd2_journal_clear_err(journal);
jbd2_journal_update_sb_errno(journal);

View File

@ -501,13 +501,13 @@ static const struct sysfs_ops ext4_attr_ops = {
.store = ext4_attr_store,
};
static struct kobj_type ext4_sb_ktype = {
static const struct kobj_type ext4_sb_ktype = {
.default_groups = ext4_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_sb_release,
};
static struct kobj_type ext4_feat_ktype = {
static const struct kobj_type ext4_feat_ktype = {
.default_groups = ext4_feat_groups,
.sysfs_ops = &ext4_attr_ops,
.release = ext4_feat_release,

View File

@ -2852,6 +2852,9 @@ shift:
(void *)header, total_ino);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
if (ext4_has_inline_data(inode))
error = ext4_find_inline_data_nolock(inode);
cleanup:
if (error && (mnt_count != le16_to_cpu(sbi->s_es->s_mnt_count))) {
ext4_warning(inode->i_sb, "Unable to expand inode %lu. Delete some EAs or run e2fsck.",

View File

@ -969,10 +969,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
{
int err = 0;
unsigned long long ret;
sector_t block = 0;
sector_t block = blocknr;
if (journal->j_inode) {
block = blocknr;
if (journal->j_bmap) {
err = journal->j_bmap(journal, &block);
if (err == 0)
*retp = block;
} else if (journal->j_inode) {
ret = bmap(journal->j_inode, &block);
if (ret || !block) {

View File

@ -1308,6 +1308,14 @@ struct journal_s
struct buffer_head *bh,
enum passtype pass, int off,
tid_t expected_commit_id);
/**
* @j_bmap:
*
* Bmap function that should be used instead of the generic
* VFS bmap function.
*/
int (*j_bmap)(struct journal_s *journal, sector_t *block);
};
#define jbd2_might_wait_for_commit(j) \