Various bug fixes for ext4. The most important is a fix for the new

extent cache's slab shrinker which can cause significant, user-visible
 pauses when the system is under memory pressure.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABCAAGBQJRMpM3AAoJENNvdpvBGATwiXgP/3eSg3C+M0ZUeL6lH3aXRxQO
 PHxUL/Di5cfFs3GX4DksVzsD1KkTIz8B424AhdahrrgGh1jTx4/J23OrEdu9nK24
 JGU5hmowoCyG8PZG1kGMbX6EYcblYTx+O2tX/RInnRExm9ajkfxb0S1g0Vl340qw
 58WTSWfl2+J/3RnJ9TYX/qNVeCJdxLH3GkpFbvQbLGyylfM9hsUD5MZMAR1bpOJF
 U2vNdK3n65W0AtKhLo7TYnoJ4ll2PoFRvffS0rqhEpIAcRxpVsNThFJLBcOQ1a79
 6cCN5uhrJOlL5jLN/fYCViU1+03y7itCMJmtSpuyV8DtUGjf4r1tzlvWGeiSmpB9
 NprZ/MgO1ROnzO/gzPM2s4nWWeGZiGaf7vMDyScIDtqF1ckfHN17jqazuSJcybN8
 U83O9+KyhHkvr/+zqlySXiBX2MUSUdSE37CsMC7R+mAz7C46yjXEPuG8pLkLCWiG
 gjMD30D1f6+h+K646WN497+Crxl1CurEH+ON7k158cNvVNlX1FfFHUprRHeNUXkV
 tEKjiCUCf5WjNeFEc93nC/nDi4OIISD25N9LyHzp2CcV/XXRjpsrNPBFDAZjwgiK
 YVUQIwocVUVlRaACzrM9sDFtSELqNzy/GLuERITu1Mb2R4sMXIyvvJkjc+EuQS0F
 XVQ3BU5ypWyxJGrSGCPd
 =+vcC
 -----END PGP SIGNATURE-----

Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4

Pull ext4 bug fixes from Ted Ts'o:
 "Various bug fixes for ext4.  The most important is a fix for the new
  extent cache's slab shrinker which can cause significant, user-visible
  pauses when the system is under memory pressure."

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: enable quotas before orphan cleanup
  ext4: don't allow quota mount options when quota feature enabled
  ext4: fix a warning from sparse check for ext4_dir_llseek
  ext4: convert number of blocks to clusters properly
  ext4: fix possible memory leak in ext4_remount()
  jbd2: fix ERR_PTR dereference in jbd2__journal_start
  ext4: use percpu counter for extent cache count
  ext4: optimize ext4_es_shrink()
This commit is contained in:
Linus Torvalds 2013-03-02 19:33:21 -08:00
commit a7c1120d2d
9 changed files with 79 additions and 82 deletions

View File

@ -635,7 +635,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
brelse(bitmap_bh);
printk(KERN_DEBUG "ext4_count_free_clusters: stored = %llu"
", computed = %llu, %llu\n",
EXT4_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
EXT4_NUM_B2C(EXT4_SB(sb), ext4_free_blocks_count(es)),
desc_count, bitmap_count);
return bitmap_count;
#else

View File

@ -334,7 +334,7 @@ static inline loff_t ext4_get_htree_eof(struct file *filp)
*
* For non-htree, ext4_llseek already chooses the proper max offset.
*/
loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
static loff_t ext4_dir_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *inode = file->f_mapping->host;
int dx_dir = is_dx_dir(inode);

View File

@ -1309,6 +1309,7 @@ struct ext4_sb_info {
/* Reclaim extents from extent status tree */
struct shrinker s_es_shrinker;
struct list_head s_es_lru;
struct percpu_counter s_extent_cache_cnt;
spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
};

View File

@ -147,11 +147,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
ext4_lblk_t end);
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan);
static int ext4_es_reclaim_extents_count(struct super_block *sb);
int __init ext4_init_es(void)
{
ext4_es_cachep = KMEM_CACHE(extent_status, SLAB_RECLAIM_ACCOUNT);
ext4_es_cachep = kmem_cache_create("ext4_extent_status",
sizeof(struct extent_status),
0, (SLAB_RECLAIM_ACCOUNT), NULL);
if (ext4_es_cachep == NULL)
return -ENOMEM;
return 0;
@ -302,8 +303,10 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
/*
* We don't count delayed extent because we never try to reclaim them
*/
if (!ext4_es_is_delayed(es))
if (!ext4_es_is_delayed(es)) {
EXT4_I(inode)->i_es_lru_nr++;
percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
}
return es;
}
@ -314,6 +317,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
if (!ext4_es_is_delayed(es)) {
BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
EXT4_I(inode)->i_es_lru_nr--;
percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt);
}
kmem_cache_free(ext4_es_cachep, es);
@ -674,10 +678,11 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
int ret, nr_shrunk = 0;
trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan);
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret);
if (!nr_to_scan)
return ext4_es_reclaim_extents_count(sbi->s_sb);
return ret;
INIT_LIST_HEAD(&scanned);
@ -705,9 +710,10 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
}
list_splice_tail(&scanned, &sbi->s_es_lru);
spin_unlock(&sbi->s_es_lru_lock);
trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk);
return ext4_es_reclaim_extents_count(sbi->s_sb);
ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
return ret;
}
void ext4_es_register_shrinker(struct super_block *sb)
@ -751,25 +757,6 @@ void ext4_es_lru_del(struct inode *inode)
spin_unlock(&sbi->s_es_lru_lock);
}
static int ext4_es_reclaim_extents_count(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_inode_info *ei;
struct list_head *cur;
int nr_cached = 0;
spin_lock(&sbi->s_es_lru_lock);
list_for_each(cur, &sbi->s_es_lru) {
ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
read_lock(&ei->i_es_lock);
nr_cached += ei->i_es_lru_nr;
read_unlock(&ei->i_es_lock);
}
spin_unlock(&sbi->s_es_lru_lock);
trace_ext4_es_reclaim_extents_count(sb, nr_cached);
return nr_cached;
}
static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
int nr_to_scan)
{

View File

@ -3419,7 +3419,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
win = offs;
ac->ac_b_ex.fe_logical = ac->ac_o_ex.fe_logical -
EXT4_B2C(sbi, win);
EXT4_NUM_B2C(sbi, win);
BUG_ON(ac->ac_o_ex.fe_logical < ac->ac_b_ex.fe_logical);
BUG_ON(ac->ac_o_ex.fe_len > ac->ac_b_ex.fe_len);
}
@ -4565,7 +4565,7 @@ do_more:
EXT4_BLOCKS_PER_GROUP(sb);
count -= overflow;
}
count_clusters = EXT4_B2C(sbi, count);
count_clusters = EXT4_NUM_B2C(sbi, count);
bitmap_bh = ext4_read_block_bitmap(sb, block_group);
if (!bitmap_bh) {
err = -EIO;
@ -4807,11 +4807,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, blocks_freed));
EXT4_NUM_B2C(sbi, blocks_freed));
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic_add(EXT4_B2C(sbi, blocks_freed),
atomic_add(EXT4_NUM_B2C(sbi, blocks_freed),
&sbi->s_flex_groups[flex_group].free_clusters);
}

View File

@ -1247,7 +1247,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_inode_table_set(sb, gdp, group_data->inode_table);
ext4_free_group_clusters_set(sb, gdp,
EXT4_B2C(sbi, group_data->free_blocks_count));
EXT4_NUM_B2C(sbi, group_data->free_blocks_count));
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
if (ext4_has_group_desc_csum(sb))
ext4_itable_unused_set(sb, gdp,
@ -1349,7 +1349,7 @@ static void ext4_update_super(struct super_block *sb,
/* Update the free space counts */
percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_B2C(sbi, free_blocks));
EXT4_NUM_B2C(sbi, free_blocks));
percpu_counter_add(&sbi->s_freeinodes_counter,
EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
@ -1360,7 +1360,7 @@ static void ext4_update_super(struct super_block *sb,
sbi->s_log_groups_per_flex) {
ext4_group_t flex_group;
flex_group = ext4_flex_group(sbi, group_data[0].group);
atomic_add(EXT4_B2C(sbi, free_blocks),
atomic_add(EXT4_NUM_B2C(sbi, free_blocks),
&sbi->s_flex_groups[flex_group].free_clusters);
atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
&sbi->s_flex_groups[flex_group].free_inodes);

View File

@ -783,6 +783,7 @@ static void ext4_put_super(struct super_block *sb)
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_counter_destroy(&sbi->s_extent_cache_cnt);
brelse(sbi->s_sbh);
#ifdef CONFIG_QUOTA
for (i = 0; i < MAXQUOTAS; i++)
@ -1247,6 +1248,11 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
"quota options when quota turned on");
return -1;
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
"when QUOTA feature is enabled");
return -1;
}
qname = match_strdup(args);
if (!qname) {
ext4_msg(sb, KERN_ERR,
@ -1544,6 +1550,13 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
"quota options when quota turned on");
return -1;
}
if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
EXT4_FEATURE_RO_COMPAT_QUOTA)) {
ext4_msg(sb, KERN_ERR,
"Cannot set journaled quota options "
"when QUOTA feature is enabled");
return -1;
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
} else {
@ -1592,6 +1605,12 @@ static int parse_options(char *options, struct super_block *sb,
return 0;
}
#ifdef CONFIG_QUOTA
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
(test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
"feature is enabled");
return 0;
}
if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@ -3161,7 +3180,7 @@ int ext4_calculate_overhead(struct super_block *sb)
}
/* Add the journal blocks as well */
if (sbi->s_journal)
overhead += EXT4_B2C(sbi, sbi->s_journal->j_maxlen);
overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen);
sbi->s_overhead = overhead;
smp_wmb();
@ -3688,6 +3707,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (!err) {
err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0);
}
if (!err) {
err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0);
}
if (err) {
ext4_msg(sb, KERN_ERR, "insufficient memory");
goto failed_mount3;
@ -3711,13 +3733,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_export_op = &ext4_export_ops;
sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_QUOTA
sb->s_qcop = &ext4_qctl_operations;
sb->dq_op = &ext4_quota_operations;
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA)) {
/* Use qctl operations for hidden quota files. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA))
sb->s_qcop = &ext4_qctl_sysfile_operations;
}
else
sb->s_qcop = &ext4_qctl_operations;
#endif
memcpy(sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
@ -3913,6 +3933,16 @@ no_journal:
if (err)
goto failed_mount7;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
!(sb->s_flags & MS_RDONLY)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
@ -3930,16 +3960,6 @@ no_journal:
} else
descr = "out journal";
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) &&
!(sb->s_flags & MS_RDONLY)) {
err = ext4_enable_quotas(sb);
if (err)
goto failed_mount8;
}
#endif /* CONFIG_QUOTA */
if (test_opt(sb, DISCARD)) {
struct request_queue *q = bdev_get_queue(sb->s_bdev);
if (!blk_queue_discard(q))
@ -3993,6 +4013,7 @@ failed_mount3:
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
percpu_counter_destroy(&sbi->s_extent_cache_cnt);
if (sbi->s_mmp_tsk)
kthread_stop(sbi->s_mmp_tsk);
failed_mount2:
@ -4538,6 +4559,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
if (!old_opts.s_qf_names[i]) {
for (j = 0; j < i; j++)
kfree(old_opts.s_qf_names[j]);
kfree(orig_data);
return -ENOMEM;
}
} else
@ -4816,9 +4838,12 @@ static int ext4_release_dquot(struct dquot *dquot)
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
struct super_block *sb = dquot->dq_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
/* Are we journaling quotas? */
if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_QUOTA) ||
sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
dquot_mark_dquot_dirty(dquot);
return ext4_write_dquot(dquot);
} else {

View File

@ -382,7 +382,7 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask,
if (err < 0) {
jbd2_free_handle(handle);
current->journal_info = NULL;
handle = ERR_PTR(err);
return ERR_PTR(err);
}
handle->h_type = type;
handle->h_line_no = line_no;

View File

@ -2255,64 +2255,48 @@ TRACE_EVENT(ext4_es_lookup_extent_exit,
__entry->found ? __entry->status : 0)
);
TRACE_EVENT(ext4_es_reclaim_extents_count,
TP_PROTO(struct super_block *sb, int nr_cached),
TP_ARGS(sb, nr_cached),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, nr_cached )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->nr_cached = nr_cached;
),
TP_printk("dev %d,%d cached objects nr %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nr_cached)
);
TRACE_EVENT(ext4_es_shrink_enter,
TP_PROTO(struct super_block *sb, int nr_to_scan),
TP_PROTO(struct super_block *sb, int nr_to_scan, int cache_cnt),
TP_ARGS(sb, nr_to_scan),
TP_ARGS(sb, nr_to_scan, cache_cnt),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, nr_to_scan )
__field( int, cache_cnt )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->nr_to_scan = nr_to_scan;
__entry->cache_cnt = cache_cnt;
),
TP_printk("dev %d,%d nr to scan %d",
TP_printk("dev %d,%d nr_to_scan %d cache_cnt %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->nr_to_scan)
__entry->nr_to_scan, __entry->cache_cnt)
);
TRACE_EVENT(ext4_es_shrink_exit,
TP_PROTO(struct super_block *sb, int shrunk_nr),
TP_PROTO(struct super_block *sb, int shrunk_nr, int cache_cnt),
TP_ARGS(sb, shrunk_nr),
TP_ARGS(sb, shrunk_nr, cache_cnt),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( int, shrunk_nr )
__field( int, cache_cnt )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->shrunk_nr = shrunk_nr;
__entry->cache_cnt = cache_cnt;
),
TP_printk("dev %d,%d nr to scan %d",
TP_printk("dev %d,%d shrunk_nr %d cache_cnt %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->shrunk_nr)
__entry->shrunk_nr, __entry->cache_cnt)
);
#endif /* _TRACE_EXT4_H */