ext4: use dedicated slab caches for group_info structures

ext4_group_info structures are currently allocated with kmalloc().
With a typical 4K block size, these are 136 bytes each -- meaning
they'll each consume a 256-byte slab object.  On a system with many
large ext4 partitions, that's a lot of wasted kernel slab space.
(E.g., a single 1TB partition will have about 8000 block groups, using
about 2MB of slab, of which nearly 1MB is wasted.)

This patch adds an array of slab cache pointers -- each cache is created
on demand, according to the superblock block size -- and uses these
caches to allocate the group info objects.

Google-Bug-Id: 2980809

Signed-off-by: Curt Wohlgemuth <curtw@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
This commit is contained in:
Curt Wohlgemuth 2010-10-27 21:29:12 -04:00 committed by Theodore Ts'o
parent b853fd3648
commit fb1813f4a8
2 changed files with 78 additions and 21 deletions

View File

@ -205,6 +205,7 @@ typedef struct ext4_io_end {
#define EXT4_MIN_BLOCK_SIZE 1024 #define EXT4_MIN_BLOCK_SIZE 1024
#define EXT4_MAX_BLOCK_SIZE 65536 #define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10 #define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
#ifdef __KERNEL__ #ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) # define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else #else

View File

@ -338,6 +338,14 @@
static struct kmem_cache *ext4_pspace_cachep; static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep; static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep; static struct kmem_cache *ext4_free_ext_cachep;
/*
 * Slab caches for ext4_group_info structures, one per supported block
 * size.  The array is indexed by
 * (s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE), so it has one slot for
 * every block size from EXT4_MIN_BLOCK_LOG_SIZE through
 * EXT4_MAX_BLOCK_LOG_SIZE inclusive.
 *
 * The array is file-global: a slot is filled in by ext4_mb_init() the
 * first time a filesystem with that block size is set up, and later
 * filesystems with the same s_blocksize_bits reuse the existing cache.
 * Slots that were never needed stay NULL.  All populated caches are
 * destroyed in exit_ext4_mballoc().
 */
#define NR_GRPINFO_CACHES \
(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group); ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@ -2233,15 +2241,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
.release = seq_release, .release = seq_release,
}; };
/*
 * Return the group-info slab cache that matches the given block size.
 *
 * @blocksize_bits: log2 of the filesystem block size (sb->s_blocksize_bits).
 *
 * The cache slot is selected by the offset of @blocksize_bits from
 * EXT4_MIN_BLOCK_LOG_SIZE.  The cache must already exist; a NULL slot
 * trips BUG_ON(), so this never returns NULL.
 */
static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
{
	struct kmem_cache *cachep;

	cachep = ext4_groupinfo_caches[blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE];
	BUG_ON(!cachep);

	return cachep;
}
/* Create and initialize ext4_group_info data for the given group. */ /* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc) struct ext4_group_desc *desc)
{ {
int i, len; int i;
int metalen = 0; int metalen = 0;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info; struct ext4_group_info **meta_group_info;
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
/* /*
* First check if this group is the first of a reserved block. * First check if this group is the first of a reserved block.
@ -2261,22 +2278,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
meta_group_info; meta_group_info;
} }
/*
* calculate needed size. if change bb_counters size,
* don't forget about ext4_mb_generate_buddy()
*/
len = offsetof(typeof(**meta_group_info),
bb_counters[sb->s_blocksize_bits + 2]);
meta_group_info = meta_group_info =
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
meta_group_info[i] = kzalloc(len, GFP_KERNEL); meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) { if (meta_group_info[i] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n"); printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
goto exit_group_info; goto exit_group_info;
} }
memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state)); &(meta_group_info[i]->bb_state));
@ -2331,6 +2342,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
int num_meta_group_infos_max; int num_meta_group_infos_max;
int array_size; int array_size;
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */ /* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@ -2388,8 +2400,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
return 0; return 0;
err_freebuddy: err_freebuddy:
cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0) while (i-- > 0)
kfree(ext4_get_group_info(sb, i)); kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos; i = num_meta_group_infos;
while (i-- > 0) while (i-- > 0)
kfree(sbi->s_group_info[i]); kfree(sbi->s_group_info[i]);
@ -2406,19 +2419,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset; unsigned offset;
unsigned max; unsigned max;
int ret; int ret;
int cache_index;
struct kmem_cache *cachep;
char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets); i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) { if (sbi->s_mb_offsets == NULL) {
return -ENOMEM; ret = -ENOMEM;
goto out;
} }
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs); i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) { if (sbi->s_mb_maxs == NULL) {
kfree(sbi->s_mb_offsets); ret = -ENOMEM;
return -ENOMEM; goto out;
}
cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
cachep = ext4_groupinfo_caches[cache_index];
if (!cachep) {
char name[32];
int len = offsetof(struct ext4_group_info,
bb_counters[sb->s_blocksize_bits + 2]);
sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
namep = kstrdup(name, GFP_KERNEL);
if (!namep) {
ret = -ENOMEM;
goto out;
}
/* Need to free the kmem_cache_name() when we
* destroy the slab */
cachep = kmem_cache_create(namep, len, 0,
SLAB_RECLAIM_ACCOUNT, NULL);
if (!cachep) {
ret = -ENOMEM;
goto out;
}
ext4_groupinfo_caches[cache_index] = cachep;
} }
/* order 0 is regular bitmap */ /* order 0 is regular bitmap */
@ -2439,9 +2481,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
/* init file for buddy data */ /* init file for buddy data */
ret = ext4_mb_init_backend(sb); ret = ext4_mb_init_backend(sb);
if (ret != 0) { if (ret != 0) {
kfree(sbi->s_mb_offsets); goto out;
kfree(sbi->s_mb_maxs);
return ret;
} }
spin_lock_init(&sbi->s_md_lock); spin_lock_init(&sbi->s_md_lock);
@ -2456,9 +2496,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) { if (sbi->s_locality_groups == NULL) {
kfree(sbi->s_mb_offsets); ret = -ENOMEM;
kfree(sbi->s_mb_maxs); goto out;
return -ENOMEM;
} }
for_each_possible_cpu(i) { for_each_possible_cpu(i) {
struct ext4_locality_group *lg; struct ext4_locality_group *lg;
@ -2475,7 +2514,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
if (sbi->s_journal) if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit; sbi->s_journal->j_commit_callback = release_blocks_on_commit;
return 0; out:
if (ret) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
kfree(namep);
}
return ret;
} }
/* need to called with the ext4 group lock held */ /* need to called with the ext4 group lock held */
@ -2503,6 +2548,7 @@ int ext4_mb_release(struct super_block *sb)
int num_meta_group_infos; int num_meta_group_infos;
struct ext4_group_info *grinfo; struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_group_info) { if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) { for (i = 0; i < ngroups; i++) {
@ -2513,7 +2559,7 @@ int ext4_mb_release(struct super_block *sb)
ext4_lock_group(sb, i); ext4_lock_group(sb, i);
ext4_mb_cleanup_pa(grinfo); ext4_mb_cleanup_pa(grinfo);
ext4_unlock_group(sb, i); ext4_unlock_group(sb, i);
kfree(grinfo); kmem_cache_free(cachep, grinfo);
} }
num_meta_group_infos = (ngroups + num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >> EXT4_DESC_PER_BLOCK(sb) - 1) >>
@ -2691,6 +2737,7 @@ int __init init_ext4_mballoc(void)
void exit_ext4_mballoc(void) void exit_ext4_mballoc(void)
{ {
int i;
/* /*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep * Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache. * before destroying the slab cache.
@ -2699,6 +2746,15 @@ void exit_ext4_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep); kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep); kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep); kmem_cache_destroy(ext4_free_ext_cachep);
for (i = 0; i < NR_GRPINFO_CACHES; i++) {
struct kmem_cache *cachep = ext4_groupinfo_caches[i];
if (cachep) {
char *name = (char *)kmem_cache_name(cachep);
kmem_cache_destroy(cachep);
kfree(name);
}
}
ext4_remove_debugfs_entry(); ext4_remove_debugfs_entry();
} }