list_lru: introduce list_lru_shrink_{count,walk}
Kmem accounting of memcg is unusable now, because it lacks slab shrinker support. That means when we hit the limit we will get ENOMEM w/o any chance to recover. What we should do then is to call shrink_slab, which would reclaim old inode/dentry caches from this cgroup. This is what this patch set is intended to do.

Basically, it does two things. First, it introduces the notion of a per-memcg slab shrinker. A shrinker that wants to reclaim objects per cgroup should mark itself as SHRINKER_MEMCG_AWARE. Then it will be passed the memory cgroup to scan from in shrink_control->memcg. For such shrinkers, shrink_slab iterates over the whole cgroup subtree under the target cgroup and calls the shrinker for each kmem-active memory cgroup.

Secondly, this patch set makes the list_lru structure per-memcg. It is done transparently to list_lru users: all they have to do is tell list_lru_init that they want a memcg-aware list_lru. The list_lru will then automatically distribute objects among per-memcg lists based on which cgroup the object is accounted to. This way, to make the FS shrinkers (icache, dcache) memcg-aware, we only need to make them use a memcg-aware list_lru, and this is what this patch set does.

As before, this patch set only enables per-memcg kmem reclaim when the pressure comes from memory.limit, not from memory.kmem.limit. Handling memory.kmem.limit is going to be tricky due to GFP_NOFS allocations, and it is still unclear whether we will have this knob in the unified hierarchy.

This patch (of 9):

NUMA-aware slab shrinkers use the list_lru structure to distribute objects coming from different NUMA nodes to different lists. Whenever such a shrinker needs to count or scan objects from a particular node, it issues commands like this:

        count = list_lru_count_node(lru, sc->nid);
        freed = list_lru_walk_node(lru, sc->nid, isolate_func,
                                   isolate_arg, &sc->nr_to_scan);

where sc is an instance of the shrink_control structure passed to it from vmscan.

To simplify this, let's add special list_lru functions to be used by shrinkers, list_lru_shrink_count() and list_lru_shrink_walk(), which consolidate the nid and nr_to_scan arguments in the shrink_control structure. This will also allow us to avoid patching shrinkers that use list_lru when we make shrink_slab() per-memcg: all we will have to do is extend the shrink_control structure to include the target memcg and make list_lru_shrink_{count,walk} handle this appropriately.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Suggested-by: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
commit 503c358cf1
parent 10c1045f28
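Before the diff itself, here is a minimal sketch (not part of the patch; all example_* names are invented for illustration) of how a NUMA-aware, list_lru-backed shrinker reads once it is converted to the new helpers: the count and scan callbacks simply hand the whole shrink_control through instead of unpacking sc->nid and sc->nr_to_scan themselves.

/*
 * Illustrative sketch only, not part of the patch.  example_lru,
 * example_isolate, example_count, example_scan and example_shrinker
 * are hypothetical names.
 */
#include <linux/list.h>
#include <linux/list_lru.h>
#include <linux/shrinker.h>
#include <linux/spinlock.h>

static struct list_lru example_lru;

static enum lru_status example_isolate(struct list_head *item,
				       spinlock_t *lru_lock, void *arg)
{
	struct list_head *dispose = arg;

	/* detach the object from the lru and queue it for disposal */
	list_move(item, dispose);
	return LRU_REMOVED;
}

static unsigned long example_count(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	/* was: list_lru_count_node(&example_lru, sc->nid) */
	return list_lru_shrink_count(&example_lru, sc);
}

static unsigned long example_scan(struct shrinker *shrink,
				  struct shrink_control *sc)
{
	LIST_HEAD(dispose);
	unsigned long freed;

	/*
	 * was: list_lru_walk_node(&example_lru, sc->nid, example_isolate,
	 *			   &dispose, &sc->nr_to_scan);
	 */
	freed = list_lru_shrink_walk(&example_lru, sc,
				     example_isolate, &dispose);

	/* a real shrinker would free the objects left on "dispose" here */
	return freed;
}

static struct shrinker example_shrinker = {
	.count_objects	= example_count,
	.scan_objects	= example_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE,
};

register_shrinker(&example_shrinker) would hook this up; per the commit message, once shrink_control later grows a target memcg, such a call site does not need to change again.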
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -930,24 +930,22 @@ dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
 /**
  * prune_dcache_sb - shrink the dcache
  * @sb: superblock
- * @nr_to_scan : number of entries to try to free
- * @nid: which node to scan for freeable entities
+ * @sc: shrink control, passed to list_lru_shrink_walk()
  *
- * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
- * done when we need more memory an called from the superblock shrinker
+ * Attempt to shrink the superblock dcache LRU by @sc->nr_to_scan entries. This
+ * is done when we need more memory and called from the superblock shrinker
  * function.
  *
  * This function may fail to free any resources if all the dentries are in
  * use.
  */
-long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
-		     int nid)
+long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc)
 {
 	LIST_HEAD(dispose);
 	long freed;
 
-	freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
-				   &dispose, &nr_to_scan);
+	freed = list_lru_shrink_walk(&sb->s_dentry_lru, sc,
+				     dentry_lru_isolate, &dispose);
 	shrink_dentry_list(&dispose);
 	return freed;
 }
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -171,8 +171,8 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
 	if (!(sc->gfp_mask & __GFP_FS))
 		return SHRINK_STOP;
 
-	freed = list_lru_walk_node(&gfs2_qd_lru, sc->nid, gfs2_qd_isolate,
-				   &dispose, &sc->nr_to_scan);
+	freed = list_lru_shrink_walk(&gfs2_qd_lru, sc,
+				     gfs2_qd_isolate, &dispose);
 
 	gfs2_qd_dispose(&dispose);
 
@@ -182,7 +182,7 @@ static unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
 static unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
 					  struct shrink_control *sc)
 {
-	return vfs_pressure_ratio(list_lru_count_node(&gfs2_qd_lru, sc->nid));
+	return vfs_pressure_ratio(list_lru_shrink_count(&gfs2_qd_lru, sc));
 }
 
 struct shrinker gfs2_qd_shrinker = {
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -751,14 +751,13 @@ inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
  * to trim from the LRU. Inodes to be freed are moved to a temporary list and
  * then are freed outside inode_lock by dispose_list().
  */
-long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
-		     int nid)
+long prune_icache_sb(struct super_block *sb, struct shrink_control *sc)
 {
 	LIST_HEAD(freeable);
 	long freed;
 
-	freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate,
-				   &freeable, &nr_to_scan);
+	freed = list_lru_shrink_walk(&sb->s_inode_lru, sc,
+				     inode_lru_isolate, &freeable);
 	dispose_list(&freeable);
 	return freed;
 }
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -14,6 +14,7 @@ struct file_system_type;
 struct linux_binprm;
 struct path;
 struct mount;
+struct shrink_control;
 
 /*
  * block_dev.c
@@ -111,8 +112,7 @@ extern int open_check_o_direct(struct file *f);
  * inode.c
  */
 extern spinlock_t inode_sb_list_lock;
-extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
-			    int nid);
+extern long prune_icache_sb(struct super_block *sb, struct shrink_control *sc);
 extern void inode_add_lru(struct inode *inode);
 
 /*
@@ -129,8 +129,7 @@ extern int invalidate_inodes(struct super_block *, bool);
  */
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
 extern int d_set_mounted(struct dentry *dentry);
-extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
-			    int nid);
+extern long prune_dcache_sb(struct super_block *sb, struct shrink_control *sc);
 
 /*
  * read_write.c
--- a/fs/super.c
+++ b/fs/super.c
@@ -77,8 +77,8 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	if (sb->s_op->nr_cached_objects)
 		fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
 
-	inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
-	dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
+	inodes = list_lru_shrink_count(&sb->s_inode_lru, sc);
+	dentries = list_lru_shrink_count(&sb->s_dentry_lru, sc);
 	total_objects = dentries + inodes + fs_objects + 1;
 	if (!total_objects)
 		total_objects = 1;
@@ -86,20 +86,20 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
 	/* proportion the scan between the caches */
 	dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
 	inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
+	fs_objects = mult_frac(sc->nr_to_scan, fs_objects, total_objects);
 
 	/*
 	 * prune the dcache first as the icache is pinned by it, then
 	 * prune the icache, followed by the filesystem specific caches
 	 */
-	freed = prune_dcache_sb(sb, dentries, sc->nid);
-	freed += prune_icache_sb(sb, inodes, sc->nid);
+	sc->nr_to_scan = dentries;
+	freed = prune_dcache_sb(sb, sc);
+	sc->nr_to_scan = inodes;
+	freed += prune_icache_sb(sb, sc);
 
-	if (fs_objects) {
-		fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
-				       total_objects);
+	if (fs_objects)
 		freed += sb->s_op->free_cached_objects(sb, fs_objects,
 						       sc->nid);
-	}
 
 	drop_super(sb);
 	return freed;
@@ -118,17 +118,15 @@ static unsigned long super_cache_count(struct shrinker *shrink,
 	 * scalability bottleneck. The counts could get updated
 	 * between super_cache_count and super_cache_scan anyway.
 	 * Call to super_cache_count with shrinker_rwsem held
-	 * ensures the safety of call to list_lru_count_node() and
+	 * ensures the safety of call to list_lru_shrink_count() and
 	 * s_op->nr_cached_objects().
 	 */
 	if (sb->s_op && sb->s_op->nr_cached_objects)
 		total_objects = sb->s_op->nr_cached_objects(sb,
 							    sc->nid);
 
-	total_objects += list_lru_count_node(&sb->s_dentry_lru,
-						 sc->nid);
-	total_objects += list_lru_count_node(&sb->s_inode_lru,
-						 sc->nid);
+	total_objects += list_lru_shrink_count(&sb->s_dentry_lru, sc);
+	total_objects += list_lru_shrink_count(&sb->s_inode_lru, sc);
 
 	total_objects = vfs_pressure_ratio(total_objects);
 	return total_objects;
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1583,10 +1583,9 @@ xfs_buftarg_shrink_scan(
 					struct xfs_buftarg, bt_shrinker);
 	LIST_HEAD(dispose);
 	unsigned long		freed;
-	unsigned long		nr_to_scan = sc->nr_to_scan;
 
-	freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate,
-				   &dispose, &nr_to_scan);
+	freed = list_lru_shrink_walk(&btp->bt_lru, sc,
+				     xfs_buftarg_isolate, &dispose);
 
 	while (!list_empty(&dispose)) {
 		struct xfs_buf *bp;
@@ -1605,7 +1604,7 @@ xfs_buftarg_shrink_count(
 {
 	struct xfs_buftarg	*btp = container_of(shrink,
 					struct xfs_buftarg, bt_shrinker);
-	return list_lru_count_node(&btp->bt_lru, sc->nid);
+	return list_lru_shrink_count(&btp->bt_lru, sc);
 }
 
 void
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -523,7 +523,6 @@ xfs_qm_shrink_scan(
 	struct xfs_qm_isolate	isol;
 	unsigned long		freed;
 	int			error;
-	unsigned long		nr_to_scan = sc->nr_to_scan;
 
 	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
 		return 0;
@@ -531,8 +530,8 @@ xfs_qm_shrink_scan(
 	INIT_LIST_HEAD(&isol.buffers);
 	INIT_LIST_HEAD(&isol.dispose);
 
-	freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
-					&nr_to_scan);
+	freed = list_lru_shrink_walk(&qi->qi_lru, sc,
+				     xfs_qm_dquot_isolate, &isol);
 
 	error = xfs_buf_delwri_submit(&isol.buffers);
 	if (error)
@@ -557,7 +556,7 @@ xfs_qm_shrink_count(
 	struct xfs_quotainfo	*qi = container_of(shrink,
 					struct xfs_quotainfo, qi_shrinker);
 
-	return list_lru_count_node(&qi->qi_lru, sc->nid);
+	return list_lru_shrink_count(&qi->qi_lru, sc);
 }
 
 /*
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -9,6 +9,7 @@
 
 #include <linux/list.h>
 #include <linux/nodemask.h>
+#include <linux/shrinker.h>
 
 /* list_lru_walk_cb has to always return one of those */
 enum lru_status {
@@ -81,6 +82,13 @@ bool list_lru_del(struct list_lru *lru, struct list_head *item);
  * Callers that want such a guarantee need to provide an outer lock.
  */
 unsigned long list_lru_count_node(struct list_lru *lru, int nid);
+
+static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
+						  struct shrink_control *sc)
+{
+	return list_lru_count_node(lru, sc->nid);
+}
+
 static inline unsigned long list_lru_count(struct list_lru *lru)
 {
 	long count = 0;
@@ -119,6 +127,14 @@ unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
 				 list_lru_walk_cb isolate, void *cb_arg,
 				 unsigned long *nr_to_walk);
 
+static inline unsigned long
+list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
+		     list_lru_walk_cb isolate, void *cb_arg)
+{
+	return list_lru_walk_node(lru, sc->nid, isolate, cb_arg,
+				  &sc->nr_to_scan);
+}
+
 static inline unsigned long
 list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
 	      void *cb_arg, unsigned long nr_to_walk)
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -275,7 +275,7 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	shadow_nodes = list_lru_count_node(&workingset_shadow_nodes, sc->nid);
+	shadow_nodes = list_lru_shrink_count(&workingset_shadow_nodes, sc);
 	local_irq_enable();
 
 	pages = node_present_pages(sc->nid);
@@ -376,8 +376,8 @@ static unsigned long scan_shadow_nodes(struct shrinker *shrinker,
 
 	/* list_lru lock nests inside IRQ-safe mapping->tree_lock */
 	local_irq_disable();
-	ret = list_lru_walk_node(&workingset_shadow_nodes, sc->nid,
-				 shadow_lru_isolate, NULL, &sc->nr_to_scan);
+	ret = list_lru_shrink_walk(&workingset_shadow_nodes, sc,
+				   shadow_lru_isolate, NULL);
 	local_irq_enable();
 	return ret;
 }