mm: move memcg hierarchy reclaim to generic reclaim code
Memory cgroup limit reclaim and traditional global pressure reclaim will soon share the same code to reclaim from a hierarchical tree of memory cgroups.

In preparation for this, move the two right next to each other in shrink_zone().

The mem_cgroup_hierarchical_reclaim() polymath is split into a soft limit reclaim function, which still does hierarchy walking on its own, and a limit (shrinking) reclaim function, which relies on generic reclaim code to walk the hierarchy.

Signed-off-by: Johannes Weiner <jweiner@redhat.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Kirill A. Shutemov <kirill@shutemov.name>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Ying Han <yinghan@google.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
527a5ec9a5
commit
5660048cca
@ -40,6 +40,12 @@ extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
|
||||
struct mem_cgroup *mem_cont,
|
||||
int active, int file);
|
||||
|
||||
struct mem_cgroup_reclaim_cookie {
|
||||
struct zone *zone;
|
||||
int priority;
|
||||
unsigned int generation;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CGROUP_MEM_RES_CTLR
|
||||
/*
|
||||
* All "charge" functions with gfp_mask should use GFP_KERNEL or
|
||||
@ -106,6 +112,11 @@ mem_cgroup_prepare_migration(struct page *page,
|
||||
extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
|
||||
struct page *oldpage, struct page *newpage, bool migration_ok);
|
||||
|
||||
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
|
||||
struct mem_cgroup *,
|
||||
struct mem_cgroup_reclaim_cookie *);
|
||||
void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
|
||||
|
||||
/*
|
||||
* For memory reclaim.
|
||||
*/
|
||||
@ -281,6 +292,19 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
|
||||
{
|
||||
}
|
||||
|
||||
static inline struct mem_cgroup *
|
||||
mem_cgroup_iter(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev,
|
||||
struct mem_cgroup_reclaim_cookie *reclaim)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void mem_cgroup_iter_break(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev)
|
||||
{
|
||||
}
|
||||
|
||||
static inline int mem_cgroup_get_reclaim_priority(struct mem_cgroup *memcg)
|
||||
{
|
||||
return 0;
|
||||
|
169
mm/memcontrol.c
169
mm/memcontrol.c
@ -370,8 +370,6 @@ enum charge_type {
|
||||
#define MEM_CGROUP_RECLAIM_NOSWAP (1 << MEM_CGROUP_RECLAIM_NOSWAP_BIT)
|
||||
#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
|
||||
#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
|
||||
#define MEM_CGROUP_RECLAIM_SOFT_BIT 0x2
|
||||
#define MEM_CGROUP_RECLAIM_SOFT (1 << MEM_CGROUP_RECLAIM_SOFT_BIT)
|
||||
|
||||
static void mem_cgroup_get(struct mem_cgroup *memcg);
|
||||
static void mem_cgroup_put(struct mem_cgroup *memcg);
|
||||
@ -857,20 +855,33 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
|
||||
return memcg;
|
||||
}
|
||||
|
||||
struct mem_cgroup_reclaim_cookie {
|
||||
struct zone *zone;
|
||||
int priority;
|
||||
unsigned int generation;
|
||||
};
|
||||
|
||||
static struct mem_cgroup *
|
||||
mem_cgroup_iter(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev,
|
||||
struct mem_cgroup_reclaim_cookie *reclaim)
|
||||
/**
|
||||
* mem_cgroup_iter - iterate over memory cgroup hierarchy
|
||||
* @root: hierarchy root
|
||||
* @prev: previously returned memcg, NULL on first invocation
|
||||
* @reclaim: cookie for shared reclaim walks, NULL for full walks
|
||||
*
|
||||
* Returns references to children of the hierarchy below @root, or
|
||||
* @root itself, or %NULL after a full round-trip.
|
||||
*
|
||||
* Caller must pass the return value in @prev on subsequent
|
||||
* invocations for reference counting, or use mem_cgroup_iter_break()
|
||||
* to cancel a hierarchy walk before the round-trip is complete.
|
||||
*
|
||||
* Reclaimers can specify a zone and a priority level in @reclaim to
|
||||
* divide up the memcgs in the hierarchy among all concurrent
|
||||
* reclaimers operating on the same zone and priority.
|
||||
*/
|
||||
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev,
|
||||
struct mem_cgroup_reclaim_cookie *reclaim)
|
||||
{
|
||||
struct mem_cgroup *memcg = NULL;
|
||||
int id = 0;
|
||||
|
||||
if (mem_cgroup_disabled())
|
||||
return NULL;
|
||||
|
||||
if (!root)
|
||||
root = root_mem_cgroup;
|
||||
|
||||
@ -926,8 +937,13 @@ mem_cgroup_iter(struct mem_cgroup *root,
|
||||
return memcg;
|
||||
}
|
||||
|
||||
static void mem_cgroup_iter_break(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev)
|
||||
/**
|
||||
* mem_cgroup_iter_break - abort a hierarchy walk prematurely
|
||||
* @root: hierarchy root
|
||||
* @prev: last visited hierarchy member as returned by mem_cgroup_iter()
|
||||
*/
|
||||
void mem_cgroup_iter_break(struct mem_cgroup *root,
|
||||
struct mem_cgroup *prev)
|
||||
{
|
||||
if (!root)
|
||||
root = root_mem_cgroup;
|
||||
@ -1555,6 +1571,42 @@ u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
|
||||
return min(limit, memsw);
|
||||
}
|
||||
|
||||
static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned long total = 0;
|
||||
bool noswap = false;
|
||||
int loop;
|
||||
|
||||
if (flags & MEM_CGROUP_RECLAIM_NOSWAP)
|
||||
noswap = true;
|
||||
if (!(flags & MEM_CGROUP_RECLAIM_SHRINK) && memcg->memsw_is_minimum)
|
||||
noswap = true;
|
||||
|
||||
for (loop = 0; loop < MEM_CGROUP_MAX_RECLAIM_LOOPS; loop++) {
|
||||
if (loop)
|
||||
drain_all_stock_async(memcg);
|
||||
total += try_to_free_mem_cgroup_pages(memcg, gfp_mask, noswap);
|
||||
/*
|
||||
* Allow limit shrinkers, which are triggered directly
|
||||
* by userspace, to catch signals and stop reclaim
|
||||
* after minimal progress, regardless of the margin.
|
||||
*/
|
||||
if (total && (flags & MEM_CGROUP_RECLAIM_SHRINK))
|
||||
break;
|
||||
if (mem_cgroup_margin(memcg))
|
||||
break;
|
||||
/*
|
||||
* If nothing was reclaimed after two attempts, there
|
||||
* may be no reclaimable pages in this hierarchy.
|
||||
*/
|
||||
if (loop && !total)
|
||||
break;
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
/**
|
||||
* test_mem_cgroup_node_reclaimable
|
||||
* @mem: the target memcg
|
||||
@ -1692,30 +1744,14 @@ bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Scan the hierarchy if needed to reclaim memory. We remember the last child
|
||||
* we reclaimed from, so that we don't end up penalizing one child extensively
|
||||
* based on its position in the children list.
|
||||
*
|
||||
* root_memcg is the original ancestor that we've been reclaiming from.
|
||||
*
|
||||
* We give up and return to the caller when we visit root_memcg twice.
|
||||
* (other groups can be removed while we're walking....)
|
||||
*
|
||||
* If shrink==true, this returns immediately to avoid freeing too much.
|
||||
*/
|
||||
static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
|
||||
struct zone *zone,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long reclaim_options,
|
||||
unsigned long *total_scanned)
|
||||
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
|
||||
struct zone *zone,
|
||||
gfp_t gfp_mask,
|
||||
unsigned long *total_scanned)
|
||||
{
|
||||
struct mem_cgroup *victim = NULL;
|
||||
int ret, total = 0;
|
||||
int total = 0;
|
||||
int loop = 0;
|
||||
bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP;
|
||||
bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK;
|
||||
bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT;
|
||||
unsigned long excess;
|
||||
unsigned long nr_scanned;
|
||||
struct mem_cgroup_reclaim_cookie reclaim = {
|
||||
@ -1725,29 +1761,17 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
|
||||
|
||||
excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
|
||||
|
||||
/* If memsw_is_minimum==1, swap-out is of-no-use. */
|
||||
if (!check_soft && !shrink && root_memcg->memsw_is_minimum)
|
||||
noswap = true;
|
||||
|
||||
while (1) {
|
||||
victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
|
||||
if (!victim) {
|
||||
loop++;
|
||||
/*
|
||||
* We are not draining per cpu cached charges during
|
||||
* soft limit reclaim because global reclaim doesn't
|
||||
* care about charges. It tries to free some memory and
|
||||
* charges will not give any.
|
||||
*/
|
||||
if (!check_soft && loop >= 1)
|
||||
drain_all_stock_async(root_memcg);
|
||||
if (loop >= 2) {
|
||||
/*
|
||||
* If we have not been able to reclaim
|
||||
* anything, it might be because there are
|
||||
* no reclaimable pages under this hierarchy
|
||||
*/
|
||||
if (!check_soft || !total)
|
||||
if (!total)
|
||||
break;
|
||||
/*
|
||||
* We want to do more targeted reclaim.
|
||||
@ -1761,30 +1785,12 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_memcg,
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (!mem_cgroup_reclaimable(victim, noswap)) {
|
||||
/* this cgroup's local usage == 0 */
|
||||
if (!mem_cgroup_reclaimable(victim, false))
|
||||
continue;
|
||||
}
|
||||
/* we use swappiness of local cgroup */
|
||||
if (check_soft) {
|
||||
ret = mem_cgroup_shrink_node_zone(victim, gfp_mask,
|
||||
noswap, zone, &nr_scanned);
|
||||
*total_scanned += nr_scanned;
|
||||
} else
|
||||
ret = try_to_free_mem_cgroup_pages(victim, gfp_mask,
|
||||
noswap);
|
||||
total += ret;
|
||||
/*
|
||||
* At shrinking usage, we can't check we should stop here or
|
||||
* reclaim more. It depends on callers. last_scanned_child
|
||||
* will work enough for keeping fairness under tree.
|
||||
*/
|
||||
if (shrink)
|
||||
break;
|
||||
if (check_soft) {
|
||||
if (!res_counter_soft_limit_excess(&root_memcg->res))
|
||||
break;
|
||||
} else if (mem_cgroup_margin(root_memcg))
|
||||
total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
|
||||
zone, &nr_scanned);
|
||||
*total_scanned += nr_scanned;
|
||||
if (!res_counter_soft_limit_excess(&root_memcg->res))
|
||||
break;
|
||||
}
|
||||
mem_cgroup_iter_break(root_memcg, victim);
|
||||
@ -2281,8 +2287,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
|
||||
if (!(gfp_mask & __GFP_WAIT))
|
||||
return CHARGE_WOULDBLOCK;
|
||||
|
||||
ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL,
|
||||
gfp_mask, flags, NULL);
|
||||
ret = mem_cgroup_reclaim(mem_over_limit, gfp_mask, flags);
|
||||
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
|
||||
return CHARGE_RETRY;
|
||||
/*
|
||||
@ -3559,9 +3564,8 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
|
||||
if (!ret)
|
||||
break;
|
||||
|
||||
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_SHRINK,
|
||||
NULL);
|
||||
mem_cgroup_reclaim(memcg, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_SHRINK);
|
||||
curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
|
||||
/* Usage is reduced ? */
|
||||
if (curusage >= oldusage)
|
||||
@ -3619,10 +3623,9 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
|
||||
if (!ret)
|
||||
break;
|
||||
|
||||
mem_cgroup_hierarchical_reclaim(memcg, NULL, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_NOSWAP |
|
||||
MEM_CGROUP_RECLAIM_SHRINK,
|
||||
NULL);
|
||||
mem_cgroup_reclaim(memcg, GFP_KERNEL,
|
||||
MEM_CGROUP_RECLAIM_NOSWAP |
|
||||
MEM_CGROUP_RECLAIM_SHRINK);
|
||||
curusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
|
||||
/* Usage is reduced ? */
|
||||
if (curusage >= oldusage)
|
||||
@ -3665,10 +3668,8 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
|
||||
break;
|
||||
|
||||
nr_scanned = 0;
|
||||
reclaimed = mem_cgroup_hierarchical_reclaim(mz->mem, zone,
|
||||
gfp_mask,
|
||||
MEM_CGROUP_RECLAIM_SOFT,
|
||||
&nr_scanned);
|
||||
reclaimed = mem_cgroup_soft_reclaim(mz->mem, zone,
|
||||
gfp_mask, &nr_scanned);
|
||||
nr_reclaimed += reclaimed;
|
||||
*total_scanned += nr_scanned;
|
||||
spin_lock(&mctz->lock);
|
||||
|
43
mm/vmscan.c
43
mm/vmscan.c
@ -2104,12 +2104,43 @@ restart:
|
||||
static void shrink_zone(int priority, struct zone *zone,
|
||||
struct scan_control *sc)
|
||||
{
|
||||
struct mem_cgroup_zone mz = {
|
||||
.mem_cgroup = sc->target_mem_cgroup,
|
||||
struct mem_cgroup *root = sc->target_mem_cgroup;
|
||||
struct mem_cgroup_reclaim_cookie reclaim = {
|
||||
.zone = zone,
|
||||
.priority = priority,
|
||||
};
|
||||
struct mem_cgroup *memcg;
|
||||
|
||||
shrink_mem_cgroup_zone(priority, &mz, sc);
|
||||
if (global_reclaim(sc)) {
|
||||
struct mem_cgroup_zone mz = {
|
||||
.mem_cgroup = NULL,
|
||||
.zone = zone,
|
||||
};
|
||||
|
||||
shrink_mem_cgroup_zone(priority, &mz, sc);
|
||||
return;
|
||||
}
|
||||
|
||||
memcg = mem_cgroup_iter(root, NULL, &reclaim);
|
||||
do {
|
||||
struct mem_cgroup_zone mz = {
|
||||
.mem_cgroup = memcg,
|
||||
.zone = zone,
|
||||
};
|
||||
|
||||
shrink_mem_cgroup_zone(priority, &mz, sc);
|
||||
/*
|
||||
* Limit reclaim has historically picked one memcg and
|
||||
* scanned it with decreasing priority levels until
|
||||
* nr_to_reclaim had been reclaimed. This priority
|
||||
* cycle is thus over after a single memcg.
|
||||
*/
|
||||
if (!global_reclaim(sc)) {
|
||||
mem_cgroup_iter_break(root, memcg);
|
||||
break;
|
||||
}
|
||||
memcg = mem_cgroup_iter(root, memcg, &reclaim);
|
||||
} while (memcg);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2374,6 +2405,10 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
.order = 0,
|
||||
.target_mem_cgroup = mem,
|
||||
};
|
||||
struct mem_cgroup_zone mz = {
|
||||
.mem_cgroup = mem,
|
||||
.zone = zone,
|
||||
};
|
||||
|
||||
sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
|
||||
(GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
|
||||
@ -2389,7 +2424,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
|
||||
* will pick up pages from other mem cgroups as well. We hack
|
||||
* the priority and make it zero.
|
||||
*/
|
||||
shrink_zone(0, zone, &sc);
|
||||
shrink_mem_cgroup_zone(0, &mz, &sc);
|
||||
|
||||
trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user