mirror of
https://github.com/torvalds/linux.git
synced 2024-12-28 13:51:44 +00:00
oom-kill: fix NUMA constraint check with nodemask
Fix node-oriented allocation handling in oom-kill.c. I myself think of this as a bugfix, not as an enhancement. These days, things have changed as follows: - alloc_pages() takes a nodemask as an argument, via __alloc_pages_nodemask(). - mempolicy doesn't maintain its own private zonelists. (And cpuset doesn't use nodemask for __alloc_pages_nodemask().) So, the current oom-killer's check function is wrong. This patch does the following: - Check nodemask: if nodemask && nodemask doesn't cover all node_states[N_HIGH_MEMORY], this is CONSTRAINT_MEMORY_POLICY. - Scan the whole zonelist under nodemask; if it hits cpuset's wall, this failure is from cpuset. And - modify the caller of out_of_memory not to call oom if __GFP_THISNODE is set. This doesn't change "current" behavior. If callers use __GFP_THISNODE, they should handle "page allocation failure" by themselves. - Handle the __GFP_NOFAIL+__GFP_THISNODE path. This is something like a FIXME, but this gfpmask is not used now. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hioryu@jp.fujitsu.com> Acked-by: David Rientjes <rientjes@google.com> Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
3b4798cbc1
commit
4365a5676f
@ -339,7 +339,7 @@ static struct sysrq_key_op sysrq_term_op = {
|
||||
|
||||
static void moom_callback(struct work_struct *ignored)
|
||||
{
|
||||
out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0);
|
||||
out_of_memory(node_zonelist(0, GFP_KERNEL), GFP_KERNEL, 0, NULL);
|
||||
}
|
||||
|
||||
static DECLARE_WORK(moom_work, moom_callback);
|
||||
|
@ -10,6 +10,7 @@
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/nodemask.h>
|
||||
|
||||
struct zonelist;
|
||||
struct notifier_block;
|
||||
@ -26,7 +27,8 @@ enum oom_constraint {
|
||||
extern int try_set_zone_oom(struct zonelist *zonelist, gfp_t gfp_flags);
|
||||
extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);
|
||||
|
||||
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order);
|
||||
extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
|
||||
int order, nodemask_t *mask);
|
||||
extern int register_oom_notifier(struct notifier_block *nb);
|
||||
extern int unregister_oom_notifier(struct notifier_block *nb);
|
||||
|
||||
|
@ -196,27 +196,46 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
||||
/*
|
||||
* Determine the type of allocation constraint.
|
||||
*/
|
||||
static inline enum oom_constraint constrained_alloc(struct zonelist *zonelist,
|
||||
gfp_t gfp_mask)
|
||||
{
|
||||
#ifdef CONFIG_NUMA
|
||||
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
|
||||
gfp_t gfp_mask, nodemask_t *nodemask)
|
||||
{
|
||||
struct zone *zone;
|
||||
struct zoneref *z;
|
||||
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
|
||||
nodemask_t nodes = node_states[N_HIGH_MEMORY];
|
||||
|
||||
for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
|
||||
if (cpuset_zone_allowed_softwall(zone, gfp_mask))
|
||||
node_clear(zone_to_nid(zone), nodes);
|
||||
else
|
||||
return CONSTRAINT_CPUSET;
|
||||
/*
|
||||
* Reach here only when __GFP_NOFAIL is used. So, we should avoid
|
||||
* killing current. We have to kill a random task in this case.
|
||||
* Hopefully, CONSTRAINT_THISNODE...but no way to handle it, now.
|
||||
*/
|
||||
if (gfp_mask & __GFP_THISNODE)
|
||||
return CONSTRAINT_NONE;
|
||||
|
||||
if (!nodes_empty(nodes))
|
||||
/*
|
||||
* The nodemask here is a nodemask passed to alloc_pages(). Now,
|
||||
* cpuset doesn't use this nodemask for its hardwall/softwall/hierarchy
|
||||
* feature. mempolicy is the only user of nodemask here.
|
||||
* Check whether mempolicy's nodemask contains all N_HIGH_MEMORY nodes.
|
||||
*/
|
||||
if (nodemask && !nodes_subset(node_states[N_HIGH_MEMORY], *nodemask))
|
||||
return CONSTRAINT_MEMORY_POLICY;
|
||||
#endif
|
||||
|
||||
/* Check this allocation failure is caused by cpuset's wall function */
|
||||
for_each_zone_zonelist_nodemask(zone, z, zonelist,
|
||||
high_zoneidx, nodemask)
|
||||
if (!cpuset_zone_allowed_softwall(zone, gfp_mask))
|
||||
return CONSTRAINT_CPUSET;
|
||||
|
||||
return CONSTRAINT_NONE;
|
||||
}
|
||||
#else
|
||||
static enum oom_constraint constrained_alloc(struct zonelist *zonelist,
|
||||
gfp_t gfp_mask, nodemask_t *nodemask)
|
||||
{
|
||||
return CONSTRAINT_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Simple selection loop. We chose the process with the highest
|
||||
@ -613,7 +632,8 @@ rest_and_return:
|
||||
* OR try to be smart about which process to kill. Note that we
|
||||
* don't have to be perfect here, we just have to be good.
|
||||
*/
|
||||
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
|
||||
void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,
|
||||
int order, nodemask_t *nodemask)
|
||||
{
|
||||
unsigned long freed = 0;
|
||||
enum oom_constraint constraint;
|
||||
@ -632,7 +652,7 @@ void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask, int order)
|
||||
* Check if there were limitations on the allocation (only relevant for
|
||||
* NUMA) that may require different handling.
|
||||
*/
|
||||
constraint = constrained_alloc(zonelist, gfp_mask);
|
||||
constraint = constrained_alloc(zonelist, gfp_mask, nodemask);
|
||||
read_lock(&tasklist_lock);
|
||||
|
||||
switch (constraint) {
|
||||
|
@ -1654,12 +1654,22 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order,
|
||||
if (page)
|
||||
goto out;
|
||||
|
||||
/* The OOM killer will not help higher order allocs */
|
||||
if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_NOFAIL))
|
||||
goto out;
|
||||
|
||||
if (!(gfp_mask & __GFP_NOFAIL)) {
|
||||
/* The OOM killer will not help higher order allocs */
|
||||
if (order > PAGE_ALLOC_COSTLY_ORDER)
|
||||
goto out;
|
||||
/*
|
||||
* GFP_THISNODE contains __GFP_NORETRY and we never hit this.
|
||||
* Sanity check for bare calls of __GFP_THISNODE, not real OOM.
|
||||
* The caller should handle page allocation failure by itself if
|
||||
* it specifies __GFP_THISNODE.
|
||||
* Note: Hugepage uses it but will hit PAGE_ALLOC_COSTLY_ORDER.
|
||||
*/
|
||||
if (gfp_mask & __GFP_THISNODE)
|
||||
goto out;
|
||||
}
|
||||
/* Exhausted what can be done so it's blamo time */
|
||||
out_of_memory(zonelist, gfp_mask, order);
|
||||
out_of_memory(zonelist, gfp_mask, order, nodemask);
|
||||
|
||||
out:
|
||||
clear_zonelist_oom(zonelist, gfp_mask);
|
||||
@ -3123,7 +3133,7 @@ static int __cpuinit process_zones(int cpu)
|
||||
|
||||
if (percpu_pagelist_fraction)
|
||||
setup_pagelist_highmark(zone_pcp(zone, cpu),
|
||||
(zone->present_pages / percpu_pagelist_fraction));
|
||||
(zone->present_pages / percpu_pagelist_fraction));
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user