oom: move oom_adj value from task_struct to signal_struct
Currently, the OOM logic call flow is as follows:
__out_of_memory()
select_bad_process() for each task
badness() calculate badness of one task
oom_kill_process() search child
oom_kill_task() kill target task and mm shared tasks with it
For example, process-A has two threads, thread-A and thread-B, and it uses a very
large amount of memory; each thread has the following oom_adj and oom_score:
thread-A: oom_adj = OOM_DISABLE, oom_score = 0
thread-B: oom_adj = 0, oom_score = very-high
Then select_bad_process() selects thread-B, but oom_kill_task() refuses to
kill the task because thread-A has OOM_DISABLE. Thus __out_of_memory()
calls select_bad_process() again, but select_bad_process() selects the same
task. This means the kernel falls into a livelock.
The fact is that select_bad_process() must select a killable task; otherwise
the OOM logic goes into a livelock.
The root cause is that oom_adj shouldn't be a per-thread value. It should be a
per-process value because the OOM killer kills a process, not a thread. Thus
this patch moves oomkilladj (now more appropriately named oom_adj) from
struct task_struct to struct signal_struct. This naturally prevents
select_bad_process() from choosing the wrong task.
Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
committed by
Linus Torvalds
parent
f168e1b639
commit
28b83c5193
@@ -58,6 +58,10 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
||||
unsigned long points, cpu_time, run_time;
|
||||
struct mm_struct *mm;
|
||||
struct task_struct *child;
|
||||
int oom_adj = p->signal->oom_adj;
|
||||
|
||||
if (oom_adj == OOM_DISABLE)
|
||||
return 0;
|
||||
|
||||
task_lock(p);
|
||||
mm = p->mm;
|
||||
@@ -148,15 +152,15 @@ unsigned long badness(struct task_struct *p, unsigned long uptime)
|
||||
points /= 8;
|
||||
|
||||
/*
|
||||
* Adjust the score by oomkilladj.
|
||||
* Adjust the score by oom_adj.
|
||||
*/
|
||||
if (p->oomkilladj) {
|
||||
if (p->oomkilladj > 0) {
|
||||
if (oom_adj) {
|
||||
if (oom_adj > 0) {
|
||||
if (!points)
|
||||
points = 1;
|
||||
points <<= p->oomkilladj;
|
||||
points <<= oom_adj;
|
||||
} else
|
||||
points >>= -(p->oomkilladj);
|
||||
points >>= -(oom_adj);
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
@@ -251,7 +255,7 @@ static struct task_struct *select_bad_process(unsigned long *ppoints,
|
||||
*ppoints = ULONG_MAX;
|
||||
}
|
||||
|
||||
if (p->oomkilladj == OOM_DISABLE)
|
||||
if (p->signal->oom_adj == OOM_DISABLE)
|
||||
continue;
|
||||
|
||||
points = badness(p, uptime.tv_sec);
|
||||
@@ -304,7 +308,7 @@ static void dump_tasks(const struct mem_cgroup *mem)
|
||||
}
|
||||
printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n",
|
||||
p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm,
|
||||
get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj,
|
||||
get_mm_rss(mm), (int)task_cpu(p), p->signal->oom_adj,
|
||||
p->comm);
|
||||
task_unlock(p);
|
||||
} while_each_thread(g, p);
|
||||
@@ -359,18 +363,9 @@ static int oom_kill_task(struct task_struct *p)
|
||||
* change to NULL at any time since we do not hold task_lock(p).
|
||||
* However, this is of no concern to us.
|
||||
*/
|
||||
|
||||
if (mm == NULL)
|
||||
if (!mm || p->signal->oom_adj == OOM_DISABLE)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Don't kill the process if any threads are set to OOM_DISABLE
|
||||
*/
|
||||
do_each_thread(g, q) {
|
||||
if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
|
||||
return 1;
|
||||
} while_each_thread(g, q);
|
||||
|
||||
__oom_kill_task(p, 1);
|
||||
|
||||
/*
|
||||
@@ -394,8 +389,9 @@ static int oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
|
||||
|
||||
if (printk_ratelimit()) {
|
||||
printk(KERN_WARNING "%s invoked oom-killer: "
|
||||
"gfp_mask=0x%x, order=%d, oomkilladj=%d\n",
|
||||
current->comm, gfp_mask, order, current->oomkilladj);
|
||||
"gfp_mask=0x%x, order=%d, oom_adj=%d\n",
|
||||
current->comm, gfp_mask, order,
|
||||
current->signal->oom_adj);
|
||||
task_lock(current);
|
||||
cpuset_print_task_mems_allowed(current);
|
||||
task_unlock(current);
|
||||
|
||||
Reference in New Issue
Block a user