exec: make argv/envp memory visible to oom-killer

Brad Spengler published a local memory-allocation DoS that
evades the OOM-killer (though not the virtual memory RLIMIT):
http://www.grsecurity.net/~spender/64bit_dos.c

execve()->copy_strings() can allocate a lot of memory, but
this is not visible to the oom-killer: nobody can see the nascent
bprm->mm and take it into account.

With this patch get_arg_page() increments current's MM_ANONPAGES
counter every time we allocate a new page for argv/envp. When
do_execve() succeeds or fails, we change this counter back.

Technically this is not 100% correct, we can't know if the new
page is swapped out and turn MM_ANONPAGES into MM_SWAPENTS, but
I don't think this really matters and everything becomes correct
once exec changes ->mm or fails.

Reported-by: Brad Spengler <spender@grsecurity.net>
Reviewed-and-discussed-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Oleg Nesterov 2010-11-30 20:55:34 +01:00 committed by Linus Torvalds
parent 37a09f0745
commit 3c77f84572
2 changed files with 31 additions and 2 deletions

View File

@ -164,6 +164,25 @@ out:
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
/*
 * Account the argv/envp pages held by @bprm against the current task's mm.
 *
 * @bprm:  binprm whose vma_pages field records how many pages are
 *         currently accounted.
 * @pages: the page count that should be accounted from now on; 0 undoes
 *         everything accounted so far.
 *
 * The signed difference between @pages and bprm->vma_pages is added to
 * the MM_ANONPAGES counter of current->mm.  Nothing happens if current
 * has no mm or if the count did not change.
 */
static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
	struct mm_struct *cur_mm = current->mm;
	long delta = (long)(pages - bprm->vma_pages);

	if (cur_mm && delta) {
		/* Remember the new level before adjusting the counter. */
		bprm->vma_pages = pages;

		/*
		 * Without SPLIT_RSS_COUNTING the counter update is done
		 * under page_table_lock.
		 */
#ifndef SPLIT_RSS_COUNTING
		spin_lock(&cur_mm->page_table_lock);
#endif
		add_mm_counter(cur_mm, MM_ANONPAGES, delta);
#ifndef SPLIT_RSS_COUNTING
		spin_unlock(&cur_mm->page_table_lock);
#endif
	}
}
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write) int write)
{ {
@ -186,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start; unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
struct rlimit *rlim; struct rlimit *rlim;
acct_arg_size(bprm, size / PAGE_SIZE);
/* /*
* We've historically supported up to 32 pages (ARG_MAX) * We've historically supported up to 32 pages (ARG_MAX)
* of argument strings even with small stacks * of argument strings even with small stacks
@ -276,6 +297,10 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
#else #else
/*
 * Stub for the #else branch of the configuration conditional (presumably
 * !CONFIG_MMU -- confirm against the full file): argument-page accounting
 * is a no-op here, so callers can invoke acct_arg_size() unconditionally.
 */
static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
int write) int write)
{ {
@ -1003,6 +1028,7 @@ int flush_old_exec(struct linux_binprm * bprm)
/* /*
* Release all of the old mmap stuff * Release all of the old mmap stuff
*/ */
acct_arg_size(bprm, 0);
retval = exec_mmap(bprm->mm); retval = exec_mmap(bprm->mm);
if (retval) if (retval)
goto out; goto out;
@ -1426,8 +1452,10 @@ int do_execve(const char * filename,
return retval; return retval;
out: out:
if (bprm->mm) if (bprm->mm) {
acct_arg_size(bprm, 0);
mmput(bprm->mm); mmput(bprm->mm);
}
out_file: out_file:
if (bprm->file) { if (bprm->file) {

View File

@ -29,6 +29,7 @@ struct linux_binprm{
char buf[BINPRM_BUF_SIZE]; char buf[BINPRM_BUF_SIZE];
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned long vma_pages;
#else #else
# define MAX_ARG_PAGES 32 # define MAX_ARG_PAGES 32
struct page *page[MAX_ARG_PAGES]; struct page *page[MAX_ARG_PAGES];