From 3d712546d8ba9f25cdf080d79f90482aa4231ed4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Jul 2019 13:40:13 -0700 Subject: [PATCH 1/2] /proc//cmdline: remove all the special cases Start off with a clean slate that only reads exactly from arg_start to arg_end, without any oddities. This simplifies the code and in the process removes the case that caused us to potentially leak an uninitialized byte from the temporary kernel buffer. Note that in order to start from scratch with an understandable base, this simplifies things _too_ much, and removes all the legacy logic to handle setproctitle() having changed the argument strings. We'll add back those special cases very differently in the next commit. Link: https://lore.kernel.org/lkml/20190712160913.17727-1-izbyshev@ispras.ru/ Fixes: f5b65348fd77 ("proc: fix missing final NUL in get_mm_cmdline() rewrite") Cc: Alexey Izbyshev Cc: Alexey Dobriyan Signed-off-by: Linus Torvalds --- fs/proc/base.c | 71 ++++++-------------------------------------------- 1 file changed, 8 insertions(+), 63 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 255f6754c70d..8040f9d1cf07 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -212,7 +212,7 @@ static int proc_root_link(struct dentry *dentry, struct path *path) static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { - unsigned long arg_start, arg_end, env_start, env_end; + unsigned long arg_start, arg_end; unsigned long pos, len; char *page; @@ -223,36 +223,18 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; - env_start = mm->env_start; - env_end = mm->env_end; spin_unlock(&mm->arg_lock); if (arg_start >= arg_end) return 0; - /* - * We have traditionally allowed the user to re-write - * the argument strings and overflow the end result - * into the environment section. But only do that if - * the environment area is contiguous to the arguments. - */ - if (env_start != arg_end || env_start >= env_end) - env_start = env_end = arg_end; - - /* .. and limit it to a maximum of one page of slop */ - if (env_end >= arg_end + PAGE_SIZE) - env_end = arg_end + PAGE_SIZE - 1; - /* We're not going to care if "*ppos" has high bits set */ - pos = arg_start + *ppos; - /* .. but we do check the result is in the proper range */ - if (pos < arg_start || pos >= env_end) + pos = arg_start + *ppos; + if (pos < arg_start || pos >= arg_end) return 0; - - /* .. and we never go past env_end */ - if (env_end - pos < count) - count = env_end - pos; + if (count > arg_end - pos) + count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) @@ -262,48 +244,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); - long offset; - /* - * Are we already starting past the official end? - * We always include the last byte that is *supposed* - * to be NUL - */ - offset = (pos >= arg_end) ? pos - arg_end + 1 : 0; - - got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON); - if (got <= offset) + got = access_remote_vm(mm, pos, page, size, FOLL_ANON); + if (got <= 0) break; - got -= offset; - - /* Don't walk past a NUL character once you hit arg_end */ - if (pos + got >= arg_end) { - int n = 0; - - /* - * If we started before 'arg_end' but ended up - * at or after it, we start the NUL character - * check at arg_end-1 (where we expect the normal - * EOF to be). - * - * NOTE! This is smaller than 'got', because - * pos + got >= arg_end - */ - if (pos < arg_end) - n = arg_end - pos - 1; - - /* Cut off at first NUL after 'n' */ - got = n + strnlen(page+n, offset+got-n); - if (got < offset) - break; - got -= offset; - - /* Include the NUL if it existed */ - if (got < size) - got++; - } - - got -= copy_to_user(buf, page+offset, got); + got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; From d26d0cd97c88eb1a5704b42e41ab443406807810 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Jul 2019 14:27:14 -0700 Subject: [PATCH 2/2] /proc//cmdline: add back the setproctitle() special case This makes the setproctitle() special case very explicit indeed, and handles it with a separate helper function entirely. In the process, it re-instates the original semantics of simply stopping at the first NUL character when the original last NUL character is no longer there. [ The original semantics can still be seen in mm/util.c: get_cmdline() that is limited to a fixed-size buffer ] This makes the logic about when we use the string lengths etc much more obvious, and makes it easier to see what we do and what the two very different cases are. Note that even when we allow walking past the end of the argument array (because the setproctitle() might have overwritten and overflowed the original argv[] strings), we only allow it when it overflows into the environment region if it is immediately adjacent. [ Fixed for missing 'count' checks noted by Alexey Izbyshev ] Link: https://lore.kernel.org/lkml/alpine.LNX.2.21.1904052326230.3249@kich.toxcorp.com/ Fixes: 5ab827189965 ("fs/proc: simplify and clarify get_mm_cmdline() function") Cc: Jakub Jankowski Cc: Alexey Dobriyan Cc: Alexey Izbyshev Signed-off-by: Linus Torvalds --- fs/proc/base.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 8040f9d1cf07..8fbab527bd24 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -209,12 +209,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path) return result; } +/* + * If the user used setproctitle(), we just get the string from + * user space at arg_start, and limit it to a maximum of one page. + */ +static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, + size_t count, unsigned long pos, + unsigned long arg_start) +{ + char *page; + int ret, got; + + if (pos >= PAGE_SIZE) + return 0; + + page = (char *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + ret = 0; + got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); + if (got > 0) { + int len = strnlen(page, got); + + /* Include the NUL character if it was found */ + if (len < got) + len++; + + if (len > pos) { + len -= pos; + if (len > count) + len = count; + len -= copy_to_user(buf, page+pos, len); + if (!len) + len = -EFAULT; + ret = len; + } + } + free_page((unsigned long)page); + return ret; +} + static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { - unsigned long arg_start, arg_end; + unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; - char *page; + char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) @@ -223,14 +264,46 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; + env_start = mm->env_start; + env_end = mm->env_end; spin_unlock(&mm->arg_lock); if (arg_start >= arg_end) return 0; + /* + * We allow setproctitle() to overwrite the argument + * strings, and overflow past the original end. But + * only when it overflows into the environment area. + */ + if (env_start != arg_end || env_end < env_start) + env_start = env_end = arg_end; + len = env_end - arg_start; + /* We're not going to care if "*ppos" has high bits set */ - /* .. but we do check the result is in the proper range */ - pos = arg_start + *ppos; + pos = *ppos; + if (pos >= len) + return 0; + if (count > len - pos) + count = len - pos; + if (!count) + return 0; + + /* + * Magical special case: if the argv[] end byte is not + * zero, the user has overwritten it with setproctitle(3). + * + * Possible future enhancement: do this only once when + * pos is 0, and set a flag in the 'struct file'. + */ + if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) + return get_mm_proctitle(mm, buf, count, pos, arg_start); + + /* + * For the non-setproctitle() case we limit things strictly + * to the [arg_start, arg_end[ range. + */ + pos += arg_start; if (pos < arg_start || pos >= arg_end) return 0; if (count > arg_end - pos)