From b57a2907c9d96c56494ef25f8ec821cd0b355dd6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 8 May 2024 10:31:46 -0700 Subject: [PATCH 1/7] selftests/exec: Build both static and non-static load_address tests After commit 4d1cd3b2c5c1 ("tools/testing/selftests/exec: fix link error"), the load address alignment tests tried to build statically. This was silently ignored in some cases. However, after attempting to further fix the build by switching to "-static-pie", the test started failing. This appears to be due to non-PT_INTERP ET_DYN execs ("static PIE") not doing alignment correctly, which remains unfixed[1]. See commit aeb7923733d1 ("revert "fs/binfmt_elf: use PT_LOAD p_align values for static PIE"") for more details. Provide rules to build both static and non-static PIE binaries, improve debug reporting, and perform several test steps instead of a single all-or-nothing test. However, do not actually enable static-pie tests; alignment specification is only supported for ET_DYN with PT_INTERP ("regular PIE"). Link: https://bugzilla.kernel.org/show_bug.cgi?id=215275 [1] Link: https://lore.kernel.org/r/20240508173149.677910-1-keescook@chromium.org Signed-off-by: Kees Cook --- tools/testing/selftests/exec/Makefile | 19 +++--- tools/testing/selftests/exec/load_address.c | 67 +++++++++++++++++---- 2 files changed, 66 insertions(+), 20 deletions(-) diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index fb4472ddffd8..619cff81d796 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -3,8 +3,13 @@ CFLAGS = -Wall CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE +ALIGNS := 0x1000 0x200000 0x1000000 +ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) +ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) +ALIGNMENT_TESTS := $(ALIGN_PIES) + TEST_PROGS := binfmt_script.py -TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular +TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -28,9 +33,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ -$(OUTPUT)/load_address_4096: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@ -$(OUTPUT)/load_address_2097152: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@ -$(OUTPUT)/load_address_16777216: load_address.c - $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@ +$(OUTPUT)/load_address.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -pie $< -o $@ +$(OUTPUT)/load_address.static.0x%: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \ + -fPIE -static-pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c index 17e3207d34ae..8257fddba8c8 100644 --- a/tools/testing/selftests/exec/load_address.c +++ b/tools/testing/selftests/exec/load_address.c @@ -5,11 +5,13 @@ #include #include #include +#include #include "../kselftest.h" struct Statistics { unsigned long long load_address; unsigned long long alignment; + bool interp; }; int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) @@ -26,11 +28,20 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) stats->alignment = 0; for (i = 0; i < info->dlpi_phnum; i++) { + unsigned long long align; + + if (info->dlpi_phdr[i].p_type == PT_INTERP) { + stats->interp = true; + continue; + } + if (info->dlpi_phdr[i].p_type != PT_LOAD) continue; - if (info->dlpi_phdr[i].p_align > stats->alignment) - stats->alignment = info->dlpi_phdr[i].p_align; + align = info->dlpi_phdr[i].p_align; + + if (align > stats->alignment) + stats->alignment = align; } return 1; // Terminate dl_iterate_phdr. @@ -38,27 +49,57 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) int main(int argc, char **argv) { - struct Statistics extracted; - unsigned long long misalign; + struct Statistics extracted = { }; + unsigned long long misalign, pow2; + bool interp_needed; + char buf[1024]; + FILE *maps; int ret; ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(4); + /* Dump maps file for debugging reference. */ + maps = fopen("/proc/self/maps", "r"); + if (!maps) + ksft_exit_fail_msg("FAILED: /proc/self/maps: %s\n", strerror(errno)); + while (fgets(buf, sizeof(buf), maps)) { + ksft_print_msg("%s", buf); + } + fclose(maps); + + /* Walk the program headers. */ ret = dl_iterate_phdr(ExtractStatistics, &extracted); if (ret != 1) ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n"); - if (extracted.alignment == 0) - ksft_exit_fail_msg("FAILED: No alignment found\n"); - else if (extracted.alignment & (extracted.alignment - 1)) - ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n"); + /* Report our findings. */ + ksft_print_msg("load_address=%#llx alignment=%#llx\n", + extracted.load_address, extracted.alignment); + /* If we're named with ".static." we expect no INTERP. */ + interp_needed = strstr(argv[0], ".static.") == NULL; + + /* Were we built as expected? */ + ksft_test_result(interp_needed == extracted.interp, + "%s INTERP program header %s\n", + interp_needed ? "Wanted" : "Unwanted", + extracted.interp ? "seen" : "missing"); + + /* Did we find an alignment? */ + ksft_test_result(extracted.alignment != 0, + "Alignment%s found\n", extracted.alignment ? "" : " NOT"); + + /* Is the alignment sane? */ + pow2 = extracted.alignment & (extracted.alignment - 1); + ksft_test_result(pow2 == 0, + "Alignment is%s a power of 2: %#llx\n", + pow2 == 0 ? "" : " NOT", extracted.alignment); + + /* Is the load address aligned? */ misalign = extracted.load_address & (extracted.alignment - 1); - if (misalign) - ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n", - extracted.alignment, extracted.load_address); + ksft_test_result(misalign == 0, "Load Address is %saligned (%#llx)\n", + misalign ? "MIS" : "", misalign); - ksft_test_result_pass("Completed\n"); ksft_finished(); } From 2d4cf7b190bbfadd4986bf5c34da17c1a88adf8e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 8 May 2024 10:31:47 -0700 Subject: [PATCH 2/7] binfmt_elf: Calculate total_size earlier In preparation to support PT_LOAD with large p_align values on non-PT_INTERP ET_DYN executables (i.e. "static pie"), we'll need to use the total_size details earlier. Move this separately now to make the next patch more readable. As total_size and load_bias are currently calculated separately, this has no behavioral impact. Link: https://lore.kernel.org/r/20240508173149.677910-2-keescook@chromium.org Signed-off-by: Kees Cook --- fs/binfmt_elf.c | 52 +++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index a43897b03ce9..f59e23b29c3b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1061,7 +1061,34 @@ out_free_interp: * Header for ET_DYN binaries to calculate the * randomization (load_bias) for all the LOAD * Program Headers. + */ + + /* + * Calculate the entire size of the ELF mapping + * (total_size), used for the initial mapping, + * due to load_addr_set which is set to true later + * once the initial mapping is performed. * + * Note that this is only sensible when the LOAD + * segments are contiguous (or overlapping). If + * used for LOADs that are far apart, this would + * cause the holes between LOADs to be mapped, + * running the risk of having the mapping fail, + * as it would be larger than the ELF file itself. + * + * As a result, only ET_DYN does this, since + * some ET_EXEC (e.g. ia64) may have large virtual + * memory holes between LOADs. + * + */ + total_size = total_mapping_size(elf_phdata, + elf_ex->e_phnum); + if (!total_size) { + retval = -EINVAL; + goto out_free_dentry; + } + + /* * There are effectively two types of ET_DYN * binaries: programs (i.e. PIE: ET_DYN with INTERP) * and loaders (ET_DYN without INTERP, since they @@ -1102,31 +1129,6 @@ out_free_interp: * is then page aligned. */ load_bias = ELF_PAGESTART(load_bias - vaddr); - - /* - * Calculate the entire size of the ELF mapping - * (total_size), used for the initial mapping, - * due to load_addr_set which is set to true later - * once the initial mapping is performed. - * - * Note that this is only sensible when the LOAD - * segments are contiguous (or overlapping). If - * used for LOADs that are far apart, this would - * cause the holes between LOADs to be mapped, - * running the risk of having the mapping fail, - * as it would be larger than the ELF file itself. - * - * As a result, only ET_DYN does this, since - * some ET_EXEC (e.g. ia64) may have large virtual - * memory holes between LOADs. - * - */ - total_size = total_mapping_size(elf_phdata, - elf_ex->e_phnum); - if (!total_size) { - retval = -EINVAL; - goto out_free_dentry; - } } error = elf_load(bprm->file, load_bias + vaddr, elf_ppnt, From 3545deff0ec7a37de7ed9632e262598582b140e9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 8 May 2024 10:31:48 -0700 Subject: [PATCH 3/7] binfmt_elf: Honor PT_LOAD alignment for static PIE The p_align values in PT_LOAD were ignored for static PIE executables (i.e. ET_DYN without PT_INTERP). This is because there is no way to request a non-fixed mmap region with a specific alignment. ET_DYN with PT_INTERP uses a separate base address (ELF_ET_DYN_BASE) and binfmt_elf performs the ASLR itself, which means it can also apply alignment. For the mmap region, the address selection happens deep within the vm_mmap() implementation (when the requested address is 0). The earlier attempt to implement this: commit 9630f0d60fec ("fs/binfmt_elf: use PT_LOAD p_align values for static PIE") commit 925346c129da ("fs/binfmt_elf: fix PT_LOAD p_align values for loaders") did not take into account the different base address origins, and were eventually reverted: aeb7923733d1 ("revert "fs/binfmt_elf: use PT_LOAD p_align values for static PIE"") In order to get the correct alignment from an mmap base, binfmt_elf must perform a 0-address load first, then tear down the mapping and perform alignment on the resulting address. Since this is slightly more overhead, only do this when it is needed (i.e. the alignment is not the default ELF alignment). This does, however, have the benefit of being able to use MAP_FIXED_NOREPLACE, to avoid potential collisions. With this fixed, enable the static PIE self tests again. Reported-by: H.J. Lu Closes: https://bugzilla.kernel.org/show_bug.cgi?id=215275 Link: https://lore.kernel.org/r/20240508173149.677910-3-keescook@chromium.org Signed-off-by: Kees Cook --- fs/binfmt_elf.c | 42 +++++++++++++++++++++++---- tools/testing/selftests/exec/Makefile | 2 +- 2 files changed, 38 insertions(+), 6 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f59e23b29c3b..7860d6504499 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1088,10 +1088,13 @@ out_free_interp: goto out_free_dentry; } + /* Calculate any requested alignment. */ + alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); + /* * There are effectively two types of ET_DYN - * binaries: programs (i.e. PIE: ET_DYN with INTERP) - * and loaders (ET_DYN without INTERP, since they + * binaries: programs (i.e. PIE: ET_DYN with PT_INTERP) + * and loaders (ET_DYN without PT_INTERP, since they * _are_ the ELF interpreter). The loaders must * be loaded away from programs since the program * may otherwise collide with the loader (especially @@ -1111,15 +1114,44 @@ out_free_interp: * without MAP_FIXED nor MAP_FIXED_NOREPLACE). */ if (interpreter) { + /* On ET_DYN with PT_INTERP, we do the ASLR. */ load_bias = ELF_ET_DYN_BASE; if (current->flags & PF_RANDOMIZE) load_bias += arch_mmap_rnd(); - alignment = maximum_alignment(elf_phdata, elf_ex->e_phnum); + /* Adjust alignment as requested. */ if (alignment) load_bias &= ~(alignment - 1); elf_flags |= MAP_FIXED_NOREPLACE; - } else - load_bias = 0; + } else { + /* + * For ET_DYN without PT_INTERP, we rely on + * the architectures's (potentially ASLR) mmap + * base address (via a load_bias of 0). + * + * When a large alignment is requested, we + * must do the allocation at address "0" right + * now to discover where things will load so + * that we can adjust the resulting alignment. + * In this case (load_bias != 0), we can use + * MAP_FIXED_NOREPLACE to make sure the mapping + * doesn't collide with anything. + */ + if (alignment > ELF_MIN_ALIGN) { + load_bias = elf_load(bprm->file, 0, elf_ppnt, + elf_prot, elf_flags, total_size); + if (BAD_ADDR(load_bias)) { + retval = IS_ERR_VALUE(load_bias) ? + PTR_ERR((void*)load_bias) : -EINVAL; + goto out_free_dentry; + } + vm_munmap(load_bias, total_size); + /* Adjust alignment as requested. */ + if (alignment) + load_bias &= ~(alignment - 1); + elf_flags |= MAP_FIXED_NOREPLACE; + } else + load_bias = 0; + } /* * Since load_bias is used for all subsequent loading diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 619cff81d796..ab67d58cfab7 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -6,7 +6,7 @@ CFLAGS += -D_GNU_SOURCE ALIGNS := 0x1000 0x200000 0x1000000 ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS)) ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS)) -ALIGNMENT_TESTS := $(ALIGN_PIES) +ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES) TEST_PROGS := binfmt_script.py TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS) From 60371f43e56bfad1f438621fae286e1de5bb6877 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 19 May 2024 17:17:59 -0700 Subject: [PATCH 4/7] exec: Add KUnit test for bprm_stack_limits() Since bprm_stack_limits() operates with very limited side-effects, add it as the first exec.c KUnit test. Add to Kconfig and adjust MAINTAINERS file to include it. Tested on 64-bit UML: $ tools/testing/kunit/kunit.py run exec Link: https://lore.kernel.org/lkml/20240520021615.741800-1-keescook@chromium.org/ Signed-off-by: Kees Cook --- MAINTAINERS | 2 + fs/Kconfig.binfmt | 8 ++++ fs/exec.c | 13 ++++++ fs/exec_test.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 fs/exec_test.c diff --git a/MAINTAINERS b/MAINTAINERS index 8754ac2c259d..8dfbe998f175 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8218,7 +8218,9 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/execve F: Documentation/userspace-api/ELF.rst F: fs/*binfmt_*.c +F: fs/Kconfig.binfmt F: fs/exec.c +F: fs/exec_test.c F: include/linux/binfmts.h F: include/linux/elf.h F: include/uapi/linux/binfmts.h diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index f5693164ca9a..bd2f530e5740 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt @@ -176,4 +176,12 @@ config COREDUMP certainly want to say Y here. Not necessary on systems that never need debugging or only ever run flawless code. +config EXEC_KUNIT_TEST + bool "Build execve tests" if !KUNIT_ALL_TESTS + depends on KUNIT=y + default KUNIT_ALL_TESTS + help + This builds the exec KUnit tests, which tests boundary conditions + of various aspects of the exec internals. + endmenu diff --git a/fs/exec.c b/fs/exec.c index 40073142288f..c3bec126505b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -486,6 +486,15 @@ static int count_strings_kernel(const char *const *argv) return i; } +/* + * Calculate bprm->argmin from: + * - _STK_LIM + * - ARG_MAX + * - bprm->rlim_stack.rlim_cur + * - bprm->argc + * - bprm->envc + * - bprm->p + */ static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; @@ -2211,3 +2220,7 @@ static int __init init_fs_exec_sysctls(void) fs_initcall(init_fs_exec_sysctls); #endif /* CONFIG_SYSCTL */ + +#ifdef CONFIG_EXEC_KUNIT_TEST +#include "exec_test.c" +#endif diff --git a/fs/exec_test.c b/fs/exec_test.c new file mode 100644 index 000000000000..32a90c6f47e7 --- /dev/null +++ b/fs/exec_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include + +struct bprm_stack_limits_result { + struct linux_binprm bprm; + int expected_rc; + unsigned long expected_argmin; +}; + +static const struct bprm_stack_limits_result bprm_stack_limits_results[] = { + /* Giant values produce -E2BIG */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = INT_MAX, .envc = INT_MAX }, .expected_rc = -E2BIG }, + /* + * 0 rlim_stack will get raised to ARG_MAX. With 1 string pointer, + * we should see p - ARG_MAX + sizeof(void *). + */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = 1, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)}, + /* Validate that argc is always raised to a minimum of 1. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = 0, .envc = 0 }, .expected_argmin = ULONG_MAX - ARG_MAX + sizeof(void *)}, + /* + * 0 rlim_stack will get raised to ARG_MAX. With pointers filling ARG_MAX, + * we should see -E2BIG. (Note argc is always raised to at least 1.) + */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG }, + /* And with one less, we see space for exactly 1 pointer. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 0, + .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + /* If we raise rlim_stack / 4 to exactly ARG_MAX, nothing changes. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = ARG_MAX / sizeof(void *), .envc = 0 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = ARG_MAX / sizeof(void *) + 1, .envc = 0 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = 0, .envc = ARG_MAX / sizeof(void *) }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = (ARG_MAX / sizeof(void *)) - 1, .envc = 0 }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ARG_MAX * 4, + .argc = 0, .envc = (ARG_MAX / sizeof(void *)) - 2, }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + /* But raising it another pointer * 4 will provide space for 1 more pointer. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4, + .argc = ARG_MAX / sizeof(void *), .envc = 0 }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = (ARG_MAX + sizeof(void *)) * 4, + .argc = 0, .envc = ARG_MAX / sizeof(void *) - 1 }, + .expected_argmin = ULONG_MAX - sizeof(void *) }, + /* Raising rlim_stack / 4 to _STK_LIM / 4 * 3 will see more space. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3), + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3), + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, + /* But raising it any further will see no increase. */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * 3 + sizeof(void *)), + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * (_STK_LIM / 4 * + sizeof(void *)), + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM, + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = 4 * _STK_LIM, + .argc = 0, .envc = 0 }, + .expected_argmin = ULONG_MAX - (_STK_LIM / 4 * 3) + sizeof(void *) }, +}; + +static void exec_test_bprm_stack_limits(struct kunit *test) +{ + /* Double-check the constants. */ + KUNIT_EXPECT_EQ(test, _STK_LIM, SZ_8M); + KUNIT_EXPECT_EQ(test, ARG_MAX, 32 * SZ_4K); + + for (int i = 0; i < ARRAY_SIZE(bprm_stack_limits_results); i++) { + const struct bprm_stack_limits_result *result = &bprm_stack_limits_results[i]; + struct linux_binprm bprm = result->bprm; + int rc; + + rc = bprm_stack_limits(&bprm); + KUNIT_EXPECT_EQ_MSG(test, rc, result->expected_rc, "on loop %d", i); + KUNIT_EXPECT_EQ_MSG(test, bprm.argmin, result->expected_argmin, "on loop %d", i); + } +} + +static struct kunit_case exec_test_cases[] = { + KUNIT_CASE(exec_test_bprm_stack_limits), + {}, +}; + +static struct kunit_suite exec_test_suite = { + .name = "exec", + .test_cases = exec_test_cases, +}; + +kunit_test_suite(exec_test_suite); From 2a97388a807b6ab5538aa8f8537b2463c6988bd2 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 21 Jun 2024 21:54:50 +0300 Subject: [PATCH 5/7] ELF: fix kernel.randomize_va_space double read ELF loader uses "randomize_va_space" twice. It is sysctl and can change at any moment, so 2 loads could see 2 different values in theory with unpredictable consequences. Issue exactly one load for consistent value across one exec. Signed-off-by: Alexey Dobriyan Link: https://lore.kernel.org/r/3329905c-7eb8-400a-8f0a-d87cff979b5b@p183 Signed-off-by: Kees Cook --- fs/binfmt_elf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 7860d6504499..40111451aa95 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1003,7 +1003,8 @@ out_free_interp: if (elf_read_implies_exec(*elf_ex, executable_stack)) current->personality |= READ_IMPLIES_EXEC; - if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) + const int snapshot_randomize_va_space = READ_ONCE(randomize_va_space); + if (!(current->personality & ADDR_NO_RANDOMIZE) && snapshot_randomize_va_space) current->flags |= PF_RANDOMIZE; setup_new_exec(bprm); @@ -1285,7 +1286,7 @@ out_free_interp: mm->end_data = end_data; mm->start_stack = bprm->p; - if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) { + if ((current->flags & PF_RANDOMIZE) && (snapshot_randomize_va_space > 1)) { /* * For architectures with ELF randomization, when executing * a loader directly (i.e. no interpreter listed in ELF From 084ebf7ca83e6cb743784f2eecc654193ce064fb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 21 Jun 2024 13:50:43 -0700 Subject: [PATCH 6/7] execve: Keep bprm->argmin behind CONFIG_MMU When argmin was added in commit 655c16a8ce9c ("exec: separate MM_ANONPAGES and RLIMIT_STACK accounting"), it was intended only for validating stack limits on CONFIG_MMU[1]. All checking for reaching the limit (argmin) is wrapped in CONFIG_MMU ifdef checks, though setting argmin was not. That argmin is only supposed to be used under CONFIG_MMU was rediscovered recently[2], and I don't want to trip over this again. Move argmin's declaration into the existing CONFIG_MMU area, and add helpers functions so the MMU tests can be consolidated. Link: https://lore.kernel.org/all/20181126122307.GA1660@redhat.com [1] Link: https://lore.kernel.org/all/202406211253.7037F69@keescook/ [2] Link: https://lore.kernel.org/r/20240621205046.4001362-1-kees@kernel.org Signed-off-by: Kees Cook --- fs/exec.c | 26 ++++++++++++++++++++------ fs/exec_test.c | 2 ++ include/linux/binfmts.h | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index c3bec126505b..b7bc63bfb907 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -486,6 +486,23 @@ static int count_strings_kernel(const char *const *argv) return i; } +static inline int bprm_set_stack_limit(struct linux_binprm *bprm, + unsigned long limit) +{ +#ifdef CONFIG_MMU + bprm->argmin = bprm->p - limit; +#endif + return 0; +} +static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm) +{ +#ifdef CONFIG_MMU + return bprm->p < bprm->argmin; +#else + return false; +#endif +} + /* * Calculate bprm->argmin from: * - _STK_LIM @@ -532,8 +549,7 @@ static int bprm_stack_limits(struct linux_binprm *bprm) return -E2BIG; limit -= ptr_size; - bprm->argmin = bprm->p - limit; - return 0; + return bprm_set_stack_limit(bprm, limit); } /* @@ -571,10 +587,8 @@ static int copy_strings(int argc, struct user_arg_ptr argv, pos = bprm->p; str += len; bprm->p -= len; -#ifdef CONFIG_MMU - if (bprm->p < bprm->argmin) + if (bprm_hit_stack_limit(bprm)) goto out; -#endif while (len > 0) { int offset, bytes_to_copy; @@ -649,7 +663,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) /* We're going to work our way backwards. */ arg += len; bprm->p -= len; - if (IS_ENABLED(CONFIG_MMU) && bprm->p < bprm->argmin) + if (bprm_hit_stack_limit(bprm)) return -E2BIG; while (len > 0) { diff --git a/fs/exec_test.c b/fs/exec_test.c index 32a90c6f47e7..8fea0bf0b7f5 100644 --- a/fs/exec_test.c +++ b/fs/exec_test.c @@ -96,7 +96,9 @@ static void exec_test_bprm_stack_limits(struct kunit *test) rc = bprm_stack_limits(&bprm); KUNIT_EXPECT_EQ_MSG(test, rc, result->expected_rc, "on loop %d", i); +#ifdef CONFIG_MMU KUNIT_EXPECT_EQ_MSG(test, bprm.argmin, result->expected_argmin, "on loop %d", i); +#endif } } diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 70f97f685bff..e6c00e860951 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -19,13 +19,13 @@ struct linux_binprm { #ifdef CONFIG_MMU struct vm_area_struct *vma; unsigned long vma_pages; + unsigned long argmin; /* rlimit marker for copy_strings() */ #else # define MAX_ARG_PAGES 32 struct page *page[MAX_ARG_PAGES]; #endif struct mm_struct *mm; unsigned long p; /* current top of mem */ - unsigned long argmin; /* rlimit marker for copy_strings() */ unsigned int /* Should an execfd be passed to userspace? */ have_execfd:1, From 21f93108306026b8066db31c24a097192c8c36c7 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 21 Jun 2024 13:50:44 -0700 Subject: [PATCH 7/7] exec: Avoid pathological argc, envc, and bprm->p values Make sure nothing goes wrong with the string counters or the bprm's belief about the stack pointer. Add checks and matching self-tests. Take special care for !CONFIG_MMU, since argmin is not exposed there. For 32-bit validation, 32-bit UML was used: $ tools/testing/kunit/kunit.py run \ --make_options CROSS_COMPILE=i686-linux-gnu- \ --make_options SUBARCH=i386 \ exec For !MMU validation, m68k was used: $ tools/testing/kunit/kunit.py run \ --arch m68k --make_option CROSS_COMPILE=m68k-linux-gnu- \ exec Link: https://lore.kernel.org/r/20240520021615.741800-2-keescook@chromium.org Link: https://lore.kernel.org/r/20240621205046.4001362-2-kees@kernel.org Signed-off-by: Kees Cook --- fs/exec.c | 10 +++++++++- fs/exec_test.c | 28 +++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index b7bc63bfb907..5b580ff8d955 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -490,6 +490,9 @@ static inline int bprm_set_stack_limit(struct linux_binprm *bprm, unsigned long limit) { #ifdef CONFIG_MMU + /* Avoid a pathological bprm->p. */ + if (bprm->p < limit) + return -E2BIG; bprm->argmin = bprm->p - limit; #endif return 0; @@ -531,6 +534,9 @@ static int bprm_stack_limits(struct linux_binprm *bprm) * of argument strings even with small stacks */ limit = max_t(unsigned long, limit, ARG_MAX); + /* Reject totally pathological counts. */ + if (bprm->argc < 0 || bprm->envc < 0) + return -E2BIG; /* * We must account for the size of all the argv and envp pointers to * the argv and envp strings, since they will also take up space in @@ -544,7 +550,9 @@ static int bprm_stack_limits(struct linux_binprm *bprm) * argc can never be 0, to keep them from walking envp by accident. * See do_execveat_common(). */ - ptr_size = (max(bprm->argc, 1) + bprm->envc) * sizeof(void *); + if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) || + check_mul_overflow(ptr_size, sizeof(void *), &ptr_size)) + return -E2BIG; if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; diff --git a/fs/exec_test.c b/fs/exec_test.c index 8fea0bf0b7f5..7c77d039680b 100644 --- a/fs/exec_test.c +++ b/fs/exec_test.c @@ -8,9 +8,34 @@ struct bprm_stack_limits_result { }; static const struct bprm_stack_limits_result bprm_stack_limits_results[] = { - /* Giant values produce -E2BIG */ + /* Negative argc/envc counts produce -E2BIG */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = INT_MIN, .envc = INT_MIN }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = 5, .envc = -1 }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = -1, .envc = 10 }, .expected_rc = -E2BIG }, + /* The max value of argc or envc is MAX_ARG_STRINGS. */ { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, .argc = INT_MAX, .envc = INT_MAX }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = MAX_ARG_STRINGS, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = 0, .envc = MAX_ARG_STRINGS }, .expected_rc = -E2BIG }, + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = MAX_ARG_STRINGS, .envc = 0 }, .expected_rc = -E2BIG }, + /* + * On 32-bit system these argc and envc counts, while likely impossible + * to represent within the associated TASK_SIZE, could overflow the + * limit calculation, and bypass the ptr_size <= limit check. + */ + { { .p = ULONG_MAX, .rlim_stack.rlim_cur = ULONG_MAX, + .argc = 0x20000001, .envc = 0x20000001 }, .expected_rc = -E2BIG }, +#ifdef CONFIG_MMU + /* Make sure a pathological bprm->p doesn't cause an overflow. */ + { { .p = sizeof(void *), .rlim_stack.rlim_cur = ULONG_MAX, + .argc = 10, .envc = 10 }, .expected_rc = -E2BIG }, +#endif /* * 0 rlim_stack will get raised to ARG_MAX. With 1 string pointer, * we should see p - ARG_MAX + sizeof(void *). @@ -88,6 +113,7 @@ static void exec_test_bprm_stack_limits(struct kunit *test) /* Double-check the constants. */ KUNIT_EXPECT_EQ(test, _STK_LIM, SZ_8M); KUNIT_EXPECT_EQ(test, ARG_MAX, 32 * SZ_4K); + KUNIT_EXPECT_EQ(test, MAX_ARG_STRINGS, 0x7FFFFFFF); for (int i = 0; i < ARRAY_SIZE(bprm_stack_limits_results); i++) { const struct bprm_stack_limits_result *result = &bprm_stack_limits_results[i];