diff --git a/Documentation/ABI/stable/vdso b/Documentation/ABI/stable/vdso index 951838d42781..85dbb6a160df 100644 --- a/Documentation/ABI/stable/vdso +++ b/Documentation/ABI/stable/vdso @@ -9,9 +9,11 @@ maps an ELF DSO into that program's address space. This DSO is called the vDSO and it often contains useful and highly-optimized alternatives to real syscalls. -These functions are called just like ordinary C function according to -your platform's ABI. Call them from a sensible context. (For example, -if you set CS on x86 to something strange, the vDSO functions are +These functions are called according to your platform's ABI. On many +platforms they are called just like ordinary C function. On other platforms +(ex: powerpc) they are called with the same convention as system calls which +is different from ordinary C functions. Call them from a sensible context. +(For example, if you set CS on x86 to something strange, the vDSO functions are within their rights to crash.) In addition, if you pass a bad pointer to a vDSO function, you might get SIGSEGV instead of -EFAULT. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ed15b876fa74..8f2df6ed745d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -263,6 +263,7 @@ config ARM64 select TRACE_IRQFLAGS_NMI_SUPPORT select HAVE_SOFTIRQ_ON_OWN_STACK select USER_STACKTRACE_SUPPORT + select VDSO_GETRANDOM help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/alternative-macros.h b/arch/arm64/include/asm/alternative-macros.h index d328f549b1a6..c8c77f9e36d6 100644 --- a/arch/arm64/include/asm/alternative-macros.h +++ b/arch/arm64/include/asm/alternative-macros.h @@ -230,7 +230,11 @@ alternative_has_cap_likely(const unsigned long cpucap) return false; asm goto( +#ifdef BUILD_VDSO + ALTERNATIVE("b %l[l_no]", "nop", %[cpucap]) +#else ALTERNATIVE_CB("b %l[l_no]", %[cpucap], alt_cb_patch_nops) +#endif : : [cpucap] "i" (cpucap) : diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 52791715f6e6..9e39217b4afb 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -2,9 +2,11 @@ #ifndef __ASM_MMAN_H__ #define __ASM_MMAN_H__ +#include + +#ifndef BUILD_VDSO #include #include -#include static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, unsigned long pkey) @@ -68,4 +70,6 @@ static inline bool arch_validate_flags(unsigned long vm_flags) } #define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +#endif /* !BUILD_VDSO */ + #endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/vdso/getrandom.h b/arch/arm64/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..342f807e2044 --- /dev/null +++ b/arch/arm64/include/asm/vdso/getrandom.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register void *buffer asm ("x0") = _buffer; + register size_t len asm ("x1") = _len; + register unsigned int flags asm ("x2") = _flags; + register long ret asm ("x0"); + register long nr asm ("x8") = __NR_getrandom; + + asm volatile( + " svc #0\n" + : "=r" (ret) + : "r" (buffer), "r" (len), "r" (flags), "r" (nr) + : "memory"); + + return ret; +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + /* + * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace + * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ + * PAGE_OFFSET points to the real VVAR page. + */ + if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) + return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT); + return &_vdso_rng_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/arm64/include/asm/vdso/vsyscall.h b/arch/arm64/include/asm/vdso/vsyscall.h index f94b1457c117..5b6d0dd3cef5 100644 --- a/arch/arm64/include/asm/vdso/vsyscall.h +++ b/arch/arm64/include/asm/vdso/vsyscall.h @@ -2,11 +2,19 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#define __VDSO_RND_DATA_OFFSET 480 + #ifndef __ASSEMBLY__ #include #include +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES, +}; + #define VDSO_PRECISION_MASK ~(0xFF00ULL<<48) extern struct vdso_data *vdso_data; @@ -21,6 +29,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __arm64_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void) +{ + return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; +} +#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data + static __always_inline void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk) { diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 89b6e7840002..706c9c3a7a50 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -34,12 +34,6 @@ enum vdso_abi { VDSO_ABI_AA32, }; -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - struct vdso_abi_info { const char *name; const char *vdso_code_start; diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d11da6461278..35685c036044 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -9,7 +9,7 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -obj-vdso := vgettimeofday.o note.o sigreturn.o +obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg @@ -34,19 +34,28 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO # -Wmissing-prototypes and -Wmissing-declarations are removed from -# the CFLAGS of vgettimeofday.c to make possible to build the -# kernel with CONFIG_WERROR enabled. -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ - $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ - $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ - -Wmissing-prototypes -Wmissing-declarations +# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled. +CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \ + $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \ + $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \ + -Wmissing-prototypes -Wmissing-declarations -CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables +CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables + +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO) +CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO) + +CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO) +CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO) ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + targets += vdso.lds CPPFLAGS_vdso.lds += -P -C -U$(ARCH) diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 45354f2ddf70..f204a9ddc833 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -11,7 +11,9 @@ #include #include #include +#include #include +#include OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64") OUTPUT_ARCH(aarch64) @@ -19,6 +21,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif @@ -102,6 +105,7 @@ VERSION __kernel_gettimeofday; __kernel_clock_gettime; __kernel_clock_getres; + __kernel_getrandom; local: *; }; } diff --git a/arch/arm64/kernel/vdso/vgetrandom-chacha.S b/arch/arm64/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..67890b445309 --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + + .text + +#define state0 v0 +#define state1 v1 +#define state2 v2 +#define state3 v3 +#define copy0 v4 +#define copy0_q q4 +#define copy1 v5 +#define copy2 v6 +#define copy3 v7 +#define copy3_d d7 +#define one_d d16 +#define one_q q16 +#define one_v v16 +#define tmp v17 +#define rot8 v18 + +/* + * ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonce 0, taking an input key and 8-bytes + * counter. Importantly does not spill to the stack. + * + * This implementation avoids d8-d15 because they are callee-save in user + * space. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + * + * x0: output bytes + * x1: 32-byte key input + * x2: 8-byte counter input/output + * x3: number of 64-byte block to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + + /* copy0 = "expand 32-byte k" */ + mov_q x8, 0x3320646e61707865 + mov_q x9, 0x6b20657479622d32 + mov copy0.d[0], x8 + mov copy0.d[1], x9 + + /* copy1,copy2 = key */ + ld1 { copy1.4s, copy2.4s }, [x1] + /* copy3 = counter || zero nonce */ + ld1 { copy3.2s }, [x2] + + movi one_v.2s, #1 + uzp1 one_v.4s, one_v.4s, one_v.4s + +.Lblock: + /* copy state to auxiliary vectors for the final add after the permute. */ + mov state0.16b, copy0.16b + mov state1.16b, copy1.16b + mov state2.16b, copy2.16b + mov state3.16b, copy3.16b + + mov w4, 20 +.Lpermute: + /* + * Permute one 64-byte block where the state matrix is stored in the four NEON + * registers state0-state3. It performs matrix operations on four words in parallel, + * but requires shuffling to rearrange the words after each round. + */ + +.Ldoubleround: + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[1,2,3,0] */ + ext state1.16b, state1.16b, state1.16b, #4 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #12 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */ + add state0.4s, state0.4s, state1.4s + eor state3.16b, state3.16b, state0.16b + rev32 state3.8h, state3.8h + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #12 + sri state1.4s, tmp.4s, #20 + + /* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */ + add state0.4s, state0.4s, state1.4s + eor tmp.16b, state3.16b, state0.16b + shl state3.4s, tmp.4s, #8 + sri state3.4s, tmp.4s, #24 + + /* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */ + add state2.4s, state2.4s, state3.4s + eor tmp.16b, state1.16b, state2.16b + shl state1.4s, tmp.4s, #7 + sri state1.4s, tmp.4s, #25 + + /* state1[0,1,2,3] = state1[3,0,1,2] */ + ext state1.16b, state1.16b, state1.16b, #12 + /* state2[0,1,2,3] = state2[2,3,0,1] */ + ext state2.16b, state2.16b, state2.16b, #8 + /* state3[0,1,2,3] = state3[1,2,3,0] */ + ext state3.16b, state3.16b, state3.16b, #4 + + subs w4, w4, #2 + b.ne .Ldoubleround + + /* output0 = state0 + state0 */ + add state0.4s, state0.4s, copy0.4s + /* output1 = state1 + state1 */ + add state1.4s, state1.4s, copy1.4s + /* output2 = state2 + state2 */ + add state2.4s, state2.4s, copy2.4s + /* output2 = state3 + state3 */ + add state3.4s, state3.4s, copy3.4s + st1 { state0.16b - state3.16b }, [x0] + + /* + * ++copy3.counter, the 'add' clears the upper half of the SIMD register + * which is the expected behaviour here. + */ + add copy3_d, copy3_d, one_d + + /* output += 64, --nblocks */ + add x0, x0, 64 + subs x3, x3, #1 + b.ne .Lblock + + /* counter = copy3.counter */ + st1 { copy3.2s }, [x2] + + /* Zero out the potentially sensitive regs, in case nothing uses these again. */ + movi state0.16b, #0 + movi state1.16b, #0 + movi state2.16b, #0 + movi state3.16b, #0 + movi copy1.16b, #0 + movi copy2.16b, #0 + ret +SYM_FUNC_END(__arch_chacha20_blocks_nostack) + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vgetrandom.c b/arch/arm64/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..832fe195292b --- /dev/null +++ b/arch/arm64/kernel/vdso/vgetrandom.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +typeof(__cvdso_getrandom) __kernel_getrandom; + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + if (alternative_has_cap_likely(ARM64_HAS_FPSIMD)) + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); + + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) + return -ENOSYS; + return getrandom_syscall(buffer, len, flags); +} diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index 0e3abf7b0bd3..0eb0436ad4ce 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -191,6 +191,7 @@ config LOONGARCH select TRACE_IRQFLAGS_SUPPORT select USE_PERCPU_NUMA_NODE_ID select USER_STACKTRACE_SUPPORT + select VDSO_GETRANDOM select ZONE_DMA32 config 32BIT diff --git a/arch/loongarch/include/asm/vdso/getrandom.h b/arch/loongarch/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..02f36772541b --- /dev/null +++ b/arch/loongarch/include/asm/vdso/getrandom.h @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Xi Ruoyao . All Rights Reserved. + */ +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include +#include + +static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags) +{ + register long ret asm("a0"); + register long nr asm("a7") = __NR_getrandom; + register void *buffer asm("a0") = _buffer; + register size_t len asm("a1") = _len; + register unsigned int flags asm("a2") = _flags; + + asm volatile( + " syscall 0\n" + : "+r" (ret) + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", + "memory"); + + return ret; +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + return (const struct vdso_rng_data *)(get_vdso_data() + VVAR_LOONGARCH_PAGES_START * + PAGE_SIZE + offsetof(struct loongarch_vdso_data, rng_data)); +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h index 5a12309d9fb5..e31ac7474513 100644 --- a/arch/loongarch/include/asm/vdso/vdso.h +++ b/arch/loongarch/include/asm/vdso/vdso.h @@ -4,6 +4,9 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#ifndef _ASM_VDSO_VDSO_H +#define _ASM_VDSO_VDSO_H + #ifndef __ASSEMBLY__ #include @@ -16,6 +19,7 @@ struct vdso_pcpu_data { struct loongarch_vdso_data { struct vdso_pcpu_data pdata[NR_CPUS]; + struct vdso_rng_data rng_data; }; /* @@ -63,3 +67,5 @@ static inline unsigned long get_vdso_data(void) } #endif /* __ASSEMBLY__ */ + +#endif diff --git a/arch/loongarch/include/asm/vdso/vsyscall.h b/arch/loongarch/include/asm/vdso/vsyscall.h index 5de615383a22..b1273ce6f140 100644 --- a/arch/loongarch/include/asm/vdso/vsyscall.h +++ b/arch/loongarch/include/asm/vdso/vsyscall.h @@ -8,6 +8,7 @@ #include extern struct vdso_data *vdso_data; +extern struct vdso_rng_data *vdso_rng_data; /* * Update the vDSO data page to keep in sync with kernel timekeeping. @@ -19,6 +20,13 @@ struct vdso_data *__loongarch_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __loongarch_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__loongarch_get_k_vdso_rng_data(void) +{ + return vdso_rng_data; +} +#define __arch_get_k_vdso_rng_data __loongarch_get_k_vdso_rng_data + /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c index 90dfccb41c14..f6fcc52aefae 100644 --- a/arch/loongarch/kernel/vdso.c +++ b/arch/loongarch/kernel/vdso.c @@ -37,6 +37,7 @@ static union { static struct page *vdso_pages[] = { NULL }; struct vdso_data *vdso_data = generic_vdso_data.data; struct vdso_pcpu_data *vdso_pdata = loongarch_vdso_data.vdata.pdata; +struct vdso_rng_data *vdso_rng_data = &loongarch_vdso_data.vdata.rng_data; static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile index d724d46b07c8..40c1175823d6 100644 --- a/arch/loongarch/vdso/Makefile +++ b/arch/loongarch/vdso/Makefile @@ -4,7 +4,8 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o vgetrandom.o \ + vgetrandom-chacha.o sigreturn.o # Common compiler flags between ABIs. ccflags-vdso := \ @@ -29,6 +30,10 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) endif +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + # VDSO linker flags. ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S index 56ad855896de..6b441bde4026 100644 --- a/arch/loongarch/vdso/vdso.lds.S +++ b/arch/loongarch/vdso/vdso.lds.S @@ -62,6 +62,7 @@ VERSION __vdso_clock_getres; __vdso_clock_gettime; __vdso_gettimeofday; + __vdso_getrandom; __vdso_rt_sigreturn; local: *; }; diff --git a/arch/loongarch/vdso/vgetrandom-chacha.S b/arch/loongarch/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..7e86a50f6e85 --- /dev/null +++ b/arch/loongarch/vdso/vgetrandom-chacha.S @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 Xi Ruoyao . All Rights Reserved. + */ + +#include +#include +#include + +.text + +/* Salsa20 quarter-round */ +.macro QR a b c d + add.w \a, \a, \b + xor \d, \d, \a + rotri.w \d, \d, 16 + + add.w \c, \c, \d + xor \b, \b, \c + rotri.w \b, \b, 20 + + add.w \a, \a, \b + xor \d, \d, \a + rotri.w \d, \d, 24 + + add.w \c, \c, \d + xor \b, \b, \c + rotri.w \b, \b, 25 +.endm + +/* + * Very basic LoongArch implementation of ChaCha20. Produces a given positive + * number of blocks of output with a nonce of 0, taking an input key and + * 8-byte counter. Importantly does not spill to the stack. Its arguments + * are: + * + * a0: output bytes + * a1: 32-byte key input + * a2: 8-byte counter input/output + * a3: number of 64-byte blocks to write to output + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + +/* We don't need a frame pointer */ +#define s9 fp + +#define output a0 +#define key a1 +#define counter a2 +#define nblocks a3 +#define i a4 +#define state0 s0 +#define state1 s1 +#define state2 s2 +#define state3 s3 +#define state4 s4 +#define state5 s5 +#define state6 s6 +#define state7 s7 +#define state8 s8 +#define state9 s9 +#define state10 a5 +#define state11 a6 +#define state12 a7 +#define state13 t0 +#define state14 t1 +#define state15 t2 +#define cnt_lo t3 +#define cnt_hi t4 +#define copy0 t5 +#define copy1 t6 +#define copy2 t7 + +/* Reuse i as copy3 */ +#define copy3 i + + /* + * The ABI requires s0-s9 saved, and sp aligned to 16-byte. + * This does not violate the stack-less requirement: no sensitive data + * is spilled onto the stack. + */ + PTR_ADDI sp, sp, (-SZREG * 10) & STACK_ALIGN + REG_S s0, sp, 0 + REG_S s1, sp, SZREG + REG_S s2, sp, SZREG * 2 + REG_S s3, sp, SZREG * 3 + REG_S s4, sp, SZREG * 4 + REG_S s5, sp, SZREG * 5 + REG_S s6, sp, SZREG * 6 + REG_S s7, sp, SZREG * 7 + REG_S s8, sp, SZREG * 8 + REG_S s9, sp, SZREG * 9 + + li.w copy0, 0x61707865 + li.w copy1, 0x3320646e + li.w copy2, 0x79622d32 + + ld.w cnt_lo, counter, 0 + ld.w cnt_hi, counter, 4 + +.Lblock: + /* state[0,1,2,3] = "expand 32-byte k" */ + move state0, copy0 + move state1, copy1 + move state2, copy2 + li.w state3, 0x6b206574 + + /* state[4,5,..,11] = key */ + ld.w state4, key, 0 + ld.w state5, key, 4 + ld.w state6, key, 8 + ld.w state7, key, 12 + ld.w state8, key, 16 + ld.w state9, key, 20 + ld.w state10, key, 24 + ld.w state11, key, 28 + + /* state[12,13] = counter */ + move state12, cnt_lo + move state13, cnt_hi + + /* state[14,15] = 0 */ + move state14, zero + move state15, zero + + li.w i, 10 +.Lpermute: + /* odd round */ + QR state0, state4, state8, state12 + QR state1, state5, state9, state13 + QR state2, state6, state10, state14 + QR state3, state7, state11, state15 + + /* even round */ + QR state0, state5, state10, state15 + QR state1, state6, state11, state12 + QR state2, state7, state8, state13 + QR state3, state4, state9, state14 + + addi.w i, i, -1 + bnez i, .Lpermute + + /* + * copy[3] = "expa", materialize it here because copy[3] shares the + * same register with i which just became dead. + */ + li.w copy3, 0x6b206574 + + /* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */ + add.w state0, state0, copy0 + add.w state1, state1, copy1 + add.w state2, state2, copy2 + add.w state3, state3, copy3 + st.w state0, output, 0 + st.w state1, output, 4 + st.w state2, output, 8 + st.w state3, output, 12 + + /* from now on state[0,1,2,3] are scratch registers */ + + /* state[0,1,2,3] = lo32(key) */ + ld.w state0, key, 0 + ld.w state1, key, 4 + ld.w state2, key, 8 + ld.w state3, key, 12 + + /* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */ + add.w state4, state4, state0 + add.w state5, state5, state1 + add.w state6, state6, state2 + add.w state7, state7, state3 + st.w state4, output, 16 + st.w state5, output, 20 + st.w state6, output, 24 + st.w state7, output, 28 + + /* state[0,1,2,3] = hi32(key) */ + ld.w state0, key, 16 + ld.w state1, key, 20 + ld.w state2, key, 24 + ld.w state3, key, 28 + + /* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */ + add.w state8, state8, state0 + add.w state9, state9, state1 + add.w state10, state10, state2 + add.w state11, state11, state3 + st.w state8, output, 32 + st.w state9, output, 36 + st.w state10, output, 40 + st.w state11, output, 44 + + /* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */ + add.w state12, state12, cnt_lo + add.w state13, state13, cnt_hi + st.w state12, output, 48 + st.w state13, output, 52 + st.w state14, output, 56 + st.w state15, output, 60 + + /* ++counter */ + addi.w cnt_lo, cnt_lo, 1 + sltui state0, cnt_lo, 1 + add.w cnt_hi, cnt_hi, state0 + + /* output += 64 */ + PTR_ADDI output, output, 64 + /* --nblocks */ + PTR_ADDI nblocks, nblocks, -1 + bnez nblocks, .Lblock + + /* counter = [cnt_lo, cnt_hi] */ + st.w cnt_lo, counter, 0 + st.w cnt_hi, counter, 4 + + /* + * Zero out the potentially sensitive regs, in case nothing uses these + * again. As at now copy[0,1,2,3] just contains "expand 32-byte k" and + * state[0,...,9] are s0-s9 those we'll restore in the epilogue, so we + * only need to zero state[11,...,15]. + */ + move state10, zero + move state11, zero + move state12, zero + move state13, zero + move state14, zero + move state15, zero + + REG_L s0, sp, 0 + REG_L s1, sp, SZREG + REG_L s2, sp, SZREG * 2 + REG_L s3, sp, SZREG * 3 + REG_L s4, sp, SZREG * 4 + REG_L s5, sp, SZREG * 5 + REG_L s6, sp, SZREG * 6 + REG_L s7, sp, SZREG * 7 + REG_L s8, sp, SZREG * 8 + REG_L s9, sp, SZREG * 9 + PTR_ADDI sp, sp, -((-SZREG * 10) & STACK_ALIGN) + + jr ra +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/loongarch/vdso/vgetrandom.c b/arch/loongarch/vdso/vgetrandom.c new file mode 100644 index 000000000000..d5f258ac4a36 --- /dev/null +++ b/arch/loongarch/vdso/vgetrandom.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 Xi Ruoyao . All Rights Reserved. + */ +#include + +ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8a4ee57cd4ef..8623b601ceb5 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -311,6 +311,7 @@ config PPC select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT + select VDSO_GETRANDOM # # Please keep this list sorted alphabetically. # diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h index 17a77d47ed6d..42a51a993d94 100644 --- a/arch/powerpc/include/asm/mman.h +++ b/arch/powerpc/include/asm/mman.h @@ -6,7 +6,7 @@ #include -#ifdef CONFIG_PPC64 +#if defined(CONFIG_PPC64) && !defined(BUILD_VDSO) #include #include diff --git a/arch/powerpc/include/asm/vdso/getrandom.h b/arch/powerpc/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..501d6bb14e8a --- /dev/null +++ b/arch/powerpc/include/asm/vdso/getrandom.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Christophe Leroy , CS GROUP France + */ +#ifndef _ASM_POWERPC_VDSO_GETRANDOM_H +#define _ASM_POWERPC_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +static __always_inline int do_syscall_3(const unsigned long _r0, const unsigned long _r3, + const unsigned long _r4, const unsigned long _r5) +{ + register long r0 asm("r0") = _r0; + register unsigned long r3 asm("r3") = _r3; + register unsigned long r4 asm("r4") = _r4; + register unsigned long r5 asm("r5") = _r5; + register int ret asm ("r3"); + + asm volatile( + " sc\n" + " bns+ 1f\n" + " neg %0, %0\n" + "1:\n" + : "=r" (ret), "+r" (r4), "+r" (r5), "+r" (r0) + : "r" (r3) + : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "cr0", "ctr"); + + return ret; +} + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) +{ + return do_syscall_3(__NR_getrandom, (unsigned long)buffer, + (unsigned long)len, (unsigned long)flags); +} + +static __always_inline struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + return NULL; +} + +ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, + size_t opaque_len, const struct vdso_rng_data *vd); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_POWERPC_VDSO_GETRANDOM_H */ diff --git a/arch/powerpc/include/asm/vdso/vsyscall.h b/arch/powerpc/include/asm/vdso/vsyscall.h index 48cf23f1e273..92f480d8cc6d 100644 --- a/arch/powerpc/include/asm/vdso/vsyscall.h +++ b/arch/powerpc/include/asm/vdso/vsyscall.h @@ -17,6 +17,12 @@ struct vdso_data *__arch_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __arch_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__arch_get_k_vdso_rng_data(void) +{ + return &vdso_data->rng_data; +} + /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index a585c8e538ff..248dee138f7b 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -83,6 +83,7 @@ struct vdso_arch_data { __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ struct vdso_data data[CS_BASES]; + struct vdso_rng_data rng_data; }; #else /* CONFIG_PPC64 */ @@ -95,6 +96,7 @@ struct vdso_arch_data { __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ struct vdso_data data[CS_BASES]; + struct vdso_rng_data rng_data; }; #endif /* CONFIG_PPC64 */ @@ -111,6 +113,21 @@ extern struct vdso_arch_data *vdso_data; addi \ptr, \ptr, (_vdso_datapage - 999b)@l .endm +#include +#include + +.macro get_realdatapage ptr scratch + get_datapage \ptr +#ifdef CONFIG_TIME_NS + lwz \scratch, VDSO_CLOCKMODE_OFFSET(\ptr) + xoris \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@h + xori \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@l + cntlzw \scratch, \scratch + rlwinm \scratch, \scratch, PAGE_SHIFT - 5, 1 << PAGE_SHIFT + add \ptr, \ptr, \scratch +#endif +.endm + #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 23733282de4d..131a8cc10dbe 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -335,6 +335,7 @@ int main(void) /* datapage offsets for use by vdso */ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); + OFFSET(VDSO_RNG_DATA_OFFSET, vdso_arch_data, rng_data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); @@ -346,6 +347,8 @@ int main(void) #else OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); #endif + OFFSET(VDSO_CLOCKMODE_OFFSET, vdso_arch_data, data[0].clock_mode); + DEFINE(VDSO_CLOCKMODE_TIMENS, VDSO_CLOCKMODE_TIMENS); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 1425b6edc66b..56fb1633529a 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -8,30 +8,21 @@ include $(srctree)/lib/vdso/Makefile obj-vdso32 = sigtramp32-32.o gettimeofday-32.o datapage-32.o cacheflush-32.o note-32.o getcpu-32.o obj-vdso64 = sigtramp64-64.o gettimeofday-64.o datapage-64.o cacheflush-64.o note-64.o getcpu-64.o +obj-vdso32 += getrandom-32.o vgetrandom-chacha-32.o +obj-vdso64 += getrandom-64.o vgetrandom-chacha-64.o + ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday-32.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-32.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-32.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-32.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-32.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-32.o = $(CC_FLAGS_FTRACE) - CFLAGS_REMOVE_vgettimeofday-32.o += -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - # This flag is supported by clang for 64-bit but not 32-bit so it will cause - # an unused command line flag warning for this file. - ifdef CONFIG_CC_IS_CLANG - CFLAGS_REMOVE_vgettimeofday-32.o += -fno-stack-clash-protection - endif - CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) - CFLAGS_vgettimeofday-64.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) - CFLAGS_vgettimeofday-64.o += $(call cc-option, -fno-stack-protector) - CFLAGS_vgettimeofday-64.o += -DDISABLE_BRANCH_PROFILING - CFLAGS_vgettimeofday-64.o += -ffreestanding -fasynchronous-unwind-tables - CFLAGS_REMOVE_vgettimeofday-64.o = $(CC_FLAGS_FTRACE) # Go prior to 1.16.x assumes r30 is not clobbered by any VDSO code. That used to be true # by accident when the VDSO was hand-written asm code, but may not be now that the VDSO is # compiler generated. To avoid breaking Go tell GCC not to use r30. Impact on code # generation is minimal, it will just use r29 instead. - CFLAGS_vgettimeofday-64.o += $(call cc-option, -ffixed-r30) + CFLAGS_vgettimeofday-64.o += -include $(c-gettimeofday-y) $(call cc-option, -ffixed-r30) +endif + +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom-32.o += -include $(c-getrandom-y) + CFLAGS_vgetrandom-64.o += -include $(c-getrandom-y) $(call cc-option, -ffixed-r30) endif # Build rules @@ -42,12 +33,18 @@ else VDSOCC := $(CC) endif -targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o +targets := $(obj-vdso32) vdso32.so.dbg vgettimeofday-32.o vgetrandom-32.o +targets += crtsavres-32.o obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) -targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o +targets += $(obj-vdso64) vdso64.so.dbg vgettimeofday-64.o vgetrandom-64.o obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) -ccflags-y := -fno-common -fno-builtin +ccflags-y := -fno-common -fno-builtin -DBUILD_VDSO +ccflags-y += $(DISABLE_LATENT_ENTROPY_PLUGIN) +ccflags-y += $(call cc-option, -fno-stack-protector) +ccflags-y += -DDISABLE_BRANCH_PROFILING +ccflags-y += -ffreestanding -fasynchronous-unwind-tables +ccflags-remove-y := $(CC_FLAGS_FTRACE) ldflags-y := -Wl,--hash-style=both -nostdlib -shared -z noexecstack $(CLANG_FLAGS) ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) @@ -56,6 +53,12 @@ ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WAR ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS)) CC32FLAGS := -m32 +CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc + # This flag is supported by clang for 64-bit but not 32-bit so it will cause + # an unused command line flag warning for this file. +ifdef CONFIG_CC_IS_CLANG +CC32FLAGSREMOVE += -fno-stack-clash-protection +endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__ @@ -68,20 +71,26 @@ targets += vdso64.lds CPPFLAGS_vdso64.lds += -P -C # link rule for the .so file, .lds has to be first -$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o FORCE +$(obj)/vdso32.so.dbg: $(obj)/vdso32.lds $(obj-vdso32) $(obj)/vgettimeofday-32.o $(obj)/vgetrandom-32.o $(obj)/crtsavres-32.o FORCE $(call if_changed,vdso32ld_and_check) -$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso64.lds $(obj-vdso64) $(obj)/vgettimeofday-64.o $(obj)/vgetrandom-64.o FORCE $(call if_changed,vdso64ld_and_check) # assembly rules for the .S files $(obj-vdso32): %-32.o: %.S FORCE $(call if_changed_dep,vdso32as) +$(obj)/crtsavres-32.o: %-32.o: $(srctree)/arch/powerpc/lib/crtsavres.S FORCE + $(call if_changed_dep,vdso32as) $(obj)/vgettimeofday-32.o: %-32.o: %.c FORCE $(call if_changed_dep,vdso32cc) +$(obj)/vgetrandom-32.o: %-32.o: %.c FORCE + $(call if_changed_dep,vdso32cc) $(obj-vdso64): %-64.o: %.S FORCE $(call if_changed_dep,vdso64as) $(obj)/vgettimeofday-64.o: %-64.o: %.c FORCE $(call if_changed_dep,cc_o_c) +$(obj)/vgetrandom-64.o: %-64.o: %.c FORCE + $(call if_changed_dep,cc_o_c) # Generate VDSO offsets using helper script gen-vdso32sym := $(src)/gen_vdso32_offsets.sh @@ -102,7 +111,7 @@ quiet_cmd_vdso32ld_and_check = VDSO32L $@ quiet_cmd_vdso32as = VDSO32A $@ cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) $(AS32FLAGS) -c -o $@ $< quiet_cmd_vdso32cc = VDSO32C $@ - cmd_vdso32cc = $(VDSOCC) $(c_flags) $(CC32FLAGS) -c -o $@ $< + cmd_vdso32cc = $(VDSOCC) $(filter-out $(CC32FLAGSREMOVE), $(c_flags)) $(CC32FLAGS) -c -o $@ $< quiet_cmd_vdso64ld_and_check = VDSO64L $@ cmd_vdso64ld_and_check = $(VDSOCC) $(ldflags-y) $(LD64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^); $(cmd_vdso_check) diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index 0085ae464dac..3b2479bd2f9a 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_datapage r10 + get_realdatapage r10, r11 mtlr r12 .cfi_restore lr #endif diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index db8e167f0166..2b19b6201a33 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_datapage r3 + get_realdatapage r3, r11 mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_datapage r3 + get_realdatapage r3, r11 #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S new file mode 100644 index 000000000000..f3bbf931931c --- /dev/null +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -0,0 +1,58 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Userland implementation of getrandom() for processes + * for use in the vDSO + * + * Copyright (C) 2024 Christophe Leroy , CS GROUP France + */ +#include +#include +#include +#include +#include +#include + +/* + * The macro sets two stack frames, one for the caller and one for the callee + * because there are no requirement for the caller to set a stack frame when + * calling VDSO so it may have omitted to set one, especially on PPC64 + */ + +.macro cvdso_call funct + .cfi_startproc + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + mflr r0 + PPC_STLU r1, -PPC_MIN_STKFRM(r1) + .cfi_adjust_cfa_offset PPC_MIN_STKFRM + PPC_STL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) + .cfi_rel_offset lr, PPC_MIN_STKFRM + PPC_LR_STKOFF +#ifdef __powerpc64__ + PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT +#endif + get_realdatapage r8, r11 + addi r8, r8, VDSO_RNG_DATA_OFFSET + bl CFUNC(DOTSYM(\funct)) + PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) +#ifdef __powerpc64__ + PPC_LL r2, PPC_MIN_STKFRM + STK_GOT(r1) + .cfi_restore r2 +#endif + cmpwi r3, 0 + mtlr r0 + addi r1, r1, 2 * PPC_MIN_STKFRM + .cfi_restore lr + .cfi_def_cfa_offset 0 + crclr so + bgelr+ + crset so + neg r3, r3 + blr + .cfi_endproc +.endm + + .text +V_FUNCTION_BEGIN(__kernel_getrandom) + cvdso_call __c_kernel_getrandom +V_FUNCTION_END(__kernel_getrandom) diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 48fc6658053a..67254ac9c8bb 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -118,16 +118,3 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call __c_kernel_time call_time=1 V_FUNCTION_END(__kernel_time) - -/* Routines for restoring integer registers, called by the compiler. */ -/* Called with r11 pointing to the stack header word of the caller of the */ -/* function, just beyond the end of the integer restore area. */ -#ifndef __powerpc64__ -_GLOBAL(_restgpr_31_x) -_GLOBAL(_rest32gpr_31_x) - lwz r0,4(r11) - lwz r31,-4(r11) - mtlr r0 - mr r1,r11 - blr -#endif diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 8f57107000a2..7b41d5d256e8 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -130,6 +130,7 @@ VERSION #if defined(CONFIG_PPC64) || !defined(CONFIG_SMP) __kernel_getcpu; #endif + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 400819258c06..9481e4b892ed 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -123,6 +123,7 @@ VERSION __kernel_sigtramp_rt64; __kernel_getcpu; __kernel_time; + __kernel_getrandom; local: *; }; diff --git a/arch/powerpc/kernel/vdso/vgetrandom-chacha.S b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S new file mode 100644 index 000000000000..7f9061a9e8b4 --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom-chacha.S @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Christophe Leroy , CS GROUP France + */ + +#include + +#include + +#define dst_bytes r3 +#define key r4 +#define counter r5 +#define nblocks r6 + +#define idx_r0 r0 +#define val4 r4 + +#define const0 0x61707865 +#define const1 0x3320646e +#define const2 0x79622d32 +#define const3 0x6b206574 + +#define key0 r5 +#define key1 r6 +#define key2 r7 +#define key3 r8 +#define key4 r9 +#define key5 r10 +#define key6 r11 +#define key7 r12 + +#define counter0 r14 +#define counter1 r15 + +#define state0 r16 +#define state1 r17 +#define state2 r18 +#define state3 r19 +#define state4 r20 +#define state5 r21 +#define state6 r22 +#define state7 r23 +#define state8 r24 +#define state9 r25 +#define state10 r26 +#define state11 r27 +#define state12 r28 +#define state13 r29 +#define state14 r30 +#define state15 r31 + +.macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 16 + rotlwi \d2, \d2, 16 + rotlwi \d3, \d3, 16 + rotlwi \d4, \d4, 16 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 12 + rotlwi \b2, \b2, 12 + rotlwi \b3, \b3, 12 + rotlwi \b4, \b4, 12 + add \a1, \a1, \b1 + add \a2, \a2, \b2 + add \a3, \a3, \b3 + add \a4, \a4, \b4 + xor \d1, \d1, \a1 + xor \d2, \d2, \a2 + xor \d3, \d3, \a3 + xor \d4, \d4, \a4 + rotlwi \d1, \d1, 8 + rotlwi \d2, \d2, 8 + rotlwi \d3, \d3, 8 + rotlwi \d4, \d4, 8 + add \c1, \c1, \d1 + add \c2, \c2, \d2 + add \c3, \c3, \d3 + add \c4, \c4, \d4 + xor \b1, \b1, \c1 + xor \b2, \b2, \c2 + xor \b3, \b3, \c3 + xor \b4, \b4, \c4 + rotlwi \b1, \b1, 7 + rotlwi \b2, \b2, 7 + rotlwi \b3, \b3, 7 + rotlwi \b4, \b4, 7 +.endm + +#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \ + quarterround4 state##a1 state##b1 state##c1 state##d1 \ + state##a2 state##b2 state##c2 state##d2 \ + state##a3 state##b3 state##c3 state##d3 \ + state##a4 state##b4 state##c4 state##d4 + +/* + * Very basic 32 bits implementation of ChaCha20. Produces a given positive number + * of blocks of output with a nonce of 0, taking an input key and 8-byte + * counter. Importantly does not spill to the stack. Its arguments are: + * + * r3: output bytes + * r4: 32-byte key input + * r5: 8-byte counter input/output (saved on stack) + * r6: number of 64-byte blocks to write to output + * + * r0: counter of blocks (initialised with r6) + * r4: Value '4' after key has been read. + * r5-r12: key + * r14-r15: counter + * r16-r31: state + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) +#ifdef __powerpc64__ + std counter, -216(r1) + + std r14, -144(r1) + std r15, -136(r1) + std r16, -128(r1) + std r17, -120(r1) + std r18, -112(r1) + std r19, -104(r1) + std r20, -96(r1) + std r21, -88(r1) + std r22, -80(r1) + std r23, -72(r1) + std r24, -64(r1) + std r25, -56(r1) + std r26, -48(r1) + std r27, -40(r1) + std r28, -32(r1) + std r29, -24(r1) + std r30, -16(r1) + std r31, -8(r1) +#else + stwu r1, -96(r1) + stw counter, 20(r1) +#ifdef __BIG_ENDIAN__ + stmw r14, 24(r1) +#else + stw r14, 24(r1) + stw r15, 28(r1) + stw r16, 32(r1) + stw r17, 36(r1) + stw r18, 40(r1) + stw r19, 44(r1) + stw r20, 48(r1) + stw r21, 52(r1) + stw r22, 56(r1) + stw r23, 60(r1) + stw r24, 64(r1) + stw r25, 68(r1) + stw r26, 72(r1) + stw r27, 76(r1) + stw r28, 80(r1) + stw r29, 84(r1) + stw r30, 88(r1) + stw r31, 92(r1) +#endif +#endif /* __powerpc64__ */ + + lwz counter0, 0(counter) + lwz counter1, 4(counter) +#ifdef __powerpc64__ + rldimi counter0, counter1, 32, 0 +#endif + mr idx_r0, nblocks + subi dst_bytes, dst_bytes, 4 + + lwz key0, 0(key) + lwz key1, 4(key) + lwz key2, 8(key) + lwz key3, 12(key) + lwz key4, 16(key) + lwz key5, 20(key) + lwz key6, 24(key) + lwz key7, 28(key) + + li val4, 4 +.Lblock: + li r31, 10 + + lis state0, const0@ha + lis state1, const1@ha + lis state2, const2@ha + lis state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + mtctr r31 + + mr state4, key0 + mr state5, key1 + mr state6, key2 + mr state7, key3 + mr state8, key4 + mr state9, key5 + mr state10, key6 + mr state11, key7 + + mr state12, counter0 + mr state13, counter1 + + li state14, 0 + li state15, 0 + +.Lpermute: + QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15) + QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14) + + bdnz .Lpermute + + addis state0, state0, const0@ha + addis state1, state1, const1@ha + addis state2, state2, const2@ha + addis state3, state3, const3@ha + addi state0, state0, const0@l + addi state1, state1, const1@l + addi state2, state2, const2@l + addi state3, state3, const3@l + + add state4, state4, key0 + add state5, state5, key1 + add state6, state6, key2 + add state7, state7, key3 + add state8, state8, key4 + add state9, state9, key5 + add state10, state10, key6 + add state11, state11, key7 + + add state12, state12, counter0 + add state13, state13, counter1 + +#ifdef __BIG_ENDIAN__ + stwbrx state0, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state1, 0, dst_bytes + stwbrx state2, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state3, 0, dst_bytes + stwbrx state4, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state5, 0, dst_bytes + stwbrx state6, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state7, 0, dst_bytes + stwbrx state8, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state9, 0, dst_bytes + stwbrx state10, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state11, 0, dst_bytes + stwbrx state12, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state13, 0, dst_bytes + stwbrx state14, val4, dst_bytes + addi dst_bytes, dst_bytes, 8 + stwbrx state15, 0, dst_bytes +#else + stw state0, 4(dst_bytes) + stw state1, 8(dst_bytes) + stw state2, 12(dst_bytes) + stw state3, 16(dst_bytes) + stw state4, 20(dst_bytes) + stw state5, 24(dst_bytes) + stw state6, 28(dst_bytes) + stw state7, 32(dst_bytes) + stw state8, 36(dst_bytes) + stw state9, 40(dst_bytes) + stw state10, 44(dst_bytes) + stw state11, 48(dst_bytes) + stw state12, 52(dst_bytes) + stw state13, 56(dst_bytes) + stw state14, 60(dst_bytes) + stwu state15, 64(dst_bytes) +#endif + + subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ + +#ifdef __powerpc64__ + addi counter0, counter0, 1 + srdi counter1, counter0, 32 +#else + addic counter0, counter0, 1 + addze counter1, counter1 +#endif + + bne .Lblock + +#ifdef __powerpc64__ + ld counter, -216(r1) +#else + lwz counter, 20(r1) +#endif + stw counter0, 0(counter) + stw counter1, 4(counter) + + li r6, 0 + li r7, 0 + li r8, 0 + li r9, 0 + li r10, 0 + li r11, 0 + li r12, 0 + +#ifdef __powerpc64__ + ld r14, -144(r1) + ld r15, -136(r1) + ld r16, -128(r1) + ld r17, -120(r1) + ld r18, -112(r1) + ld r19, -104(r1) + ld r20, -96(r1) + ld r21, -88(r1) + ld r22, -80(r1) + ld r23, -72(r1) + ld r24, -64(r1) + ld r25, -56(r1) + ld r26, -48(r1) + ld r27, -40(r1) + ld r28, -32(r1) + ld r29, -24(r1) + ld r30, -16(r1) + ld r31, -8(r1) +#else +#ifdef __BIG_ENDIAN__ + lmw r14, 24(r1) +#else + lwz r14, 24(r1) + lwz r15, 28(r1) + lwz r16, 32(r1) + lwz r17, 36(r1) + lwz r18, 40(r1) + lwz r19, 44(r1) + lwz r20, 48(r1) + lwz r21, 52(r1) + lwz r22, 56(r1) + lwz r23, 60(r1) + lwz r24, 64(r1) + lwz r25, 68(r1) + lwz r26, 72(r1) + lwz r27, 76(r1) + lwz r28, 80(r1) + lwz r29, 84(r1) + lwz r30, 88(r1) + lwz r31, 92(r1) +#endif + addi r1, r1, 96 +#endif /* __powerpc64__ */ + blr +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c new file mode 100644 index 000000000000..5f855d45fb7b --- /dev/null +++ b/arch/powerpc/kernel/vdso/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Powerpc userspace implementation of getrandom() + * + * Copyright (C) 2024 Christophe Leroy , CS GROUP France + */ +#include +#include + +ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, + size_t opaque_len, const struct vdso_rng_data *vd) +{ + return __cvdso_getrandom_data(vd, buffer, len, flags, opaque_state, opaque_len); +} diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c60e699e99f5..b0d0b3a8d196 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -243,6 +243,7 @@ config S390 select TRACE_IRQFLAGS_SUPPORT select TTY select USER_STACKTRACE_SUPPORT + select VDSO_GETRANDOM select VIRT_CPU_ACCOUNTING select ZONE_DMA # Note: keep the above list sorted alphabetically diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index de980c938a3e..73e781b56bfe 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -39,11 +39,7 @@ #define ALT_TYPE_SHIFT 20 #define ALT_CTX_SHIFT 28 -#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ - ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ - (facility) << ALT_DATA_SHIFT) - -#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ +#define ALT_FACILITY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index b7d234838a36..715bcf8fb69a 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -14,7 +14,7 @@ #include #include #include - +#include #include #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) @@ -39,28 +39,51 @@ static inline void __clear_facility(unsigned long nr, void *facilities) ptr[nr >> 3] &= ~(0x80 >> (nr & 7)); } -static inline int __test_facility(unsigned long nr, void *facilities) +static __always_inline bool __test_facility(unsigned long nr, void *facilities) { unsigned char *ptr; if (nr >= MAX_FACILITY_BIT) - return 0; + return false; ptr = (unsigned char *) facilities + (nr >> 3); return (*ptr & (0x80 >> (nr & 7))) != 0; } +/* + * __test_facility_constant() generates a single instruction branch. If the + * tested facility is available (likely) the branch is patched into a nop. + * + * Do not use this function unless you know what you are doing. All users are + * supposed to use test_facility() which will do the right thing. + */ +static __always_inline bool __test_facility_constant(unsigned long nr) +{ + asm goto( + ALTERNATIVE("brcl 15,%l[l_no]", "brcl 0,0", ALT_FACILITY(%[nr])) + : + : [nr] "i" (nr) + : + : l_no); + return true; +l_no: + return false; +} + /* * The test_facility function uses the bit ordering where the MSB is bit 0. * That makes it easier to query facility bits with the bit number as * documented in the Principles of Operation. */ -static inline int test_facility(unsigned long nr) +static __always_inline bool test_facility(unsigned long nr) { unsigned long facilities_als[] = { FACILITIES_ALS }; - if (__builtin_constant_p(nr) && nr < sizeof(facilities_als) * 8) { - if (__test_facility(nr, &facilities_als)) - return 1; + if (!__is_defined(__DECOMPRESSOR) && __builtin_constant_p(nr)) { + if (nr < sizeof(facilities_als) * 8) { + if (__test_facility(nr, &facilities_als)) + return true; + } + return __test_facility_constant(nr); } return __test_facility(nr, &stfle_fac_list); } diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h index 02ccfe46050a..d296322be4bc 100644 --- a/arch/s390/include/asm/fpu-insn-asm.h +++ b/arch/s390/include/asm/fpu-insn-asm.h @@ -407,6 +407,28 @@ MRXBOPC 0, 0x0E, v1 .endm +/* VECTOR STORE BYTE REVERSED ELEMENTS */ + .macro VSTBR vr1, disp, index="%r0", base, m + VX_NUM v1, \vr1 + GR_NUM x2, \index + GR_NUM b2, \base + .word 0xE600 | ((v1&15) << 4) | (x2&15) + .word (b2 << 12) | (\disp) + MRXBOPC \m, 0x0E, v1 +.endm +.macro VSTBRH vr1, disp, index="%r0", base + VSTBR \vr1, \disp, \index, \base, 1 +.endm +.macro VSTBRF vr1, disp, index="%r0", base + VSTBR \vr1, \disp, \index, \base, 2 +.endm +.macro VSTBRG vr1, disp, index="%r0", base + VSTBR \vr1, \disp, \index, \base, 3 +.endm +.macro VSTBRQ vr1, disp, index="%r0", base + VSTBR \vr1, \disp, \index, \base, 4 +.endm + /* VECTOR STORE MULTIPLE */ .macro VSTM vfrom, vto, disp, base, hint=3 VX_NUM v1, \vfrom diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h index 9f1eea15872c..916ab59e458a 100644 --- a/arch/s390/include/asm/module.h +++ b/arch/s390/include/asm/module.h @@ -38,4 +38,18 @@ struct mod_arch_specific { #endif /* CONFIG_FUNCTION_TRACER */ }; +static inline const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) +{ + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *s, *se; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + } + return NULL; +} + #endif /* _ASM_S390_MODULE_H */ diff --git a/arch/s390/include/asm/vdso-symbols.h b/arch/s390/include/asm/vdso-symbols.h new file mode 100644 index 000000000000..0df17574d788 --- /dev/null +++ b/arch/s390/include/asm/vdso-symbols.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __S390_VDSO_SYMBOLS_H__ +#define __S390_VDSO_SYMBOLS_H__ + +#include +#ifdef CONFIG_COMPAT +#include +#endif + +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) +#ifdef CONFIG_COMPAT +#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) +#else +#define VDSO32_SYMBOL(tsk, name) (-1UL) +#endif + +#endif /* __S390_VDSO_SYMBOLS_H__ */ diff --git a/arch/s390/include/asm/vdso.h b/arch/s390/include/asm/vdso.h index 53165aa7813a..91061f0279be 100644 --- a/arch/s390/include/asm/vdso.h +++ b/arch/s390/include/asm/vdso.h @@ -6,18 +6,6 @@ #ifndef __ASSEMBLY__ -#include -#ifdef CONFIG_COMPAT -#include -#endif - -#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) -#ifdef CONFIG_COMPAT -#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) -#else -#define VDSO32_SYMBOL(tsk, name) (-1UL) -#endif - extern struct vdso_data *vdso_data; int vdso_getcpu_init(void); diff --git a/arch/s390/include/asm/vdso/getrandom.h b/arch/s390/include/asm/vdso/getrandom.h new file mode 100644 index 000000000000..36355af7160b --- /dev/null +++ b/arch/s390/include/asm/vdso/getrandom.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __ASM_VDSO_GETRANDOM_H +#define __ASM_VDSO_GETRANDOM_H + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +/** + * getrandom_syscall - Invoke the getrandom() syscall. + * @buffer: Destination buffer to fill with random bytes. + * @len: Size of @buffer in bytes. + * @flags: Zero or more GRND_* flags. + * Returns: The number of random bytes written to @buffer, or a negative value indicating an error. + */ +static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags) +{ + return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags); +} + +static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void) +{ + /* + * The RNG data is in the real VVAR data page, but if a task belongs to a time namespace + * then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_ + * PAGE_OFFSET points to the real VVAR page. + */ + if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS) + return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE; + return &_vdso_rng_data; +} + +#endif /* !__ASSEMBLY__ */ + +#endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/s390/include/asm/vdso/vsyscall.h b/arch/s390/include/asm/vdso/vsyscall.h index 6c67c08cefdd..3c5d5e47814e 100644 --- a/arch/s390/include/asm/vdso/vsyscall.h +++ b/arch/s390/include/asm/vdso/vsyscall.h @@ -2,12 +2,21 @@ #ifndef __ASM_VDSO_VSYSCALL_H #define __ASM_VDSO_VSYSCALL_H +#define __VDSO_RND_DATA_OFFSET 768 + #ifndef __ASSEMBLY__ #include #include #include #include + +enum vvar_pages { + VVAR_DATA_PAGE_OFFSET, + VVAR_TIMENS_PAGE_OFFSET, + VVAR_NR_PAGES +}; + /* * Update the vDSO data page to keep in sync with kernel timekeeping. */ @@ -18,6 +27,12 @@ static __always_inline struct vdso_data *__s390_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __s390_get_k_vdso_data +static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void) +{ + return (void *)vdso_data + __VDSO_RND_DATA_OFFSET; +} +#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data + /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 1942e2a9f8db..5a86b9d1da71 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -24,11 +24,11 @@ #include #include #include +#include #include #include #include #include -#include #include #include "compat_linux.h" #include "compat_ptrace.h" diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 749410cfdbc0..269436665d02 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -42,7 +42,7 @@ _LPP_OFFSET = __LC_LPP .macro LPSWEY address, lpswe ALTERNATIVE_2 "b \lpswe;nopr", \ - ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193), \ __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ ALT_LOWCORE .endm diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 6c2cb345402f..e48013cd832c 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -30,9 +30,9 @@ #include #include #include +#include #include #include -#include #include "entry.h" /* diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c index 2f967ac2b8e3..598b512cde01 100644 --- a/arch/s390/kernel/vdso.c +++ b/arch/s390/kernel/vdso.c @@ -12,12 +12,15 @@ #include #include #include +#include #include #include #include #include #include #include +#include +#include #include extern char vdso64_start[], vdso64_end[]; @@ -29,12 +32,6 @@ static union vdso_data_store vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; -enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, - VVAR_TIMENS_PAGE_OFFSET, - VVAR_NR_PAGES, -}; - #ifdef CONFIG_TIME_NS struct vdso_data *arch_get_vdso_data(void *vvar_page) { @@ -250,8 +247,25 @@ static struct page ** __init vdso_setup_pages(void *start, void *end) return pagelist; } +static void vdso_apply_alternatives(void) +{ + const struct elf64_shdr *alt, *shdr; + struct alt_instr *start, *end; + const struct elf64_hdr *hdr; + + hdr = (struct elf64_hdr *)vdso64_start; + shdr = (void *)hdr + hdr->e_shoff; + alt = find_section(hdr, shdr, ".altinstructions"); + if (!alt) + return; + start = (void *)hdr + alt->sh_offset; + end = (void *)hdr + alt->sh_offset + alt->sh_size; + apply_alternatives(start, end); +} + static int __init vdso_init(void) { + vdso_apply_alternatives(); vdso64_mapping.pages = vdso_setup_pages(vdso64_start, vdso64_end); if (IS_ENABLED(CONFIG_COMPAT)) vdso32_mapping.pages = vdso_setup_pages(vdso32_start, vdso32_end); diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile index ba19c0ca7c87..37bb4b761229 100644 --- a/arch/s390/kernel/vdso64/Makefile +++ b/arch/s390/kernel/vdso64/Makefile @@ -3,12 +3,17 @@ # Include the generic Makefile to check the built vdso. include $(srctree)/lib/vdso/Makefile -obj-vdso64 = vdso_user_wrapper.o note.o -obj-cvdso64 = vdso64_generic.o getcpu.o +obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o +obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK) CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE) +CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE) CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE) +ifneq ($(c-getrandom-y),) + CFLAGS_vgetrandom.o += -include $(c-getrandom-y) +endif + # Build rules targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg diff --git a/arch/s390/kernel/vdso64/vdso.h b/arch/s390/kernel/vdso64/vdso.h index 34c7a2312f9d..9e5397e7b590 100644 --- a/arch/s390/kernel/vdso64/vdso.h +++ b/arch/s390/kernel/vdso64/vdso.h @@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz); int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts); int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts); +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); #endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */ diff --git a/arch/s390/kernel/vdso64/vdso64.lds.S b/arch/s390/kernel/vdso64/vdso64.lds.S index 37e2a505e81d..753040a4b5ab 100644 --- a/arch/s390/kernel/vdso64/vdso64.lds.S +++ b/arch/s390/kernel/vdso64/vdso64.lds.S @@ -4,6 +4,7 @@ * library */ +#include #include #include @@ -13,6 +14,7 @@ OUTPUT_ARCH(s390:64-bit) SECTIONS { PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE); + PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET); #ifdef CONFIG_TIME_NS PROVIDE(_timens_data = _vdso_data + PAGE_SIZE); #endif @@ -42,6 +44,10 @@ SECTIONS .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } + . = ALIGN(8); + .altinstructions : { *(.altinstructions) } + .altinstr_replacement : { *(.altinstr_replacement) } + .dynamic : { *(.dynamic) } :text :dynamic .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr @@ -140,6 +146,7 @@ VERSION __kernel_restart_syscall; __kernel_rt_sigreturn; __kernel_sigreturn; + __kernel_getrandom; local: *; }; } diff --git a/arch/s390/kernel/vdso64/vgetrandom-chacha.S b/arch/s390/kernel/vdso64/vgetrandom-chacha.S new file mode 100644 index 000000000000..d802b0a96f41 --- /dev/null +++ b/arch/s390/kernel/vdso64/vgetrandom-chacha.S @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include + +#define STATE0 %v0 +#define STATE1 %v1 +#define STATE2 %v2 +#define STATE3 %v3 +#define COPY0 %v4 +#define COPY1 %v5 +#define COPY2 %v6 +#define COPY3 %v7 +#define PERM4 %v16 +#define PERM8 %v17 +#define PERM12 %v18 +#define BEPERM %v19 +#define TMP0 %v20 +#define TMP1 %v21 +#define TMP2 %v22 +#define TMP3 %v23 + + .section .rodata + + .balign 128 +.Lconstants: + .long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral + .long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes + .long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes + .long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes + .long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap + + .text +/* + * s390 ChaCha20 implementation meant for vDSO. Produces a given positive + * number of blocks of output with nonce 0, taking an input key and 8-bytes + * counter. Does not spill to the stack. + * + * void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, + * const uint8_t *key, + * uint32_t *counter, + * size_t nblocks) + */ +SYM_FUNC_START(__arch_chacha20_blocks_nostack) + larl %r1,.Lconstants + + /* COPY0 = "expand 32-byte k" */ + VL COPY0,0,,%r1 + + /* PERM4-PERM12,BEPERM = byte selectors for VPERM */ + VLM PERM4,BEPERM,16,%r1 + + /* COPY1,COPY2 = key */ + VLM COPY1,COPY2,0,%r3 + + /* COPY3 = counter || zero nonce */ + lg %r3,0(%r4) + VZERO COPY3 + VLVGG COPY3,%r3,0 + + lghi %r1,0 +.Lblock: + VLR STATE0,COPY0 + VLR STATE1,COPY1 + VLR STATE2,COPY2 + VLR STATE3,COPY3 + + lghi %r0,10 +.Ldoubleround: + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,16 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,8 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,7 + + /* STATE1[0,1,2,3] = STATE1[1,2,3,0] */ + VPERM STATE1,STATE1,STATE1,PERM4 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ + VPERM STATE2,STATE2,STATE2,PERM8 + /* STATE3[0,1,2,3] = STATE3[3,0,1,2] */ + VPERM STATE3,STATE3,STATE3,PERM12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,16 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,12 + + /* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */ + VAF STATE0,STATE0,STATE1 + VX STATE3,STATE3,STATE0 + VERLLF STATE3,STATE3,8 + + /* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */ + VAF STATE2,STATE2,STATE3 + VX STATE1,STATE1,STATE2 + VERLLF STATE1,STATE1,7 + + /* STATE1[0,1,2,3] = STATE1[3,0,1,2] */ + VPERM STATE1,STATE1,STATE1,PERM12 + /* STATE2[0,1,2,3] = STATE2[2,3,0,1] */ + VPERM STATE2,STATE2,STATE2,PERM8 + /* STATE3[0,1,2,3] = STATE3[1,2,3,0] */ + VPERM STATE3,STATE3,STATE3,PERM4 + brctg %r0,.Ldoubleround + + /* OUTPUT0 = STATE0 + STATE0 */ + VAF STATE0,STATE0,COPY0 + /* OUTPUT1 = STATE1 + STATE1 */ + VAF STATE1,STATE1,COPY1 + /* OUTPUT2 = STATE2 + STATE2 */ + VAF STATE2,STATE2,COPY2 + /* OUTPUT2 = STATE3 + STATE3 */ + VAF STATE3,STATE3,COPY3 + + /* + * 32 bit wise little endian store to OUTPUT. If the vector + * enhancement facility 2 is not installed use the slow path. + */ + ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148) + VSTBRF STATE0,0,,%r2 + VSTBRF STATE1,16,,%r2 + VSTBRF STATE2,32,,%r2 + VSTBRF STATE3,48,,%r2 +.Lstoredone: + + /* ++COPY3.COUNTER */ + /* alsih %r3,1 */ + .insn rilu,0xcc0a00000000,%r3,1 + alcr %r3,%r1 + VLVGG COPY3,%r3,0 + + /* OUTPUT += 64, --NBLOCKS */ + aghi %r2,64 + brctg %r5,.Lblock + + /* COUNTER = COPY3.COUNTER */ + stg %r3,0(%r4) + + /* Zero out potentially sensitive regs */ + VZERO STATE0 + VZERO STATE1 + VZERO STATE2 + VZERO STATE3 + VZERO COPY1 + VZERO COPY2 + + /* Early exit if TMP0-TMP3 have not been used */ + ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148) + + VZERO TMP0 + VZERO TMP1 + VZERO TMP2 + VZERO TMP3 + + br %r14 + +.Lstoreslow: + /* Convert STATE to little endian format and store to OUTPUT */ + VPERM TMP0,STATE0,STATE0,BEPERM + VPERM TMP1,STATE1,STATE1,BEPERM + VPERM TMP2,STATE2,STATE2,BEPERM + VPERM TMP3,STATE3,STATE3,BEPERM + VSTM TMP0,TMP3,0,%r2 + j .Lstoredone +SYM_FUNC_END(__arch_chacha20_blocks_nostack) diff --git a/arch/s390/kernel/vdso64/vgetrandom.c b/arch/s390/kernel/vdso64/vgetrandom.c new file mode 100644 index 000000000000..b5268b507fb5 --- /dev/null +++ b/arch/s390/kernel/vdso64/vgetrandom.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "vdso.h" + +ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) +{ + if (test_facility(129)) + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); + if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) + return -ENOSYS; + return getrandom_syscall(buffer, len, flags); +} diff --git a/arch/x86/entry/vdso/vgetrandom.c b/arch/x86/entry/vdso/vgetrandom.c index 52d3c7faae2e..430862b8977c 100644 --- a/arch/x86/entry/vdso/vgetrandom.c +++ b/arch/x86/entry/vdso/vgetrandom.c @@ -6,8 +6,6 @@ #include "../../../../lib/vdso/getrandom.c" -ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); - ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len) { return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 6d83ceb7f1ba..b8fed8b8b9cc 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -38,6 +38,9 @@ struct vdso_data *arch_get_vdso_data(void *vvar_page) } #undef EMIT_VVAR +DEFINE_VVAR(struct vdso_data, _vdso_data); +DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data); + unsigned int vclocks_used __read_mostly; #if defined(CONFIG_X86_64) diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 0c92db84469d..6e4f8fae3ce9 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -5,6 +5,7 @@ #include #include +struct timespec64; /* some helper functions for xen and kvm pv clock sources */ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src); u64 pvclock_clocksource_read_nowd(struct pvclock_vcpu_time_info *src); diff --git a/arch/x86/include/asm/vdso/getrandom.h b/arch/x86/include/asm/vdso/getrandom.h index b96e674cafde..ff5334ad32a0 100644 --- a/arch/x86/include/asm/vdso/getrandom.h +++ b/arch/x86/include/asm/vdso/getrandom.h @@ -37,19 +37,6 @@ static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void return &__vdso_rng_data; } -/** - * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. - * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. - * @key: 32-byte input key. - * @counter: 8-byte counter, read on input and updated on return. - * @nblocks: Number of blocks to generate. - * - * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write - * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data - * leaking into forked child processes. - */ -extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); - #endif /* !__ASSEMBLY__ */ #endif /* __ASM_VDSO_GETRANDOM_H */ diff --git a/arch/x86/include/asm/vdso/vsyscall.h b/arch/x86/include/asm/vdso/vsyscall.h index 972415a8be31..67fedf1698b5 100644 --- a/arch/x86/include/asm/vdso/vsyscall.h +++ b/arch/x86/include/asm/vdso/vsyscall.h @@ -9,9 +9,6 @@ #include #include -DEFINE_VVAR(struct vdso_data, _vdso_data); -DEFINE_VVAR_SINGLE(struct vdso_rng_data, _vdso_rng_data); - /* * Update the vDSO data page to keep in sync with kernel timekeeping. */ @@ -22,6 +19,13 @@ struct vdso_data *__x86_get_k_vdso_data(void) } #define __arch_get_k_vdso_data __x86_get_k_vdso_data +static __always_inline +struct vdso_rng_data *__x86_get_k_vdso_rng_data(void) +{ + return &_vdso_rng_data; +} +#define __arch_get_k_vdso_rng_data __x86_get_k_vdso_rng_data + /* The asm-generic header needs to be included after the definitions above */ #include diff --git a/drivers/char/random.c b/drivers/char/random.c index 87fe61295ea1..23ee76bbb4aa 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -59,6 +59,7 @@ #ifdef CONFIG_VDSO_GETRANDOM #include #include +#include #endif #include #include @@ -281,8 +282,15 @@ static void crng_reseed(struct work_struct *work) * former to arrive at the latter. Use smp_store_release so that this * is ordered with the write above to base_crng.generation. Pairs with * the smp_rmb() before the syscall in the vDSO code. + * + * Cast to unsigned long for 32-bit architectures, since atomic 64-bit + * operations are not supported on those architectures. This is safe + * because base_crng.generation is a 32-bit value. On big-endian + * architectures it will be stored in the upper 32 bits, but that's okay + * because the vDSO side only checks whether the value changed, without + * actually using or interpreting the value. */ - smp_store_release(&_vdso_rng_data.generation, next_gen + 1); + smp_store_release((unsigned long *)&__arch_get_k_vdso_rng_data()->generation, next_gen + 1); #endif if (!static_branch_likely(&crng_is_ready)) crng_init = CRNG_READY; @@ -735,7 +743,7 @@ static void __cold _credit_init_bits(size_t bits) queue_work(system_unbound_wq, &set_ready); atomic_notifier_call_chain(&random_ready_notifier, 0, NULL); #ifdef CONFIG_VDSO_GETRANDOM - WRITE_ONCE(_vdso_rng_data.is_ready, true); + WRITE_ONCE(__arch_get_k_vdso_rng_data()->is_ready, true); #endif wake_up_interruptible(&crng_init_wait); kill_fasync(&fasync, SIGIO, POLL_IN); diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2c5f4814aef9..ade74a396968 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -989,8 +989,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) #ifdef CONFIG_X86_USER_SHADOW_STACK [ilog2(VM_SHADOW_STACK)] = "ss", #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) [ilog2(VM_DROPPABLE)] = "dp", +#endif +#ifdef CONFIG_64BIT [ilog2(VM_SEALED)] = "sl", #endif }; diff --git a/include/asm-generic/unaligned.h b/include/asm-generic/unaligned.h index a84c64e5f11e..95acdd70b3b2 100644 --- a/include/asm-generic/unaligned.h +++ b/include/asm-generic/unaligned.h @@ -8,16 +8,7 @@ */ #include #include - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5be52b7ac573..13bff7cf03b7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -367,7 +367,7 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 @@ -417,6 +417,8 @@ extern unsigned int kobjsize(const void *objp); #ifdef CONFIG_64BIT #define VM_DROPPABLE_BIT 40 #define VM_DROPPABLE BIT(VM_DROPPABLE_BIT) +#elif defined(CONFIG_PPC32) +#define VM_DROPPABLE VM_ARCH_1 #else #define VM_DROPPABLE VM_NONE #endif diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index b63d211bd141..37265977d524 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -143,7 +143,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #if defined(CONFIG_X86) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } -#elif defined(CONFIG_PPC) +#elif defined(CONFIG_PPC64) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } #elif defined(CONFIG_PARISC) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } @@ -165,7 +165,7 @@ IF_HAVE_PG_ARCH_X(arch_3) # define IF_HAVE_UFFD_MINOR(flag, name) #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) || defined(CONFIG_PPC32) # define IF_HAVE_VM_DROPPABLE(flag, name) {flag, name}, #else # define IF_HAVE_VM_DROPPABLE(flag, name) diff --git a/include/vdso/getrandom.h b/include/vdso/getrandom.h index a8b7c14b0ae0..6ca4d6de9e46 100644 --- a/include/vdso/getrandom.h +++ b/include/vdso/getrandom.h @@ -43,4 +43,32 @@ struct vgetrandom_state { bool in_use; }; +/** + * __arch_chacha20_blocks_nostack - Generate ChaCha20 stream without using the stack. + * @dst_bytes: Destination buffer to hold @nblocks * 64 bytes of output. + * @key: 32-byte input key. + * @counter: 8-byte counter, read on input and updated on return. + * @nblocks: Number of blocks to generate. + * + * Generates a given positive number of blocks of ChaCha20 output with nonce=0, and does not write + * to any stack or memory outside of the parameters passed to it, in order to mitigate stack data + * leaking into forked child processes. + */ +extern void __arch_chacha20_blocks_nostack(u8 *dst_bytes, const u32 *key, u32 *counter, size_t nblocks); + +/** + * __vdso_getrandom - Architecture-specific vDSO implementation of getrandom() syscall. + * @buffer: Passed to __cvdso_getrandom(). + * @len: Passed to __cvdso_getrandom(). + * @flags: Passed to __cvdso_getrandom(). + * @opaque_state: Passed to __cvdso_getrandom(). + * @opaque_len: Passed to __cvdso_getrandom(); + * + * This function is implemented by making a single call to to __cvdso_getrandom(), whose + * documentation may be consulted for more information. + * + * Returns: The return value of __cvdso_getrandom(). + */ +extern ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len); + #endif /* _VDSO_GETRANDOM_H */ diff --git a/include/vdso/helpers.h b/include/vdso/helpers.h index 73501149439d..3ddb03bb05cb 100644 --- a/include/vdso/helpers.h +++ b/include/vdso/helpers.h @@ -4,6 +4,7 @@ #ifndef __ASSEMBLY__ +#include #include static __always_inline u32 vdso_read_begin(const struct vdso_data *vd) diff --git a/include/vdso/unaligned.h b/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ diff --git a/lib/vdso/Makefile b/lib/vdso/Makefile index 9f031eafc465..cedbf15f8087 100644 --- a/lib/vdso/Makefile +++ b/lib/vdso/Makefile @@ -4,6 +4,7 @@ GENERIC_VDSO_MK_PATH := $(abspath $(lastword $(MAKEFILE_LIST))) GENERIC_VDSO_DIR := $(dir $(GENERIC_VDSO_MK_PATH)) c-gettimeofday-$(CONFIG_GENERIC_GETTIMEOFDAY) := $(addprefix $(GENERIC_VDSO_DIR), gettimeofday.c) +c-getrandom-$(CONFIG_VDSO_GETRANDOM) := $(addprefix $(GENERIC_VDSO_DIR), getrandom.c) # This cmd checks that the vdso library does not contain dynamic relocations. # It has to be called after the linking of the vdso library and requires it diff --git a/lib/vdso/getrandom.c b/lib/vdso/getrandom.c index e1db228bc4f0..938ca539aaa6 100644 --- a/lib/vdso/getrandom.c +++ b/lib/vdso/getrandom.c @@ -3,15 +3,19 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld . All Rights Reserved. */ -#include -#include -#include +#include +#include #include #include +#include #include -#include -#include #include +#include + +#undef PAGE_SIZE +#undef PAGE_MASK +#define PAGE_SIZE (1UL << CONFIG_PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) #define MEMCPY_AND_ZERO_SRC(type, dst, src, len) do { \ while (len >= sizeof(type)) { \ @@ -68,16 +72,17 @@ __cvdso_getrandom_data(const struct vdso_rng_data *rng_info, void *buffer, size_ struct vgetrandom_state *state = opaque_state; size_t batch_len, nblocks, orig_len = len; bool in_use, have_retried = false; - unsigned long current_generation; void *orig_buffer = buffer; + u64 current_generation; u32 counter[2] = { 0 }; if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags)) { - *(struct vgetrandom_opaque_params *)opaque_state = (struct vgetrandom_opaque_params) { - .size_of_opaque_state = sizeof(*state), - .mmap_prot = PROT_READ | PROT_WRITE, - .mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS - }; + struct vgetrandom_opaque_params *params = opaque_state; + params->size_of_opaque_state = sizeof(*state); + params->mmap_prot = PROT_READ | PROT_WRITE; + params->mmap_flags = MAP_DROPPABLE | MAP_ANONYMOUS; + for (size_t i = 0; i < ARRAY_SIZE(params->reserved); ++i) + params->reserved[i] = 0; return 0; } diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso new file mode 120000 index 000000000000..233c7a26f6e5 --- /dev/null +++ b/tools/arch/arm64/vdso @@ -0,0 +1 @@ +../../../arch/arm64/kernel/vdso \ No newline at end of file diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso new file mode 120000 index 000000000000..ebda43a82db7 --- /dev/null +++ b/tools/arch/loongarch/vdso @@ -0,0 +1 @@ +../../../arch/loongarch/vdso \ No newline at end of file diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso new file mode 120000 index 000000000000..4e676d1d1cb4 --- /dev/null +++ b/tools/arch/powerpc/vdso @@ -0,0 +1 @@ +../../../arch/powerpc/kernel/vdso \ No newline at end of file diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso new file mode 120000 index 000000000000..6cf4c1cebdcd --- /dev/null +++ b/tools/arch/s390/vdso @@ -0,0 +1 @@ +../../../arch/s390/kernel/vdso64 \ No newline at end of file diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso new file mode 120000 index 000000000000..7eb962fd3454 --- /dev/null +++ b/tools/arch/x86/vdso @@ -0,0 +1 @@ +../../../arch/x86/entry/vdso/ \ No newline at end of file diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h index 7ce02a223732..8e548ac8f740 100644 --- a/tools/include/asm/alternative.h +++ b/tools/include/asm/alternative.h @@ -2,8 +2,18 @@ #ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H #define _TOOLS_ASM_ALTERNATIVE_ASM_H +#if defined(__s390x__) +#ifdef __ASSEMBLY__ +.macro ALTERNATIVE oldinstr, newinstr, feature + \oldinstr +.endm +#endif +#else + /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define ALTERNATIVE # #endif + +#endif diff --git a/tools/include/generated/asm-offsets.h b/tools/include/generated/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/include/generated/asm/cpucap-defs.h b/tools/include/generated/asm/cpucap-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/include/generated/asm/sysreg-defs.h b/tools/include/generated/asm/sysreg-defs.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 6f7f22ac9da5..4366da278033 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -2,6 +2,8 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ +#ifndef __ASSEMBLY__ + #include #ifndef __compiletime_error @@ -224,4 +226,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#endif /* __ASSEMBLY__ */ + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index bc763d500262..a48ff086899c 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -1,4 +1,8 @@ #ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H #define _TOOLS_INCLUDE_LINUX_LINKAGE_H +#define SYM_FUNC_START(x) .globl x; x: + +#define SYM_FUNC_END(x) + #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 3de8e7e052ae..af9cedbf5357 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -1,7 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -uname_M := $(shell uname -m 2>/dev/null || echo not) -ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) -SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null) +include ../../../scripts/Makefile.arch TEST_GEN_PROGS := vdso_test_gettimeofday TEST_GEN_PROGS += vdso_test_getcpu @@ -11,14 +9,12 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(uname_M),x86_64) +ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) TEST_GEN_PROGS += vdso_test_getrandom -ifneq ($(SODIUM),) TEST_GEN_PROGS += vdso_test_chacha endif -endif -CFLAGS := -std=gnu99 +CFLAGS := -std=gnu99 -O2 ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s @@ -38,11 +34,12 @@ $(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl $(OUTPUT)/vdso_test_getrandom: parse_vdso.c $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ + $(KHDR_INCLUDES) \ -isystem $(top_srcdir)/include/uapi -$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ - -isystem $(top_srcdir)/arch/$(ARCH)/include \ - -isystem $(top_srcdir)/include \ - -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \ - -Wa,--noexecstack $(SODIUM) + -idirafter $(top_srcdir)/tools/include/generated \ + -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ + -idirafter $(top_srcdir)/include \ + -D__ASSEMBLY__ -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 4ae417372e9e..7dd5668ea8a6 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -36,6 +36,12 @@ #define ELF_BITS_XFORM(bits, x) ELF_BITS_XFORM2(bits, x) #define ELF(x) ELF_BITS_XFORM(ELF_BITS, x) +#ifdef __s390x__ +#define ELF_HASH_ENTRY ELF(Xword) +#else +#define ELF_HASH_ENTRY ELF(Word) +#endif + static struct vdso_info { bool valid; @@ -47,8 +53,8 @@ static struct vdso_info /* Symbol table */ ELF(Sym) *symtab; const char *symstrings; - ELF(Word) *bucket, *chain; - ELF(Word) nbucket, nchain; + ELF_HASH_ENTRY *bucket, *chain; + ELF_HASH_ENTRY nbucket, nchain; /* Version table */ ELF(Versym) *versym; @@ -115,7 +121,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) /* * Fish out the useful bits of the dynamic table. */ - ELF(Word) *hash = 0; + ELF_HASH_ENTRY *hash = 0; vdso_info.symstrings = 0; vdso_info.symtab = 0; vdso_info.versym = 0; @@ -133,7 +139,7 @@ void vdso_init_from_sysinfo_ehdr(uintptr_t base) + vdso_info.load_offset); break; case DT_HASH: - hash = (ELF(Word) *) + hash = (ELF_HASH_ENTRY *) ((uintptr_t)dyn[i].d_un.d_ptr + vdso_info.load_offset); break; @@ -216,7 +222,8 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && + ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK) diff --git a/tools/testing/selftests/vDSO/vdso_call.h b/tools/testing/selftests/vDSO/vdso_call.h new file mode 100644 index 000000000000..bb237d771051 --- /dev/null +++ b/tools/testing/selftests/vDSO/vdso_call.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Macro to call vDSO functions + * + * Copyright (C) 2024 Christophe Leroy , CS GROUP France + */ +#ifndef __VDSO_CALL_H__ +#define __VDSO_CALL_H__ + +#ifdef __powerpc__ + +#define LOADARGS_1(fn, __arg1) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ +} while (0) + +#define LOADARGS_2(fn, __arg1, __arg2) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ +} while (0) + +#define LOADARGS_3(fn, __arg1, __arg2, __arg3) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ + _r5 = (long)__arg3; \ +} while (0) + +#define LOADARGS_5(fn, __arg1, __arg2, __arg3, __arg4, __arg5) do { \ + _r0 = fn; \ + _r3 = (long)__arg1; \ + _r4 = (long)__arg2; \ + _r5 = (long)__arg3; \ + _r6 = (long)__arg4; \ + _r7 = (long)__arg5; \ +} while (0) + +#define VDSO_CALL(fn, nr, args...) ({ \ + register void *_r0 asm ("r0"); \ + register long _r3 asm ("r3"); \ + register long _r4 asm ("r4"); \ + register long _r5 asm ("r5"); \ + register long _r6 asm ("r6"); \ + register long _r7 asm ("r7"); \ + register long _r8 asm ("r8"); \ + register long _rval asm ("r3"); \ + \ + LOADARGS_##nr(fn, args); \ + \ + asm volatile( \ + " mtctr %0\n" \ + " bctrl\n" \ + " bns+ 1f\n" \ + " neg 3, 3\n" \ + "1:" \ + : "+r" (_r0), "=r" (_r3), "+r" (_r4), "+r" (_r5), \ + "+r" (_r6), "+r" (_r7), "+r" (_r8) \ + : "r" (_rval) \ + : "r9", "r10", "r11", "r12", "cr0", "cr1", "cr5", \ + "cr6", "cr7", "xer", "lr", "ctr", "memory" \ + ); \ + _rval; \ +}) + +#else +#define VDSO_CALL(fn, nr, args...) fn(args) +#endif + +#endif diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 7b543e7f04d7..722260f97561 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -18,18 +18,18 @@ #elif defined(__aarch64__) #define VDSO_VERSION 3 #define VDSO_NAMES 0 +#elif defined(__powerpc64__) +#define VDSO_VERSION 1 +#define VDSO_NAMES 0 #elif defined(__powerpc__) #define VDSO_VERSION 1 #define VDSO_NAMES 0 #define VDSO_32BIT 1 -#elif defined(__powerpc64__) -#define VDSO_VERSION 1 -#define VDSO_NAMES 0 -#elif defined (__s390__) +#elif defined (__s390__) && !defined(__s390x__) #define VDSO_VERSION 2 #define VDSO_NAMES 0 #define VDSO_32BIT 1 -#elif defined (__s390X__) +#elif defined (__s390x__) #define VDSO_VERSION 2 #define VDSO_NAMES 0 #elif defined(__mips__) @@ -68,16 +68,15 @@ static const char *versions[7] = { "LINUX_5.10" }; -static const char *names[2][6] = { +static const char *names[2][7] = { { "__kernel_gettimeofday", "__kernel_clock_gettime", "__kernel_time", "__kernel_clock_getres", "__kernel_getcpu", -#if defined(VDSO_32BIT) "__kernel_clock_gettime64", -#endif + "__kernel_getrandom", }, { "__vdso_gettimeofday", @@ -85,9 +84,8 @@ static const char *names[2][6] = { "__vdso_time", "__vdso_clock_getres", "__vdso_getcpu", -#if defined(VDSO_32BIT) "__vdso_clock_gettime64", -#endif + "__vdso_getrandom", }, }; diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c index 96d32fd65b42..a54424e2336f 100644 --- a/tools/testing/selftests/vDSO/vdso_test_abi.c +++ b/tools/testing/selftests/vDSO/vdso_test_abi.c @@ -20,10 +20,8 @@ #include "../kselftest.h" #include "vdso_config.h" - -extern void *vdso_sym(const char *version, const char *name); -extern void vdso_init_from_sysinfo_ehdr(uintptr_t base); -extern void vdso_init_from_auxv(void *auxv); +#include "vdso_call.h" +#include "parse_vdso.h" static const char *version; static const char **name; @@ -61,7 +59,7 @@ static void vdso_test_gettimeofday(void) } struct timeval tv; - long ret = vdso_gettimeofday(&tv, 0); + long ret = VDSO_CALL(vdso_gettimeofday, 2, &tv, 0); if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", @@ -86,7 +84,7 @@ static void vdso_test_clock_gettime(clockid_t clk_id) } struct timespec ts; - long ret = vdso_clock_gettime(clk_id, &ts); + long ret = VDSO_CALL(vdso_clock_gettime, 2, clk_id, &ts); if (ret == 0) { ksft_print_msg("The time is %lld.%06lld\n", @@ -111,7 +109,7 @@ static void vdso_test_time(void) return; } - long ret = vdso_time(NULL); + long ret = VDSO_CALL(vdso_time, 1, NULL); if (ret > 0) { ksft_print_msg("The time in hours since January 1, 1970 is %lld\n", @@ -138,7 +136,7 @@ static void vdso_test_clock_getres(clockid_t clk_id) } struct timespec ts, sys_ts; - long ret = vdso_clock_getres(clk_id, &ts); + long ret = VDSO_CALL(vdso_clock_getres, 2, clk_id, &ts); if (ret == 0) { ksft_print_msg("The vdso resolution is %lld %lld\n", diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index e38f44e5f803..b1ea532c5996 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,23 +3,90 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld . All Rights Reserved. */ -#include +#include #include +#include #include #include +#include #include "../kselftest.h" -extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks); +#if defined(__aarch64__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_ASIMD; +} +#elif defined(__s390x__) +static bool cpu_has_capabilities(void) +{ + return getauxval(AT_HWCAP) & HWCAP_S390_VXRS; +} +#else +static bool cpu_has_capabilities(void) +{ + return true; +} +#endif + +static uint32_t rol32(uint32_t word, unsigned int shift) +{ + return (word << (shift & 31)) | (word >> ((-shift) & 31)); +} + +static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) +{ + uint32_t s[16] = { + 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U, + key[0], key[1], key[2], key[3], key[4], key[5], key[6], key[7], + counter[0], counter[1], 0, 0 + }; + + while (nblocks--) { + uint32_t x[16]; + memcpy(x, s, sizeof(x)); + for (unsigned int r = 0; r < 20; r += 2) { + #define QR(a, b, c, d) ( \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 16), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 12), \ + x[a] += x[b], \ + x[d] = rol32(x[d] ^ x[a], 8), \ + x[c] += x[d], \ + x[b] = rol32(x[b] ^ x[c], 7)) + + QR(0, 4, 8, 12); + QR(1, 5, 9, 13); + QR(2, 6, 10, 14); + QR(3, 7, 11, 15); + QR(0, 5, 10, 15); + QR(1, 6, 11, 12); + QR(2, 7, 8, 13); + QR(3, 4, 9, 14); + } + for (unsigned int i = 0; i < 16; ++i, dst_bytes += sizeof(uint32_t)) + put_unaligned_le32(x[i] + s[i], dst_bytes); + if (!++s[12]) + ++s[13]; + } + counter[0] = s[12]; + counter[1] = s[13]; +} + +typedef uint8_t u8; +typedef uint32_t u32; +typedef uint64_t u64; +#include int main(int argc, char *argv[]) { enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 }; - static const uint8_t nonce[8] = { 0 }; - uint32_t counter[2]; - uint8_t key[32]; + uint32_t key[8], counter1[2], counter2[2]; uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS]; ksft_print_header(); + if (!cpu_has_capabilities()) + ksft_exit_skip("Required CPU capabilities missing\n"); ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { @@ -27,17 +94,33 @@ int main(int argc, char *argv[]) printf("getrandom() failed!\n"); return KSFT_SKIP; } - crypto_stream_chacha20(output1, sizeof(output1), nonce, key); + memset(counter1, 0, sizeof(counter1)); + reference_chacha20_blocks(output1, key, counter1, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { memset(output2, 'X', sizeof(output2)); - memset(counter, 0, sizeof(counter)); + memset(counter2, 0, sizeof(counter2)); if (split) - __arch_chacha20_blocks_nostack(output2, key, counter, split); - __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split); - if (memcmp(output1, output2, sizeof(output1))) + __arch_chacha20_blocks_nostack(output2, key, counter2, split); + __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) return KSFT_FAIL; } } + memset(counter1, 0, sizeof(counter1)); + counter1[0] = (uint32_t)-BLOCKS + 2; + memset(counter2, 0, sizeof(counter2)); + counter2[0] = (uint32_t)-BLOCKS + 2; + + reference_chacha20_blocks(output1, key, counter1, BLOCKS); + __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) + return KSFT_FAIL; + + reference_chacha20_blocks(output1, key, counter1, BLOCKS); + __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); + if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) + return KSFT_FAIL; + ksft_test_result_pass("chacha: PASS\n"); return KSFT_PASS; } diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index e691a3cf1491..5fb97ad67eea 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -20,6 +20,7 @@ #include #include "vdso_config.h" +#include "vdso_call.h" #include "../kselftest.h" static const char **name; @@ -114,6 +115,12 @@ static void fill_function_pointers() if (!vdso) vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso32.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); + if (!vdso) + vdso = dlopen("linux-vdso64.so.1", + RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); if (!vdso) { printf("[WARN]\tfailed to find vDSO\n"); return; @@ -180,7 +187,7 @@ static void test_getcpu(void) ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0); if (vdso_getcpu) - ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0); + ret_vdso = VDSO_CALL(vdso_getcpu, 3, &cpu_vdso, &node_vdso, 0); if (vgetcpu) ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0); @@ -263,7 +270,7 @@ static void test_one_clock_gettime(int clock, const char *name) if (sys_clock_gettime(clock, &start) < 0) { if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso); if (vdso_ret == -EINVAL) { printf("[OK]\tNo such clock.\n"); } else { @@ -276,7 +283,7 @@ static void test_one_clock_gettime(int clock, const char *name) return; } - vdso_ret = vdso_clock_gettime(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime, 2, clock, &vdso); end_ret = sys_clock_gettime(clock, &end); if (vdso_ret != 0 || end_ret != 0) { @@ -325,7 +332,7 @@ static void test_one_clock_gettime64(int clock, const char *name) if (sys_clock_gettime64(clock, &start) < 0) { if (errno == EINVAL) { - vdso_ret = vdso_clock_gettime64(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso); if (vdso_ret == -EINVAL) { printf("[OK]\tNo such clock.\n"); } else { @@ -338,7 +345,7 @@ static void test_one_clock_gettime64(int clock, const char *name) return; } - vdso_ret = vdso_clock_gettime64(clock, &vdso); + vdso_ret = VDSO_CALL(vdso_clock_gettime64, 2, clock, &vdso); end_ret = sys_clock_gettime64(clock, &end); if (vdso_ret != 0 || end_ret != 0) { @@ -395,7 +402,7 @@ static void test_gettimeofday(void) return; } - vdso_ret = vdso_gettimeofday(&vdso, &vdso_tz); + vdso_ret = VDSO_CALL(vdso_gettimeofday, 2, &vdso, &vdso_tz); end_ret = sys_gettimeofday(&end, NULL); if (vdso_ret != 0 || end_ret != 0) { @@ -425,7 +432,7 @@ static void test_gettimeofday(void) } /* And make sure that passing NULL for tz doesn't crash. */ - vdso_gettimeofday(&vdso, NULL); + VDSO_CALL(vdso_gettimeofday, 2, &vdso, NULL); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c index b758f68c6c9c..cdeaed45fb26 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c +++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c @@ -14,6 +14,7 @@ #include "../kselftest.h" #include "parse_vdso.h" #include "vdso_config.h" +#include "vdso_call.h" struct getcpu_cache; typedef long (*getcpu_t)(unsigned int *, unsigned int *, @@ -42,7 +43,7 @@ int main(int argc, char **argv) return KSFT_SKIP; } - ret = get_cpu(&cpu, &node, 0); + ret = VDSO_CALL(get_cpu, 3, &cpu, &node, 0); if (ret == 0) { printf("Running on CPU %u node %u\n", cpu, node); } else { diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 05122425a873..72a1d9b43a84 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -16,11 +16,17 @@ #include #include #include +#include +#include #include #include +#include +#include #include "../kselftest.h" #include "parse_vdso.h" +#include "vdso_config.h" +#include "vdso_call.h" #ifndef timespecsub #define timespecsub(tsp, usp, vsp) \ @@ -38,50 +44,43 @@ static struct { pthread_mutex_t lock; void **states; size_t len, cap; -} grnd_allocator = { - .lock = PTHREAD_MUTEX_INITIALIZER -}; - -static struct { ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t); - pthread_key_t key; - pthread_once_t initialized; struct vgetrandom_opaque_params params; -} grnd_ctx = { - .initialized = PTHREAD_ONCE_INIT +} vgrnd = { + .lock = PTHREAD_MUTEX_INITIALIZER }; static void *vgetrandom_get_state(void) { void *state = NULL; - pthread_mutex_lock(&grnd_allocator.lock); - if (!grnd_allocator.len) { + pthread_mutex_lock(&vgrnd.lock); + if (!vgrnd.len) { size_t page_size = getpagesize(); size_t new_cap; size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ void *new_block, *new_states; - alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); - num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size); - new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0); + alloc_size = (num * vgrnd.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); + num = (page_size / vgrnd.params.size_of_opaque_state) * (alloc_size / page_size); + new_block = mmap(0, alloc_size, vgrnd.params.mmap_prot, vgrnd.params.mmap_flags, -1, 0); if (new_block == MAP_FAILED) goto out; - new_cap = grnd_allocator.cap + num; - new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states)); + new_cap = vgrnd.cap + num; + new_states = reallocarray(vgrnd.states, new_cap, sizeof(*vgrnd.states)); if (!new_states) goto unmap; - grnd_allocator.cap = new_cap; - grnd_allocator.states = new_states; + vgrnd.cap = new_cap; + vgrnd.states = new_states; for (size_t i = 0; i < num; ++i) { - if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size) + if (((uintptr_t)new_block & (page_size - 1)) + vgrnd.params.size_of_opaque_state > page_size) new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); - grnd_allocator.states[i] = new_block; - new_block += grnd_ctx.params.size_of_opaque_state; + vgrnd.states[i] = new_block; + new_block += vgrnd.params.size_of_opaque_state; } - grnd_allocator.len = num; + vgrnd.len = num; goto success; unmap: @@ -89,10 +88,10 @@ static void *vgetrandom_get_state(void) goto out; } success: - state = grnd_allocator.states[--grnd_allocator.len]; + state = vgrnd.states[--vgrnd.len]; out: - pthread_mutex_unlock(&grnd_allocator.lock); + pthread_mutex_unlock(&vgrnd.lock); return state; } @@ -100,27 +99,33 @@ static void vgetrandom_put_state(void *state) { if (!state) return; - pthread_mutex_lock(&grnd_allocator.lock); - grnd_allocator.states[grnd_allocator.len++] = state; - pthread_mutex_unlock(&grnd_allocator.lock); + pthread_mutex_lock(&vgrnd.lock); + vgrnd.states[vgrnd.len++] = state; + pthread_mutex_unlock(&vgrnd.lock); } static void vgetrandom_init(void) { - if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0) - return; + const char *version = versions[VDSO_VERSION]; + const char *name = names[VDSO_NAMES][6]; unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); + size_t ret; + if (!sysinfo_ehdr) { printf("AT_SYSINFO_EHDR is not present!\n"); exit(KSFT_SKIP); } vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); - grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom"); - if (!grnd_ctx.fn) { - printf("__vdso_getrandom is missing!\n"); + vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name); + if (!vgrnd.fn) { + printf("%s is missing!\n", name); exit(KSFT_FAIL); } - if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) { + ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL); + if (ret == -ENOSYS) { + printf("unsupported architecture\n"); + exit(KSFT_SKIP); + } else if (ret) { printf("failed to fetch vgetrandom params!\n"); exit(KSFT_FAIL); } @@ -128,27 +133,21 @@ static void vgetrandom_init(void) static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) { - void *state; + static __thread void *state; - pthread_once(&grnd_ctx.initialized, vgetrandom_init); - state = pthread_getspecific(grnd_ctx.key); if (!state) { state = vgetrandom_get_state(); - if (pthread_setspecific(grnd_ctx.key, state) != 0) { - vgetrandom_put_state(state); - state = NULL; - } if (!state) { printf("vgetrandom_get_state failed!\n"); exit(KSFT_FAIL); } } - return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state); + return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state); } enum { TRIALS = 25000000, THREADS = 256 }; -static void *test_vdso_getrandom(void *) +static void *test_vdso_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -158,7 +157,7 @@ static void *test_vdso_getrandom(void *) return NULL; } -static void *test_libc_getrandom(void *) +static void *test_libc_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -168,7 +167,7 @@ static void *test_libc_getrandom(void *) return NULL; } -static void *test_syscall_getrandom(void *) +static void *test_syscall_getrandom(void *ctx) { for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; @@ -244,9 +243,10 @@ static void fill(void) static void kselftest(void) { uint8_t weird_size[1263]; + pid_t child; ksft_print_header(); - ksft_set_plan(1); + ksft_set_plan(2); for (size_t i = 0; i < 1000; ++i) { ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); @@ -255,6 +255,42 @@ static void kselftest(void) } ksft_test_result_pass("getrandom: PASS\n"); + + unshare(CLONE_NEWUSER); + assert(unshare(CLONE_NEWTIME) == 0); + child = fork(); + assert(child >= 0); + if (!child) { + vgetrandom_init(); + child = getpid(); + assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); + assert(kill(child, SIGSTOP) == 0); + assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); + _exit(0); + } + for (;;) { + struct ptrace_syscall_info info = { 0 }; + int status, ret; + assert(waitpid(child, &status, 0) >= 0); + if (WIFEXITED(status)) { + if (WEXITSTATUS(status) != 0) + exit(KSFT_FAIL); + break; + } + assert(WIFSTOPPED(status)); + if (WSTOPSIG(status) == SIGSTOP) + assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); + else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) { + assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); + if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom && + info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size)) + exit(KSFT_FAIL); + } + assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); + } + + ksft_test_result_pass("getrandom timens: PASS\n"); + exit(KSFT_PASS); } @@ -265,6 +301,8 @@ static void usage(const char *argv0) int main(int argc, char *argv[]) { + vgetrandom_init(); + if (argc == 1) { kselftest(); return 0; diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c index ee4f1ca56a71..e31b18ffae33 100644 --- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c +++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c @@ -19,6 +19,7 @@ #include "../kselftest.h" #include "parse_vdso.h" #include "vdso_config.h" +#include "vdso_call.h" int main(int argc, char **argv) { @@ -43,7 +44,7 @@ int main(int argc, char **argv) } struct timeval tv; - long ret = gtod(&tv, 0); + long ret = VDSO_CALL(gtod, 2, &tv, 0); if (ret == 0) { printf("The time is %lld.%06lld\n",