2019-06-03 07:44:50 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0-only
|
2014-11-14 15:54:08 +00:00
|
|
|
/*
|
|
|
|
|
* alternative runtime patching
|
|
|
|
|
* inspired by the x86 version
|
|
|
|
|
*
|
|
|
|
|
* Copyright (C) 2014 ARM Ltd.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#define pr_fmt(fmt) "alternatives: " fmt
|
|
|
|
|
|
|
|
|
|
#include <linux/init.h>
|
|
|
|
|
#include <linux/cpu.h>
|
2022-08-30 11:48:32 +01:00
|
|
|
#include <linux/elf.h>
|
2014-11-14 15:54:08 +00:00
|
|
|
#include <asm/cacheflush.h>
|
|
|
|
|
#include <asm/alternative.h>
|
|
|
|
|
#include <asm/cpufeature.h>
|
2015-06-01 10:47:40 +01:00
|
|
|
#include <asm/insn.h>
|
2022-08-30 11:48:32 +01:00
|
|
|
#include <asm/module.h>
|
2016-08-24 18:27:28 +01:00
|
|
|
#include <asm/sections.h>
|
2022-08-30 11:48:32 +01:00
|
|
|
#include <asm/vdso.h>
|
2014-11-14 15:54:08 +00:00
|
|
|
#include <linux/stop_machine.h>
|
|
|
|
|
|
2021-02-04 09:43:49 +08:00
|
|
|
/*
 * alt_instr stores self-relative offsets; turn field @f of entry @a back
 * into an absolute pointer.
 */
#define __ALT_PTR(a, f)		((void *)&(a)->f + (a)->f)
/* Address of the original instruction sequence to be patched. */
#define ALT_ORIG_PTR(a)		__ALT_PTR(a, orig_offset)
/* Address of the replacement sequence (or callback, for CB entries). */
#define ALT_REPL_PTR(a)		__ALT_PTR(a, alt_offset)

/* The cpucap number, with the callback marker bit stripped. */
#define ALT_CAP(a)		((a)->cpufeature & ~ARM64_CB_BIT)
/* True if this entry is patched via a callback rather than a literal copy. */
#define ALT_HAS_CB(a)		((a)->cpufeature & ARM64_CB_BIT)
|
|
|
|
|
|
2020-06-30 14:06:04 +01:00
|
|
|
/* Volatile, as we may be patching the guts of READ_ONCE() */
static volatile int all_alternatives_applied;

/* One bit per cpucap, set once that capability's alternatives are applied. */
static DECLARE_BITMAP(applied_alternatives, ARM64_NCAPS);
|
2018-01-08 15:38:06 +00:00
|
|
|
|
2014-11-28 13:40:45 +00:00
|
|
|
/* A half-open range [begin, end) of alt_instr entries to patch. */
struct alt_region {
	struct alt_instr *begin;
	struct alt_instr *end;
};
|
|
|
|
|
|
2019-01-31 14:58:52 +00:00
|
|
|
/*
 * Report whether the alternatives for @cpufeature have already been
 * patched in. An out-of-range capability number warns and reports false.
 */
bool alternative_is_applied(u16 cpufeature)
{
	bool in_range = !WARN_ON(cpufeature >= ARM64_NCAPS);

	return in_range && test_bit(cpufeature, applied_alternatives);
}
|
|
|
|
|
|
2015-06-01 10:47:40 +01:00
|
|
|
/*
|
|
|
|
|
* Check if the target PC is within an alternative block.
|
|
|
|
|
*/
|
arm64: alternatives: mark patch_alternative() as `noinstr`
The alternatives code must be `noinstr` such that it does not patch itself,
as the cache invalidation is only performed after all the alternatives have
been applied.
Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update()
and get_alt_insn() with `__always_inline` since they are both only called
through patch_alternative().
Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused
a boot hang:
[ 0.241121] CPU: All CPU(s) started at EL2
The alternatives code was patching the atomics in __tsan_read4() from LL/SC
atomics to LSE atomics.
The following fragment is using LL/SC atomics in the .text section:
| <__tsan_unaligned_read4+304>: ldxr x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This LL/SC atomic sequence was to be replaced with LSE atomics. However since
the alternatives code was instrumentable, __tsan_read4() was being called after
only the first instruction was replaced, which led to the following code in memory:
| <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This caused an infinite loop as the `stxr` instruction never completed successfully,
so `w7` was always 0.
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220405104733.11476-1-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2022-04-05 11:47:33 +01:00
|
|
|
/*
 * Decide whether a branch targeting @pc must be re-encoded: branches that
 * land inside the replacement sequence itself are already correct and are
 * left alone; anything outside needs its offset recomputed.
 */
static __always_inline bool branch_insn_requires_update(struct alt_instr *alt, unsigned long pc)
{
	unsigned long seq_start = (unsigned long)ALT_REPL_PTR(alt);
	unsigned long seq_end = seq_start + alt->alt_len;

	return pc < seq_start || pc > seq_end;
}
|
|
|
|
|
|
2016-09-09 14:07:13 +01:00
|
|
|
/* Round @x down to a multiple of @a (@a must be a power of two). */
#define align_down(x, a)	((unsigned long)(x) & ~(((unsigned long)(a)) - 1))
|
|
|
|
|
|
arm64: alternatives: mark patch_alternative() as `noinstr`
The alternatives code must be `noinstr` such that it does not patch itself,
as the cache invalidation is only performed after all the alternatives have
been applied.
Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update()
and get_alt_insn() with `__always_inline` since they are both only called
through patch_alternative().
Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused
a boot hang:
[ 0.241121] CPU: All CPU(s) started at EL2
The alternatives code was patching the atomics in __tsan_read4() from LL/SC
atomics to LSE atomics.
The following fragment is using LL/SC atomics in the .text section:
| <__tsan_unaligned_read4+304>: ldxr x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This LL/SC atomic sequence was to be replaced with LSE atomics. However since
the alternatives code was instrumentable, __tsan_read4() was being called after
only the first instruction was replaced, which led to the following code in memory:
| <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This caused an infinite loop as the `stxr` instruction never completed successfully,
so `w7` was always 0.
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220405104733.11476-1-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2022-04-05 11:47:33 +01:00
|
|
|
/*
 * Read the replacement instruction at @altinsnptr and, where it is
 * PC-relative (immediate branch or adrp), re-encode it so that it is
 * correct when executed from @insnptr instead.
 *
 * __always_inline: only called from the noinstr patch_alternative(), and
 * must not generate an out-of-line, instrumentable copy.
 */
static __always_inline u32 get_alt_insn(struct alt_instr *alt, __le32 *insnptr, __le32 *altinsnptr)
{
	u32 insn;

	insn = le32_to_cpu(*altinsnptr);

	if (aarch64_insn_is_branch_imm(insn)) {
		s32 offset = aarch64_get_branch_offset(insn);
		unsigned long target;

		/* Absolute target, as seen from the replacement sequence. */
		target = (unsigned long)altinsnptr + offset;

		/*
		 * If we're branching inside the alternate sequence,
		 * do not rewrite the instruction, as it is already
		 * correct. Otherwise, generate the new instruction.
		 */
		if (branch_insn_requires_update(alt, target)) {
			offset = target - (unsigned long)insnptr;
			insn = aarch64_set_branch_offset(insn, offset);
		}
	} else if (aarch64_insn_is_adrp(insn)) {
		s32 orig_offset, new_offset;
		unsigned long target;

		/*
		 * If we're replacing an adrp instruction, which uses PC-relative
		 * immediate addressing, adjust the offset to reflect the new
		 * PC. adrp operates on 4K aligned addresses.
		 */
		orig_offset = aarch64_insn_adrp_get_offset(insn);
		target = align_down(altinsnptr, SZ_4K) + orig_offset;
		new_offset = target - align_down(insnptr, SZ_4K);
		insn = aarch64_insn_adrp_set_offset(insn, new_offset);
	} else if (aarch64_insn_uses_literal(insn)) {
		/*
		 * Disallow patching unhandled instructions using PC relative
		 * literal addresses
		 */
		BUG();
	}

	return insn;
}
|
|
|
|
|
|
arm64: alternatives: mark patch_alternative() as `noinstr`
The alternatives code must be `noinstr` such that it does not patch itself,
as the cache invalidation is only performed after all the alternatives have
been applied.
Mark patch_alternative() as `noinstr`. Mark branch_insn_requires_update()
and get_alt_insn() with `__always_inline` since they are both only called
through patch_alternative().
Booting a kernel in QEMU TCG with KCSAN=y and ARM64_USE_LSE_ATOMICS=y caused
a boot hang:
[ 0.241121] CPU: All CPU(s) started at EL2
The alternatives code was patching the atomics in __tsan_read4() from LL/SC
atomics to LSE atomics.
The following fragment is using LL/SC atomics in the .text section:
| <__tsan_unaligned_read4+304>: ldxr x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This LL/SC atomic sequence was to be replaced with LSE atomics. However since
the alternatives code was instrumentable, __tsan_read4() was being called after
only the first instruction was replaced, which led to the following code in memory:
| <__tsan_unaligned_read4+304>: ldadd x5, x6, [x2]
| <__tsan_unaligned_read4+308>: add x6, x6, x5
| <__tsan_unaligned_read4+312>: stxr w7, x6, [x2]
| <__tsan_unaligned_read4+316>: cbnz w7, <__tsan_unaligned_read4+304>
This caused an infinite loop as the `stxr` instruction never completed successfully,
so `w7` was always 0.
Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220405104733.11476-1-joey.gouly@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
2022-04-05 11:47:33 +01:00
|
|
|
/*
 * Default patching callback: copy the nr_inst replacement instructions for
 * @alt into @updptr, fixing up PC-relative encodings for their new home.
 *
 * noinstr: this runs while the kernel patches itself and the caches are
 * only invalidated once all alternatives have been applied, so it must not
 * call into any instrumentable (and hence possibly half-patched) code.
 */
static noinstr void patch_alternative(struct alt_instr *alt,
			      __le32 *origptr, __le32 *updptr, int nr_inst)
{
	__le32 *replptr;
	int i;

	replptr = ALT_REPL_PTR(alt);
	for (i = 0; i < nr_inst; i++) {
		u32 insn;

		insn = get_alt_insn(alt, origptr + i, replptr + i);
		updptr[i] = cpu_to_le32(insn);
	}
}
|
|
|
|
|
|
2018-06-22 09:31:15 +01:00
|
|
|
/*
|
|
|
|
|
* We provide our own, private D-cache cleaning function so that we don't
|
|
|
|
|
* accidentally call into the cache.S code, which is patched by us at
|
|
|
|
|
* runtime.
|
|
|
|
|
*/
|
|
|
|
|
/*
 * We provide our own, private D-cache cleaning function so that we don't
 * accidentally call into the cache.S code, which is patched by us at
 * runtime.
 */
static void clean_dcache_range_nopatch(u64 start, u64 end)
{
	u64 cur, d_size, ctr_el0;

	/* Minimum D-cache line size, from the system-wide sanitised CTR_EL0. */
	ctr_el0 = read_sanitised_ftr_reg(SYS_CTR_EL0);
	d_size = 4 << cpuid_feature_extract_unsigned_field(ctr_el0,
							   CTR_EL0_DminLine_SHIFT);
	/* Align the start address down to a cache-line boundary. */
	cur = start & ~(d_size - 1);
	do {
		/*
		 * We must clean+invalidate to the PoC in order to avoid
		 * Cortex-A53 errata 826319, 827319, 824069 and 819472
		 * (this corresponds to ARM64_WORKAROUND_CLEAN_CACHE)
		 */
		asm volatile("dc civac, %0" : : "r" (cur) : "memory");
	} while (cur += d_size, cur < end);
}
|
|
|
|
|
|
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
- arm64 perf: DDR PMU driver for Alibaba's T-Head Yitian 710 SoC, SVE
vector granule register added to the user regs together with SVE perf
extensions documentation.
- SVE updates: add HWCAP for SVE EBF16, update the SVE ABI
documentation to match the actual kernel behaviour (zeroing the
registers on syscall rather than "zeroed or preserved" previously).
- More conversions to automatic system registers generation.
- vDSO: use self-synchronising virtual counter access in gettimeofday()
if the architecture supports it.
- arm64 stacktrace cleanups and improvements.
- arm64 atomics improvements: always inline assembly, remove LL/SC
trampolines.
- Improve the reporting of EL1 exceptions: rework BTI and FPAC
exception handling, better EL1 undefs reporting.
- Cortex-A510 erratum 2658417: remove BF16 support due to incorrect
result.
- arm64 defconfig updates: build CoreSight as a module, enable options
necessary for docker, memory hotplug/hotremove, enable all PMUs
provided by Arm.
- arm64 ptrace() support for TPIDR2_EL0 (register provided with the SME
extensions).
- arm64 ftraces updates/fixes: fix module PLTs with mcount, remove
unused function.
- kselftest updates for arm64: simple HWCAP validation, FP stress test
improvements, validation of ZA regs in signal handlers, include
larger SVE and SME vector lengths in signal tests, various cleanups.
- arm64 alternatives (code patching) improvements to robustness and
consistency: replace cpucap static branches with equivalent
alternatives, associate callback alternatives with a cpucap.
- Miscellaneous updates: optimise kprobe performance of patching
single-step slots, simplify uaccess_mask_ptr(), move MTE registers
initialisation to C, support huge vmalloc() mappings, run softirqs on
the per-CPU IRQ stack, compat (arm32) misalignment fixups for
multiword accesses.
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (126 commits)
arm64: alternatives: Use vdso/bits.h instead of linux/bits.h
arm64/kprobe: Optimize the performance of patching single-step slot
arm64: defconfig: Add Coresight as module
kselftest/arm64: Handle EINTR while reading data from children
kselftest/arm64: Flag fp-stress as exiting when we begin finishing up
kselftest/arm64: Don't repeat termination handler for fp-stress
ARM64: reloc_test: add __init/__exit annotations to module init/exit funcs
arm64/mm: fold check for KFENCE into can_set_direct_map()
arm64: ftrace: fix module PLTs with mcount
arm64: module: Remove unused plt_entry_is_initialized()
arm64: module: Make plt_equals_entry() static
arm64: fix the build with binutils 2.27
kselftest/arm64: Don't enable v8.5 for MTE selftest builds
arm64: uaccess: simplify uaccess_mask_ptr()
arm64: asm/perf_regs.h: Avoid C++-style comment in UAPI header
kselftest/arm64: Fix typo in hwcap check
arm64: mte: move register initialization to C
arm64: mm: handle ARM64_KERNEL_USES_PMD_MAPS in vmemmap_populate()
arm64: dma: Drop cache invalidation from arch_dma_prep_coherent()
arm64/sve: Add Perf extensions documentation
...
2022-10-06 11:51:49 -07:00
|
|
|
/*
 * Walk every alt_instr in @region and patch in the replacement for each
 * entry whose capability is both selected in @feature_mask and detected on
 * this system. For the kernel image (!is_module) the writes go through the
 * linear-map alias and the caches are maintained here; modules are written
 * in place and rely on the module loader's cache maintenance.
 */
static void __apply_alternatives(const struct alt_region *region,
				 bool is_module,
				 unsigned long *feature_mask)
{
	struct alt_instr *alt;
	__le32 *origptr, *updptr;
	alternative_cb_t alt_cb;

	for (alt = region->begin; alt < region->end; alt++) {
		int nr_inst;
		int cap = ALT_CAP(alt);

		/* Skip capabilities the caller did not select for this pass. */
		if (!test_bit(cap, feature_mask))
			continue;

		/* Skip capabilities this system does not have. */
		if (!cpus_have_cap(cap))
			continue;

		/* Callback entries carry no replacement bytes of their own. */
		if (ALT_HAS_CB(alt))
			BUG_ON(alt->alt_len != 0);
		else
			BUG_ON(alt->alt_len != alt->orig_len);

		origptr = ALT_ORIG_PTR(alt);
		/* Kernel text is written via its (writable) linear-map alias. */
		updptr = is_module ? origptr : lm_alias(origptr);
		nr_inst = alt->orig_len / AARCH64_INSN_SIZE;

		if (ALT_HAS_CB(alt))
			alt_cb = ALT_REPL_PTR(alt);
		else
			alt_cb = patch_alternative;

		alt_cb(alt, origptr, updptr, nr_inst);

		if (!is_module) {
			clean_dcache_range_nopatch((u64)origptr,
						   (u64)(origptr + nr_inst));
		}
	}

	/*
	 * The core module code takes care of cache maintenance in
	 * flush_module_icache().
	 */
	if (!is_module) {
		dsb(ish);
		icache_inval_all_pou();
		isb();

		/* Ignore ARM64_CB bit from feature mask */
		bitmap_or(applied_alternatives, applied_alternatives,
			  feature_mask, ARM64_NCAPS);
		bitmap_and(applied_alternatives, applied_alternatives,
			   cpu_hwcaps, ARM64_NCAPS);
	}
}
|
|
|
|
|
|
2022-08-30 11:48:32 +01:00
|
|
|
/*
 * Patch the alternatives embedded in the vDSO image, if it carries an
 * ".altinstructions" ELF section. All capabilities are considered, since
 * the vDSO is patched once for the whole system.
 */
void apply_alternatives_vdso(void)
{
	struct alt_region region;
	const struct elf64_hdr *hdr;
	const struct elf64_shdr *shdr;
	const struct elf64_shdr *alt;
	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);

	bitmap_fill(all_capabilities, ARM64_NCAPS);

	/* Locate the alternatives section inside the vDSO ELF image. */
	hdr = (struct elf64_hdr *)vdso_start;
	shdr = (void *)hdr + hdr->e_shoff;
	alt = find_section(hdr, shdr, ".altinstructions");
	if (!alt)
		return;

	region = (struct alt_region){
		.begin	= (void *)hdr + alt->sh_offset,
		.end	= (void *)hdr + alt->sh_offset + alt->sh_size,
	};

	__apply_alternatives(&region, false, &all_capabilities[0]);
}
|
|
|
|
|
|
2022-09-12 17:22:07 +01:00
|
|
|
/* The main kernel image's alternatives, as delimited by the linker script. */
static const struct alt_region kernel_alternatives = {
	.begin	= (struct alt_instr *)__alt_instructions,
	.end	= (struct alt_instr *)__alt_instructions_end,
};
|
|
|
|
|
|
2015-07-28 19:07:28 +01:00
|
|
|
/*
 * We might be patching the stop_machine state machine, so implement a
 * really simple polling protocol here.
 */
static int __apply_alternatives_multi_stop(void *unused)
{
	/* We always have a CPU 0 at this point (__init) */
	if (smp_processor_id()) {
		/* Secondary CPUs spin until CPU 0 has finished patching. */
		while (!all_alternatives_applied)
			cpu_relax();
		isb();
	} else {
		DECLARE_BITMAP(remaining_capabilities, ARM64_NCAPS);

		/* Patch everything not already handled at boot time. */
		bitmap_complement(remaining_capabilities, boot_capabilities,
				  ARM64_NCAPS);

		BUG_ON(all_alternatives_applied);
		__apply_alternatives(&kernel_alternatives, false,
				     remaining_capabilities);
		/* Barriers provided by the cache flushing */
		all_alternatives_applied = 1;
	}

	return 0;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Apply every remaining system-wide alternative, plus those in the vDSO.
 * Runs under stop_machine() since live kernel text is being rewritten.
 */
void __init apply_alternatives_all(void)
{
	pr_info("applying system-wide alternatives\n");

	apply_alternatives_vdso();
	/* better not try code patching on a live SMP system */
	stop_machine(__apply_alternatives_multi_stop, NULL, cpu_online_mask);
}
|
|
|
|
|
|
2019-01-31 14:58:53 +00:00
|
|
|
/*
 * This is called very early in the boot process (directly after we run
 * a feature detect on the boot CPU). No need to worry about other CPUs
 * here.
 */
void __init apply_boot_alternatives(void)
{
	/* If called on non-boot cpu things could go wrong */
	WARN_ON(smp_processor_id() != 0);

	pr_info("applying boot alternatives\n");

	/* Only the capabilities detected on the boot CPU are patched here. */
	__apply_alternatives(&kernel_alternatives, false,
			     &boot_capabilities[0]);
}
|
|
|
|
|
|
2018-06-22 09:31:15 +01:00
|
|
|
#ifdef CONFIG_MODULES
/*
 * Patch the alternatives found in a just-loaded module's
 * [start, start + length) region, considering every capability eligible.
 */
void apply_alternatives_module(void *start, size_t length)
{
	DECLARE_BITMAP(all_capabilities, ARM64_NCAPS);
	struct alt_region region;

	region.begin = start;
	region.end = start + length;
	bitmap_fill(all_capabilities, ARM64_NCAPS);

	__apply_alternatives(&region, true, &all_capabilities[0]);
}
#endif
|
arm64: alternatives: add shared NOP callback
For each instance of an alternative, the compiler outputs a distinct
copy of the alternative instructions into a subsection. As the compiler
doesn't have special knowledge of alternatives, it cannot coalesce these
to save space.
In a defconfig kernel built with GCC 12.1.0, there are approximately
10,000 instances of alternative_has_feature_likely(), where the
replacement instruction is always a NOP. As NOPs are
position-independent, we don't need a unique copy per alternative
sequence.
This patch adds a callback to patch an alternative sequence with NOPs,
and make use of this in alternative_has_feature_likely(). So that this
can be used for other sites in future, this is written to patch multiple
instructions up to the original sequence length.
For NVHE, an alias is added to image-vars.h.
For modules, the callback is exported. Note that as modules are loaded
within 2GiB of the kernel, an alt_instr entry in a module can always
refer directly to the callback, and no special handling is necessary.
When building with GCC 12.1.0, the vmlinux is ~158KiB smaller, though
the resulting Image size is unchanged due to alignment constraints and
padding:
| % ls -al vmlinux-*
| -rwxr-xr-x 1 mark mark 134644592 Sep 1 14:52 vmlinux-after
| -rwxr-xr-x 1 mark mark 134486232 Sep 1 14:50 vmlinux-before
| % ls -al Image-*
| -rw-r--r-- 1 mark mark 37108224 Sep 1 14:52 Image-after
| -rw-r--r-- 1 mark mark 37108224 Sep 1 14:50 Image-before
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20220912162210.3626215-9-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-09-12 17:22:10 +01:00
|
|
|
|
|
|
|
|
/*
 * Shared callback that fills the whole patched region with NOPs. As NOPs
 * are position-independent, many alternative sites can reference this one
 * callback instead of each carrying its own replacement copy.
 *
 * noinstr: runs while the kernel is patching itself.
 */
noinstr void alt_cb_patch_nops(struct alt_instr *alt, __le32 *origptr,
			       __le32 *updptr, int nr_inst)
{
	int idx = 0;

	while (idx < nr_inst) {
		updptr[idx] = cpu_to_le32(aarch64_insn_gen_nop());
		idx++;
	}
}
EXPORT_SYMBOL(alt_cb_patch_nops);
|