2020-06-30 13:55:59 +01:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
#ifndef __ASM_ALTERNATIVE_MACROS_H
|
|
|
|
|
#define __ASM_ALTERNATIVE_MACROS_H
|
|
|
|
|
|
2022-09-12 17:22:08 +01:00
|
|
|
#include <linux/const.h>
|
2022-10-03 12:37:59 -07:00
|
|
|
#include <vdso/bits.h>
|
2022-09-12 17:22:08 +01:00
|
|
|
|
2020-06-30 13:55:59 +01:00
|
|
|
#include <asm/cpucaps.h>
|
2021-06-18 16:11:22 +01:00
|
|
|
#include <asm/insn-def.h>
|
2020-06-30 13:55:59 +01:00
|
|
|
|
arm64: fix the build with binutils 2.27
Jon Hunter reports that for some toolchains the build has been broken
since commit:
4c0bd995d73ed889 ("arm64: alternatives: have callbacks take a cap")
... with a stream of build-time splats of the form:
| CC arch/arm64/kvm/hyp/vhe/debug-sr.o
| /tmp/ccY3kbki.s: Assembler messages:
| /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1600: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1600: Error: junk at end of line, first unrecognized character
| is `L'
| /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1723: Error: found 'L', expected: ')'
| /tmp/ccY3kbki.s:1723: Error: junk at end of line, first unrecognized character
| is `L'
| scripts/Makefile.build:249: recipe for target
| 'arch/arm64/kvm/hyp/vhe/debug-sr.o' failed
The issue here is that older versions of binutils (up to and including
2.27.0) don't like an 'L' suffix on constants. For plain assembly files,
UL() avoids this suffix, but in C files this gets added, and so for
inline assembly we can't directly use a constant defined with `UL()`.
We could avoid this by passing the constant as an input parameter, but
this isn't practical given the way we use the alternative macros.
Instead, just open code the constant without the `UL` suffix, and for
consistency do this for both the inline assembly macro and the regular
assembly macro.
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Fixes: 4c0bd995d73e ("arm64: alternatives: have callbacks take a cap")
Reported-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/linux-arm-kernel/3cecc3a5-30b0-f0bd-c3de-9e09bd21909b@nvidia.com/
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/20220929150227.1028556-1-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-09-29 16:02:27 +01:00
|
|
|
/*
 * Binutils 2.27.0 can't handle a 'UL' suffix on constants, so for the assembly
 * macros below we must use `(1 << ARM64_CB_SHIFT)`.
 *
 * ARM64_CB_BIT is bit 15 of the 16-bit (.hword) feature field in a
 * struct alt_instr entry; when set, the entry names a patch callback
 * rather than a plain cpucap alternative.
 */
#define ARM64_CB_SHIFT	15
#define ARM64_CB_BIT	BIT(ARM64_CB_SHIFT)
|
2022-09-12 17:22:08 +01:00
|
|
|
|
|
|
|
|
/*
 * cpucap numbers share the 16-bit feature field with ARM64_CB_BIT, so
 * every cap must encode below the callback flag.
 */
#if ARM64_NCAPS >= ARM64_CB_BIT
#error "cpucaps have overflown ARM64_CB_BIT"
#endif
|
2020-06-30 13:55:59 +01:00
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
|
|
#include <linux/stringify.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Emit one .altinstructions entry for a plain cpucap alternative.
 * Offsets are stored relative to '.' so the records are position
 * independent.
 */
#define ALTINSTR_ENTRY(feature)					              \
	" .word 661b - .\n"				/* label           */ \
	" .word 663f - .\n"				/* new instruction */ \
	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
	" .byte 662b-661b\n"				/* source len      */ \
	" .byte 664f-663f\n"				/* replacement len */
|
|
|
|
|
|
|
|
|
|
/*
 * As ALTINSTR_ENTRY, but the second word records a patch callback
 * instead of a replacement-instruction offset.
 */
#define ALTINSTR_ENTRY_CB(feature, cb)				              \
	" .word 661b - .\n"				/* label           */ \
	" .word " __stringify(cb) "- .\n"		/* callback        */ \
	" .hword " __stringify(feature) "\n"		/* feature bit     */ \
	" .byte 662b-661b\n"				/* source len      */ \
	" .byte 664f-663f\n"				/* replacement len */
|
|
|
|
|
|
|
|
|
|
/*
 * alternative assembly primitive:
 *
 * Labels: 661/662 bracket the default (oldinstr) sequence; 663/664
 * bracket the replacement, which lives out-of-line in subsection 1 of
 * the current section.
 *
 * If any of these .org directive fail, it means that insn1 and insn2
 * don't have the same length. This used to be written as
 *
 * .if ((664b-663b) != (662b-661b))
 * 	.error "Alternatives instruction length mismatch"
 * .endif
 *
 * but most assemblers die if insn1 or insn2 have a .inst. This should
 * be fixed in a binutils release posterior to 2.25.51.0.2 (anything
 * containing commit 4e4d08cf7399b606 or c1baaddf8861).
 *
 * Alternatives with callbacks do not generate replacement instructions.
 */
#define __ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg_enabled)	\
	".if "__stringify(cfg_enabled)" == 1\n"				\
	"661:\n\t"							\
	oldinstr "\n"							\
	"662:\n"							\
	".pushsection .altinstructions,\"a\"\n"				\
	ALTINSTR_ENTRY(feature)						\
	".popsection\n"							\
	".subsection 1\n"						\
	"663:\n\t"							\
	newinstr "\n"							\
	"664:\n\t"							\
	".org . - (664b-663b) + (662b-661b)\n\t"			\
	".org . - (662b-661b) + (664b-663b)\n\t"			\
	".previous\n"							\
	".endif\n"
|
|
|
|
|
|
|
|
|
|
/*
 * Callback-based variant of __ALTERNATIVE_CFG: no replacement
 * instructions are emitted (labels 663 and 664 coincide, so the entry's
 * replacement length is zero); the recorded callback supplies the patch.
 */
#define __ALTERNATIVE_CFG_CB(oldinstr, feature, cfg_enabled, cb)	\
	".if "__stringify(cfg_enabled)" == 1\n"				\
	"661:\n\t"							\
	oldinstr "\n"							\
	"662:\n"							\
	".pushsection .altinstructions,\"a\"\n"				\
	ALTINSTR_ENTRY_CB(feature, cb)					\
	".popsection\n"							\
	"663:\n\t"							\
	"664:\n\t"							\
	".endif\n"
|
|
|
|
|
|
|
|
|
|
/*
 * The trailing varargs swallow the "1" that ALTERNATIVE() appends when
 * no CONFIG_* option is supplied, so cfg always has a value.
 */
#define _ALTERNATIVE_CFG(oldinstr, newinstr, feature, cfg, ...)	\
	__ALTERNATIVE_CFG(oldinstr, newinstr, feature, IS_ENABLED(cfg))
|
|
|
|
|
|
2022-09-12 17:22:08 +01:00
|
|
|
/*
 * Register a callback-patched alternative. (1 << ARM64_CB_SHIFT) is
 * open-coded rather than using ARM64_CB_BIT because binutils <= 2.27.0
 * rejects the 'UL' suffix that BIT() would introduce into the asm.
 */
#define ALTERNATIVE_CB(oldinstr, feature, cb) \
	__ALTERNATIVE_CFG_CB(oldinstr, (1 << ARM64_CB_SHIFT) | (feature), 1, cb)
|
2020-06-30 13:55:59 +01:00
|
|
|
#else
|
|
|
|
|
|
|
|
|
|
#include <asm/assembler.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Emit one .altinstructions record (assembly counterpart of
 * ALTINSTR_ENTRY). Offsets are relative to '.' so the record is
 * position independent.
 */
.macro altinstruction_entry orig_offset alt_offset feature orig_len alt_len
	.word \orig_offset - .
	.word \alt_offset - .
	.hword (\feature)
	.byte \orig_len
	.byte \alt_len
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Assembly counterpart of __ALTERNATIVE_CFG: \insn1 is the default
 * sequence, \insn2 the alternative for \cap. The whole block is elided
 * when \enable is 0. The paired .org directives fault at assembly time
 * if the two sequences differ in length.
 */
.macro alternative_insn insn1, insn2, cap, enable = 1
	.if \enable
661:	\insn1
662:	.pushsection .altinstructions, "a"
	altinstruction_entry 661b, 663f, \cap, 662b-661b, 664f-663f
	.popsection
	.subsection 1
663:	\insn2
664:	.org . - (664b-663b) + (662b-661b)
	.org . - (662b-661b) + (664b-663b)
	.previous
	.endif
.endm
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Alternative sequences
|
|
|
|
|
*
|
|
|
|
|
* The code for the case where the capability is not present will be
|
|
|
|
|
* assembled and linked as normal. There are no restrictions on this
|
|
|
|
|
* code.
|
|
|
|
|
*
|
|
|
|
|
* The code for the case where the capability is present will be
|
|
|
|
|
* assembled into a special section to be used for dynamic patching.
|
|
|
|
|
* Code for that case must:
|
|
|
|
|
*
|
|
|
|
|
* 1. Be exactly the same length (in bytes) as the default code
|
|
|
|
|
* sequence.
|
|
|
|
|
*
|
|
|
|
|
* 2. Not contain a branch target that is used outside of the
|
|
|
|
|
* alternative sequence it is defined in (branches into an
|
|
|
|
|
* alternative sequence are not fixed up).
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
/*
 * Begin an alternative code sequence.
 *
 * The default sequence follows inline (.Lasm_alt_mode == 0); the
 * replacement is provided after alternative_else.
 */
.macro alternative_if_not cap
	.set .Lasm_alt_mode, 0
	.pushsection .altinstructions, "a"
	altinstruction_entry 661f, 663f, \cap, 662f-661f, 664f-663f
	.popsection
661:
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Begin an alternative code sequence with the replacement first: the
 * capability-present code follows inline but is assembled out-of-line
 * into subsection 1 (.Lasm_alt_mode == 1); the default sequence comes
 * after alternative_else. Note the swapped label order in the entry.
 */
.macro alternative_if cap
	.set .Lasm_alt_mode, 1
	.pushsection .altinstructions, "a"
	altinstruction_entry 663f, 661f, \cap, 664f-663f, 662f-661f
	.popsection
	.subsection 1
	.align 2	/* So GAS knows label 661 is suitably aligned */
661:
.endm
|
|
|
|
|
|
2022-09-12 17:22:08 +01:00
|
|
|
/*
 * Begin a callback-patched alternative sequence (assembly counterpart
 * of ALTERNATIVE_CB). The entry records \cb in place of a replacement
 * offset, with zero replacement length. (1 << ARM64_CB_SHIFT) is
 * open-coded because binutils <= 2.27.0 cannot parse the 'UL' suffix
 * that BIT() would add.
 */
.macro alternative_cb cap, cb
	.set .Lasm_alt_mode, 0
	.pushsection .altinstructions, "a"
	altinstruction_entry 661f, \cb, (1 << ARM64_CB_SHIFT) | \cap, 662f-661f, 0
	.popsection
661:
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Provide the other half of the alternative code sequence.
 *
 * Which direction we switch depends on which half came first:
 * after alternative_if_not (mode 0) the replacement goes out of line
 * into subsection 1; after alternative_if (mode 1) we return to the
 * main section for the default sequence.
 */
.macro alternative_else
662:
	.if .Lasm_alt_mode==0
	.subsection 1
	.else
	.previous
	.endif
663:
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Complete an alternative code sequence.
 *
 * The paired .org directives fault at assembly time if the two halves
 * differ in length.
 */
.macro alternative_endif
664:
	.org . - (664b-663b) + (662b-661b)
	.org . - (662b-661b) + (664b-663b)
	.if .Lasm_alt_mode==0
	.previous
	.endif
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Callback-based alternative epilogue.
 *
 * Label 662 closes the region opened at 661 by alternative_cb; the
 * entry's source length is 662f-661f.
 */
.macro alternative_cb_end
662:
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Provides a trivial alternative or default sequence consisting solely
 * of NOPs. The number of NOPs is chosen automatically to match the
 * previous case ((662b-661b) bytes, divided into instructions).
 */
.macro alternative_else_nop_endif
alternative_else
	nops (662b-661b) / AARCH64_INSN_SIZE
alternative_endif
.endm
|
|
|
|
|
|
|
|
|
|
/*
 * Assembly spelling of _ALTERNATIVE_CFG; the trailing varargs swallow
 * the "1" appended by ALTERNATIVE() when no CONFIG_* option is given.
 */
#define _ALTERNATIVE_CFG(insn1, insn2, cap, cfg, ...)	\
	alternative_insn insn1, insn2, cap, IS_ENABLED(cfg)
|
|
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
|
|
/*
 * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature));
 *
 * Usage: asm(ALTERNATIVE(oldinstr, newinstr, feature, CONFIG_FOO));
 * N.B. If CONFIG_FOO is specified, but not selected, the whole block
 * will be omitted, including oldinstr.
 */
#define ALTERNATIVE(oldinstr, newinstr, ...)   \
	_ALTERNATIVE_CFG(oldinstr, newinstr, __VA_ARGS__, 1)
|
|
|
|
|
|
arm64: alternatives: add alternative_has_feature_*()
Currently we use a mixture of alternative sequences and static branches
to handle features detected at boot time. For ease of maintenance we
generally prefer to use static branches in C code, but this has a few
downsides:
* Each static branch has metadata in the __jump_table section, which is
not discarded after features are finalized. This wastes some space,
and slows down the patching of other static branches.
* The static branches are patched at a different point in time from the
alternatives, so changes are not atomic. This leaves a transient
period where there could be a mismatch between the behaviour of
alternatives and static branches, which could be problematic for some
features (e.g. pseudo-NMI).
* More (instrumentable) kernel code is executed to patch each static
branch, which can be risky when patching certain features (e.g.
irqflags management for pseudo-NMI).
* When CONFIG_JUMP_LABEL=n, static branches are turned into a load of a
flag and a conditional branch. This means it isn't safe to use such
static branches in an alternative address space (e.g. the NVHE/PKVM
hyp code), where the generated address isn't safe to access.
To deal with these issues, this patch introduces new
alternative_has_feature_*() helpers, which work like static branches but
are patched using alternatives. This ensures the patching is performed
at the same time as other alternative patching, allows the metadata to
be freed after patching, and is safe for use in alternative address
spaces.
Note that all supported toolchains have asm goto support, and since
commit:
a0a12c3ed057af57 ("asm goto: eradicate CC_HAS_ASM_GOTO")
... the CC_HAS_ASM_GOTO Kconfig symbol has been removed, so no feature
check is necessary, and we can always make use of asm goto.
Additionally, note that:
* This has no impact on cpus_have_cap(), which is a dynamic check.
* This has no functional impact on cpus_have_const_cap(). The branches
are patched slightly later than before this patch, but these branches
are not reachable until caps have been finalised.
* It is now invalid to use cpus_have_final_cap() in the window between
feature detection and patching. All existing uses are only expected
after patching anyway, so this should not be a problem.
* The LSE atomics will now be enabled during alternatives patching
rather than immediately before. As the LL/SC and LSE atomics are
functionally equivalent this should not be problematic.
When building defconfig with GCC 12.1.0, the resulting Image is 64KiB
smaller:
| % ls -al Image-*
| -rw-r--r-- 1 mark mark 37108224 Aug 23 09:56 Image-after
| -rw-r--r-- 1 mark mark 37173760 Aug 23 09:54 Image-before
According to bloat-o-meter.pl:
| add/remove: 44/34 grow/shrink: 602/1294 up/down: 39692/-61108 (-21416)
| Function old new delta
| [...]
| Total: Before=16618336, After=16596920, chg -0.13%
| add/remove: 0/2 grow/shrink: 0/0 up/down: 0/-1296 (-1296)
| Data old new delta
| arm64_const_caps_ready 16 - -16
| cpu_hwcap_keys 1280 - -1280
| Total: Before=8987120, After=8985824, chg -0.01%
| add/remove: 0/0 grow/shrink: 0/0 up/down: 0/0 (0)
| RO Data old new delta
| Total: Before=18408, After=18408, chg +0.00%
Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Joey Gouly <joey.gouly@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20220912162210.3626215-8-mark.rutland@arm.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2022-09-12 17:22:09 +01:00
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
|
|
|
|
|
#include <linux/types.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Check for a boot-time-detected feature; works like a static branch
 * but is patched via the alternatives mechanism, so it is safe in
 * alternative address spaces and carries no __jump_table metadata.
 *
 * "likely" flavour: the branch to l_no is the default and is rewritten
 * to NOPs by the shared alt_cb_patch_nops callback when the feature is
 * present, making the true path fall through.
 */
static __always_inline bool
alternative_has_feature_likely(unsigned long feature)
{
	/* The cpucap must fit in the 16-bit alt_instr feature field. */
	compiletime_assert(feature < ARM64_NCAPS,
			   "feature must be < ARM64_NCAPS");

	asm_volatile_goto(
	ALTERNATIVE_CB("b %l[l_no]", %[feature], alt_cb_patch_nops)
	:
	: [feature] "i" (feature)
	:
	: l_no);

	return true;
l_no:
	return false;
}
|
|
|
|
|
|
|
|
|
|
/*
 * As alternative_has_feature_likely(), but the "unlikely" flavour: the
 * default is a NOP (returning false), replaced with a branch to l_yes
 * when the feature is present.
 */
static __always_inline bool
alternative_has_feature_unlikely(unsigned long feature)
{
	/* The cpucap must fit in the 16-bit alt_instr feature field. */
	compiletime_assert(feature < ARM64_NCAPS,
			   "feature must be < ARM64_NCAPS");

	asm_volatile_goto(
	ALTERNATIVE("nop", "b %l[l_yes]", %[feature])
	:
	: [feature] "i" (feature)
	:
	: l_yes);

	return false;
l_yes:
	return true;
}
|
|
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
2020-06-30 13:55:59 +01:00
|
|
|
#endif /* __ASM_ALTERNATIVE_MACROS_H */
|