arm64 fixes:
- Fix FP register corruption when SVE is not available or in use
- Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y
- Missing 'const' generating errors with LTO builds
- Remove unsupported events from Cortex-A73 PMU description
- Removal of stale and incorrect comments

-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1

iQEcBAABCgAGBQJaIXOkAAoJELescNyEwWM0swYH/3iSLxKnGDht1M9xqa5V288z
eNC/Vw/Y/Sqi305reRK6gWbJ0hwtJLYSEK3tDbeL6C9v9mg8CIZNzbPI3vrEjAq+
n8yKmJVYaXlu9jmmo7vqF7LZ7LRgKZPO0cEKWZBR8LAYjD0zJPikwDR/JvTkGH75
1VnFfwuMykB989NMcVGQ1eD2G5RH13e2j9D2ErT0fbdcZ/MWpcviVVqMr4ggsQoR
imVozMPXXLQ/0LeUfr8IRIst3x0CgFwmMX7CDWoVJJJXB7Zq0nvNptEtlS5tUZ/x
1vbXJstFasG3EL6QKiKxfUvtbaa4Vm7xEBBIVABQij+iUw8Og1OBojVi0wBCE3s=
=9hCV
-----END PGP SIGNATURE-----

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Will Deacon:
 "The critical one here is a fix for fpsimd register corruption across
  signals which was introduced by the SVE support code (the register
  files overlap), but the others are worth having as well.

  Summary:

  - Fix FP register corruption when SVE is not available or in use
  - Fix out-of-tree module build failure when CONFIG_ARM64_MODULE_PLTS=y
  - Missing 'const' generating errors with LTO builds
  - Remove unsupported events from Cortex-A73 PMU description
  - Removal of stale and incorrect comments"

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: context: Fix comments and remove pointless smp_wmb()
  arm64: cpu_ops: Add missing 'const' qualifiers
  arm64: perf: remove unsupported events for Cortex-A73
  arm64: fpsimd: Fix failure to restore FPSIMD state after signals
  arm64: pgd: Mark pgd_cache as __ro_after_init
  arm64: ftrace: emit ftrace-mod.o contents through code
  arm64: module-plts: factor out PLT generation code for ftrace
  arm64: mm: cleanup stale AIVIVT references
commit 4b1967c90a
arch/arm64/Makefile
@@ -83,9 +83,6 @@ endif
 
 ifeq ($(CONFIG_ARM64_MODULE_PLTS),y)
 KBUILD_LDFLAGS_MODULE	+= -T $(srctree)/arch/arm64/kernel/module.lds
-ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
-KBUILD_LDFLAGS_MODULE	+= $(objtree)/arch/arm64/kernel/ftrace-mod.o
-endif
 endif
 
 # Default value
arch/arm64/include/asm/cacheflush.h
@@ -38,7 +38,7 @@
  *
  *	See Documentation/cachetlb.txt for more information. Please note that
  *	the implementation assumes non-aliasing VIPT D-cache and (aliasing)
- *	VIPT or ASID-tagged VIVT I-cache.
+ *	VIPT I-cache.
  *
  *	flush_cache_mm(mm)
  *
arch/arm64/include/asm/module.h
@@ -32,7 +32,7 @@ struct mod_arch_specific {
 	struct mod_plt_sec	init;
 
 	/* for CONFIG_DYNAMIC_FTRACE */
-	void			*ftrace_trampoline;
+	struct plt_entry	*ftrace_trampoline;
 };
 #endif
 
@@ -45,4 +45,48 @@ extern u64 module_alloc_base;
 #define module_alloc_base	((u64)_etext - MODULES_VSIZE)
 #endif
 
+struct plt_entry {
+	/*
+	 * A program that conforms to the AArch64 Procedure Call Standard
+	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
+	 * IP1 (x17) may be inserted at any branch instruction that is
+	 * exposed to a relocation that supports long branches. Since that
+	 * is exactly what we are dealing with here, we are free to use x16
+	 * as a scratch register in the PLT veneers.
+	 */
+	__le32	mov0;	/* movn	x16, #0x....			*/
+	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
+	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
+	__le32	br;	/* br	x16				*/
+};
+
+static inline struct plt_entry get_plt_entry(u64 val)
+{
+	/*
+	 * MOVK/MOVN/MOVZ opcode:
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
+	 * +--------+------------+--------+-----------+-------------+---------+
+	 *
	 * Rd     := 0x10 (x16)
+	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
+	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
+	 * sf     := 1 (64-bit variant)
+	 */
+	return (struct plt_entry){
+		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
+		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
+		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
+		cpu_to_le32(0xd61f0200)
+	};
+}
+
+static inline bool plt_entries_equal(const struct plt_entry *a,
+				     const struct plt_entry *b)
+{
+	return a->mov0 == b->mov0 &&
+	       a->mov1 == b->mov1 &&
+	       a->mov2 == b->mov2;
+}
+
 #endif /* __ASM_MODULE_H */
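As a side note on the encoding above: the constants in get_plt_entry() can be reproduced in a few lines of ordinary C. The sketch below is a hypothetical stand-alone program, not part of this patch; it simply re-applies the movn/movk/movk/br bit patterns from the hunk to an example 48-bit target address.

/* Hypothetical user-space check of the PLT veneer encoding; not kernel code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t val = 0xffff000008abcdefULL;	/* example branch target */

	/* movn x16, #(~val & 0xffff): x16 gets val[15:0], upper bits all ones */
	uint32_t mov0 = 0x92800010 | (uint32_t)((~val & 0xffff) << 5);
	/* movk x16, #val[31:16], lsl #16 */
	uint32_t mov1 = 0xf2a00010 | (uint32_t)(((val >> 16) & 0xffff) << 5);
	/* movk x16, #val[47:32], lsl #32 */
	uint32_t mov2 = 0xf2c00010 | (uint32_t)(((val >> 32) & 0xffff) << 5);
	/* br x16 */
	uint32_t br = 0xd61f0200;

	printf("%08x %08x %08x %08x\n", mov0, mov1, mov2, br);
	return 0;
}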
arch/arm64/kernel/Makefile
@@ -61,6 +61,3 @@ extra-y += $(head-y) vmlinux.lds
 ifeq ($(CONFIG_DEBUG_EFI),y)
 AFLAGS_head.o += -DVMLINUX_PATH="\"$(realpath $(objtree)/vmlinux)\""
 endif
-
-# will be included by each individual module but not by the core kernel itself
-extra-$(CONFIG_DYNAMIC_FTRACE) += ftrace-mod.o
arch/arm64/kernel/cpu_ops.c
@@ -31,13 +31,13 @@ extern const struct cpu_operations cpu_psci_ops;
 
 const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init;
 
-static const struct cpu_operations *dt_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const dt_supported_cpu_ops[] __initconst = {
 	&smp_spin_table_ops,
 	&cpu_psci_ops,
 	NULL,
 };
 
-static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
+static const struct cpu_operations *const acpi_supported_cpu_ops[] __initconst = {
 #ifdef CONFIG_ARM64_ACPI_PARKING_PROTOCOL
 	&acpi_parking_protocol_ops,
 #endif
@@ -47,7 +47,7 @@ static const struct cpu_operations *acpi_supported_cpu_ops[] __initconst = {
 
 static const struct cpu_operations * __init cpu_get_ops(const char *name)
 {
-	const struct cpu_operations **ops;
+	const struct cpu_operations *const *ops;
 
 	ops = acpi_disabled ? dt_supported_cpu_ops : acpi_supported_cpu_ops;
 
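A note on the 'const' change above: __initconst places the table in a read-only init section, so the array object itself has to be const, not just the cpu_operations structures it points to; this is the missing 'const' that was generating errors with LTO builds. A minimal, hypothetical illustration in plain C (not kernel code):

/* Hypothetical illustration of why the extra 'const' matters; not kernel code. */
struct ops { int dummy; };

static const struct ops a = { 1 }, b = { 2 };

/* Array of pointers-to-const: the pointers themselves stay writable,
 * so the array lands in a writable data section. */
static const struct ops *writable_table[] = { &a, &b };

/* Array of const pointers-to-const: the whole table is immutable and
 * can be placed in a read-only section, as an annotation like
 * __initconst expects. */
static const struct ops *const ro_table[] = { &a, &b };

int main(void)
{
	return writable_table[0]->dummy + ro_table[1]->dummy;
}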
arch/arm64/kernel/fpsimd.c
@@ -1026,10 +1026,10 @@ void fpsimd_update_current_state(struct fpsimd_state *state)
 
 	local_bh_disable();
 
-	if (system_supports_sve() && test_thread_flag(TIF_SVE)) {
-		current->thread.fpsimd_state = *state;
+	current->thread.fpsimd_state = *state;
+	if (system_supports_sve() && test_thread_flag(TIF_SVE))
 		fpsimd_to_sve(current);
-	}
+
 	task_fpsimd_load();
 
 	if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) {
arch/arm64/kernel/ftrace-mod.S (deleted)
@@ -1,18 +0,0 @@
-/*
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/linkage.h>
-#include <asm/assembler.h>
-
-	.section	".text.ftrace_trampoline", "ax"
-	.align		3
-0:	.quad		0
-__ftrace_trampoline:
-	ldr		x16, 0b
-	br		x16
-ENDPROC(__ftrace_trampoline)
arch/arm64/kernel/ftrace.c
@@ -76,7 +76,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 
 	if (offset < -SZ_128M || offset >= SZ_128M) {
 #ifdef CONFIG_ARM64_MODULE_PLTS
-		unsigned long *trampoline;
+		struct plt_entry trampoline;
 		struct module *mod;
 
 		/*
@@ -104,22 +104,24 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 		 * is added in the future, but for now, the pr_err() below
 		 * deals with a theoretical issue only.
 		 */
-		trampoline = (unsigned long *)mod->arch.ftrace_trampoline;
-		if (trampoline[0] != addr) {
-			if (trampoline[0] != 0) {
+		trampoline = get_plt_entry(addr);
+		if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+				       &trampoline)) {
+			if (!plt_entries_equal(mod->arch.ftrace_trampoline,
+					       &(struct plt_entry){})) {
 				pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n");
 				return -EINVAL;
 			}
 
 			/* point the trampoline to our ftrace entry point */
 			module_disable_ro(mod);
-			trampoline[0] = addr;
+			*mod->arch.ftrace_trampoline = trampoline;
 			module_enable_ro(mod, true);
 
 			/* update trampoline before patching in the branch */
 			smp_wmb();
 		}
-		addr = (unsigned long)&trampoline[1];
+		addr = (unsigned long)(void *)mod->arch.ftrace_trampoline;
 #else /* CONFIG_ARM64_MODULE_PLTS */
 		return -EINVAL;
 #endif /* CONFIG_ARM64_MODULE_PLTS */
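For context on the SZ_128M test above: an AArch64 B/BL instruction encodes a 26-bit signed word offset, so a direct branch can only reach roughly ±128 MiB of the call site, and anything further is routed through the module's PLT trampoline patched in this hunk. A hypothetical stand-alone sketch of the same range check (not kernel code):

/* Hypothetical stand-alone version of the +/-128 MiB reachability test. */
#include <stdbool.h>
#include <stdint.h>

#define SZ_128M		(128 * 1024 * 1024L)

/* A B/BL immediate is 26 bits, scaled by 4: offsets in [-2^27, 2^27 - 4],
 * i.e. roughly +/-128 MiB around the branch instruction. */
static bool branch_reachable(uint64_t pc, uint64_t target)
{
	int64_t offset = (int64_t)(target - pc);

	return offset >= -SZ_128M && offset < SZ_128M;
}

int main(void)
{
	uint64_t pc = 0x10000000;

	/* 200 MiB away: out of direct-branch range, would need the PLT veneer. */
	return branch_reachable(pc, pc + 200 * 1024 * 1024) ? 0 : 1;
}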
arch/arm64/kernel/module-plts.c
@@ -11,21 +11,6 @@
 #include <linux/module.h>
 #include <linux/sort.h>
 
-struct plt_entry {
-	/*
-	 * A program that conforms to the AArch64 Procedure Call Standard
-	 * (AAPCS64) must assume that a veneer that alters IP0 (x16) and/or
-	 * IP1 (x17) may be inserted at any branch instruction that is
-	 * exposed to a relocation that supports long branches. Since that
-	 * is exactly what we are dealing with here, we are free to use x16
-	 * as a scratch register in the PLT veneers.
-	 */
-	__le32	mov0;	/* movn	x16, #0x....			*/
-	__le32	mov1;	/* movk	x16, #0x...., lsl #16		*/
-	__le32	mov2;	/* movk	x16, #0x...., lsl #32		*/
-	__le32	br;	/* br	x16				*/
-};
-
 static bool in_init(const struct module *mod, void *loc)
 {
 	return (u64)loc - (u64)mod->init_layout.base < mod->init_layout.size;
@@ -40,33 +25,14 @@ u64 module_emit_plt_entry(struct module *mod, void *loc, const Elf64_Rela *rela,
 	int i = pltsec->plt_num_entries;
 	u64 val = sym->st_value + rela->r_addend;
 
-	/*
-	 * MOVK/MOVN/MOVZ opcode:
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 * | sf[31] | opc[30:29] | 100101 | hw[22:21] | imm16[20:5] | Rd[4:0] |
-	 * +--------+------------+--------+-----------+-------------+---------+
-	 *
-	 * Rd     := 0x10 (x16)
-	 * hw     := 0b00 (no shift), 0b01 (lsl #16), 0b10 (lsl #32)
-	 * opc    := 0b11 (MOVK), 0b00 (MOVN), 0b10 (MOVZ)
-	 * sf     := 1 (64-bit variant)
-	 */
-	plt[i] = (struct plt_entry){
-		cpu_to_le32(0x92800010 | (((~val      ) & 0xffff)) << 5),
-		cpu_to_le32(0xf2a00010 | ((( val >> 16) & 0xffff)) << 5),
-		cpu_to_le32(0xf2c00010 | ((( val >> 32) & 0xffff)) << 5),
-		cpu_to_le32(0xd61f0200)
-	};
+	plt[i] = get_plt_entry(val);
 
 	/*
 	 * Check if the entry we just created is a duplicate. Given that the
 	 * relocations are sorted, this will be the last entry we allocated.
 	 * (if one exists).
 	 */
-	if (i > 0 &&
-	    plt[i].mov0 == plt[i - 1].mov0 &&
-	    plt[i].mov1 == plt[i - 1].mov1 &&
-	    plt[i].mov2 == plt[i - 1].mov2)
+	if (i > 0 && plt_entries_equal(plt + i, plt + i - 1))
 		return (u64)&plt[i - 1];
 
 	pltsec->plt_num_entries++;
@@ -154,6 +120,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	unsigned long core_plts = 0;
 	unsigned long init_plts = 0;
 	Elf64_Sym *syms = NULL;
+	Elf_Shdr *tramp = NULL;
 	int i;
 
 	/*
@@ -165,6 +132,10 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			mod->arch.core.plt = sechdrs + i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".init.plt"))
 			mod->arch.init.plt = sechdrs + i;
+		else if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) &&
+			 !strcmp(secstrings + sechdrs[i].sh_name,
+				 ".text.ftrace_trampoline"))
+			tramp = sechdrs + i;
 		else if (sechdrs[i].sh_type == SHT_SYMTAB)
 			syms = (Elf64_Sym *)sechdrs[i].sh_addr;
 	}
@@ -215,5 +186,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	mod->arch.init.plt_num_entries = 0;
 	mod->arch.init.plt_max_entries = init_plts;
 
+	if (tramp) {
+		tramp->sh_type = SHT_NOBITS;
+		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+		tramp->sh_addralign = __alignof__(struct plt_entry);
+		tramp->sh_size = sizeof(struct plt_entry);
+	}
+
 	return 0;
 }
arch/arm64/kernel/module.lds
@@ -1,4 +1,5 @@
 SECTIONS {
 	.plt (NOLOAD) : { BYTE(0) }
 	.init.plt (NOLOAD) : { BYTE(0) }
+	.text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }
arch/arm64/kernel/perf_event.c
@@ -262,12 +262,6 @@ static const unsigned armv8_a73_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
 
 	[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD,
 	[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
-
-	[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD,
-	[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)]	= ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR,
 };
 
 static const unsigned armv8_thunder_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
arch/arm64/mm/context.c
@@ -96,12 +96,6 @@ static void flush_context(unsigned int cpu)
 
 	set_reserved_asid_bits();
 
-	/*
-	 * Ensure the generation bump is observed before we xchg the
-	 * active_asids.
-	 */
-	smp_wmb();
-
 	for_each_possible_cpu(i) {
 		asid = atomic64_xchg_relaxed(&per_cpu(active_asids, i), 0);
 		/*
@@ -117,7 +111,10 @@ static void flush_context(unsigned int cpu)
 		per_cpu(reserved_asids, i) = asid;
 	}
 
-	/* Queue a TLB invalidate and flush the I-cache if necessary. */
+	/*
+	 * Queue a TLB invalidation for each CPU to perform on next
+	 * context-switch
+	 */
 	cpumask_setall(&tlb_flush_pending);
 }
 
@@ -202,11 +199,18 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu)
 	asid = atomic64_read(&mm->context.id);
 
 	/*
-	 * The memory ordering here is subtle. We rely on the control
-	 * dependency between the generation read and the update of
-	 * active_asids to ensure that we are synchronised with a
-	 * parallel rollover (i.e. this pairs with the smp_wmb() in
-	 * flush_context).
+	 * The memory ordering here is subtle.
+	 * If our ASID matches the current generation, then we update
+	 * our active_asids entry with a relaxed xchg. Racing with a
+	 * concurrent rollover means that either:
+	 *
+	 * - We get a zero back from the xchg and end up waiting on the
+	 *   lock. Taking the lock synchronises with the rollover and so
+	 *   we are forced to see the updated generation.
+	 *
+	 * - We get a valid ASID back from the xchg, which means the
+	 *   relaxed xchg in flush_context will treat us as reserved
+	 *   because atomic RmWs are totally ordered for a given location.
 	 */
 	if (!((asid ^ atomic64_read(&asid_generation)) >> asid_bits)
 	    && atomic64_xchg_relaxed(&per_cpu(active_asids, cpu), asid))
arch/arm64/mm/pgd.c
@@ -26,7 +26,7 @@
 #include <asm/page.h>
 #include <asm/tlbflush.h>
 
-static struct kmem_cache *pgd_cache;
+static struct kmem_cache *pgd_cache __ro_after_init;
 
 pgd_t *pgd_alloc(struct mm_struct *mm)
 {