Merge branch 'for-next/boot' into for-next/core

* for-next/boot: (34 commits)
  arm64: fix KASAN_INLINE
  arm64: Add an override for ID_AA64SMFR0_EL1.FA64
  arm64: Add the arm64.nosve command line option
  arm64: Add the arm64.nosme command line option
  arm64: Expose a __check_override primitive for oddball features
  arm64: Allow the idreg override to deal with variable field width
  arm64: Factor out checking of a feature against the override into a macro
  arm64: Allow sticky E2H when entering EL1
  arm64: Save state of HCR_EL2.E2H before switch to EL1
  arm64: Rename the VHE switch to "finalise_el2"
  arm64: mm: fix booting with 52-bit address space
  arm64: head: remove __PHYS_OFFSET
  arm64: lds: use PROVIDE instead of conditional definitions
  arm64: setup: drop early FDT pointer helpers
  arm64: head: avoid relocating the kernel twice for KASLR
  arm64: kaslr: defer initialization to initcall where permitted
  arm64: head: record CPU boot mode after enabling the MMU
  arm64: head: populate kernel page tables with MMU and caches on
  arm64: head: factor out TTBR1 assignment into a macro
  arm64: idreg-override: use early FDT mapping in ID map
  ...
Will Deacon, 2022-07-25 10:59:15 +01:00 (commit f96d67a8af)
24 changed files with 753 additions and 615 deletions

View File

@ -400,6 +400,12 @@
arm64.nomte [ARM64] Unconditionally disable Memory Tagging Extension
support
arm64.nosve [ARM64] Unconditionally disable Scalable Vector
Extension support
arm64.nosme [ARM64] Unconditionally disable Scalable Matrix
Extension support
ataflop= [HW,M68k]
atarimouse= [HW,MOUSE] Atari Mouse

View File

@ -60,12 +60,13 @@ these functions (see arch/arm{,64}/include/asm/virt.h):
* ::
x0 = HVC_VHE_RESTART (arm64 only)
x0 = HVC_FINALISE_EL2 (arm64 only)
Attempt to upgrade the kernel's exception level from EL1 to EL2 by enabling
the VHE mode. This is conditioned by the CPU supporting VHE, the EL2 MMU
being off, and VHE not being disabled by any other means (command line
option, for example).
Finish configuring EL2 depending on the command-line options,
including an attempt to upgrade the kernel's exception level from
EL1 to EL2 by enabling the VHE mode. This is conditioned by the CPU
supporting VHE, the EL2 MMU being off, and VHE not being disabled by
any other means (command line option, for example).
Any other value of r0/x0 triggers a hypervisor-specific handling,
which is not documented here.

View File

@ -359,6 +359,20 @@ alternative_cb_end
bfi \valreg, \t1sz, #TCR_T1SZ_OFFSET, #TCR_TxSZ_WIDTH
.endm
/*
* idmap_get_t0sz - get the T0SZ value needed to cover the ID map
*
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
* entire ID map region can be mapped. As T0SZ == (64 - #bits used),
* this number conveniently equals the number of leading zeroes in
* the physical address of _end.
*/
.macro idmap_get_t0sz, reg
adrp \reg, _end
orr \reg, \reg, #(1 << VA_BITS_MIN) - 1
clz \reg, \reg
.endm
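To make the arithmetic in the comment concrete, here is a minimal user-space sketch of the same calculation; the physical address and VA_BITS_MIN values are made up for illustration, and this is not kernel code:

#include <stdint.h>
#include <stdio.h>

/* T0SZ = number of leading zeroes of (__pa(_end) | ((1 << VA_BITS_MIN) - 1)),
 * i.e. 64 minus the number of address bits the ID map has to cover. */
static unsigned int idmap_t0sz(uint64_t pa_end, unsigned int va_bits_min)
{
        return __builtin_clzll(pa_end | ((1ULL << va_bits_min) - 1));
}

int main(void)
{
        /* image ends at PA 2^40 on a 39-bit VA configuration: 41 bits needed */
        printf("T0SZ = %u\n", idmap_t0sz(1ULL << 40, 39));      /* prints 23 */
        return 0;
}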
/*
* tcr_compute_pa_size - set TCR.(I)PS to the highest supported
* ID_AA64MMFR0_EL1.PARange value
@ -465,6 +479,18 @@ alternative_endif
_cond_uaccess_extable .Licache_op\@, \fixup
.endm
/*
* load_ttbr1 - install @pgtbl as a TTBR1 page table
* pgtbl preserved
* tmp1/tmp2 clobbered, either may overlap with pgtbl
*/
.macro load_ttbr1, pgtbl, tmp1, tmp2
phys_to_ttbr \tmp1, \pgtbl
offset_ttbr1 \tmp1, \tmp2
msr ttbr1_el1, \tmp1
isb
.endm
/*
* To prevent the possibility of old and new partial table walks being visible
* in the tlb, switch the ttbr to a zero page when we invalidate the old
@ -478,10 +504,7 @@ alternative_endif
isb
tlbi vmalle1
dsb nsh
phys_to_ttbr \tmp, \page_table
offset_ttbr1 \tmp, \tmp2
msr ttbr1_el1, \tmp
isb
load_ttbr1 \page_table, \tmp, \tmp2
.endm
/*

View File

@ -908,7 +908,10 @@ static inline unsigned int get_vmid_bits(u64 mmfr1)
}
extern struct arm64_ftr_override id_aa64mmfr1_override;
extern struct arm64_ftr_override id_aa64pfr0_override;
extern struct arm64_ftr_override id_aa64pfr1_override;
extern struct arm64_ftr_override id_aa64zfr0_override;
extern struct arm64_ftr_override id_aa64smfr0_override;
extern struct arm64_ftr_override id_aa64isar1_override;
extern struct arm64_ftr_override id_aa64isar2_override;

View File

@ -129,64 +129,6 @@
msr cptr_el2, x0 // Disable copro. traps to EL2
.endm
/* SVE register access */
.macro __init_el2_nvhe_sve
mrs x1, id_aa64pfr0_el1
ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4
cbz x1, .Lskip_sve_\@
bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve_\@:
.endm
/* SME register access and priority mapping */
.macro __init_el2_nvhe_sme
mrs x1, id_aa64pfr1_el1
ubfx x1, x1, #ID_AA64PFR1_SME_SHIFT, #4
cbz x1, .Lskip_sme_\@
bic x0, x0, #CPTR_EL2_TSM // Also disable SME traps
msr cptr_el2, x0 // Disable copro. traps to EL2
isb
mrs x1, sctlr_el2
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
msr sctlr_el2, x1
isb
mov x1, #0 // SMCR controls
mrs_s x2, SYS_ID_AA64SMFR0_EL1
ubfx x2, x2, #ID_AA64SMFR0_EL1_FA64_SHIFT, #1 // Full FP in SM?
cbz x2, .Lskip_sme_fa64_\@
orr x1, x1, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64_\@:
orr x1, x1, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x1 // length for EL1.
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
cbz x1, .Lskip_sme_\@
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
cbz x1, .Lskip_sme_\@
mrs_s x1, SYS_HCRX_EL2
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
msr_s SYS_HCRX_EL2, x1
.Lskip_sme_\@:
.endm
/* Disable any fine grained traps */
.macro __init_el2_fgt
mrs x1, id_aa64mmfr0_el1
@ -250,8 +192,6 @@
__init_el2_hstr
__init_el2_nvhe_idregs
__init_el2_nvhe_cptr
__init_el2_nvhe_sve
__init_el2_nvhe_sme
__init_el2_fgt
__init_el2_nvhe_prepare_eret
.endm

View File

@ -8,6 +8,7 @@
#ifndef __ASM_KERNEL_PGTABLE_H
#define __ASM_KERNEL_PGTABLE_H
#include <asm/boot.h>
#include <asm/pgtable-hwdef.h>
#include <asm/sparsemem.h>
@ -35,10 +36,8 @@
*/
#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS - 1)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT) - 1)
#else
#define SWAPPER_PGTABLE_LEVELS (CONFIG_PGTABLE_LEVELS)
#define IDMAP_PGTABLE_LEVELS (ARM64_HW_PGTABLE_LEVELS(PHYS_MASK_SHIFT))
#endif
@ -87,7 +86,14 @@
+ EARLY_PUDS((vstart), (vend)) /* each PUD needs a next level page table */ \
+ EARLY_PMDS((vstart), (vend))) /* each PMD needs a next level page table */
#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end))
#define IDMAP_DIR_SIZE (IDMAP_PGTABLE_LEVELS * PAGE_SIZE)
/* the initial ID map may need two extra pages if it needs to be extended */
#if VA_BITS < 48
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
#else
#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
#endif
#define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE)
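For orientation, the EARLY_PAGES()-style count above amounts to one page for the top-level table plus, for each lower level, one next-level table per entry spanned at the level above. The rough user-space sketch below is an approximation with made-up names, not the kernel macros:

#include <stdint.h>

/* levels: number of translation levels in use; page_shift: log2(PAGE_SIZE) */
static unsigned long early_pages(uint64_t vstart, uint64_t vend,
                                 int levels, int page_shift)
{
        int ptrs_shift = page_shift - 3;        /* log2(entries per table) */
        unsigned long pages = 1;                /* the top-level table itself */

        for (int lvl = 1; lvl < levels; lvl++) {
                /* shift of the level *above* the tables being counted */
                int shift = page_shift + (levels - lvl) * ptrs_shift;

                pages += (vend >> shift) - (vstart >> shift) + 1;
        }
        return pages;
}

With the block-mapped (PMD_MAPS) configuration one fewer level is used, which is what the SWAPPER_PGTABLE_LEVELS selection above expresses.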
/* Initial memory map size */
#if ARM64_KERNEL_USES_PMD_MAPS
@ -107,9 +113,11 @@
#define SWAPPER_PMD_FLAGS (PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
#if ARM64_KERNEL_USES_PMD_MAPS
#define SWAPPER_MM_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
#define SWAPPER_RW_MMUFLAGS (PMD_ATTRINDX(MT_NORMAL) | SWAPPER_PMD_FLAGS)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PMD_SECT_RDONLY)
#else
#define SWAPPER_MM_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#define SWAPPER_RW_MMUFLAGS (PTE_ATTRINDX(MT_NORMAL) | SWAPPER_PTE_FLAGS)
#define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY)
#endif
/*

View File

@ -174,7 +174,11 @@
#include <linux/types.h>
#include <asm/bug.h>
#if VA_BITS > 48
extern u64 vabits_actual;
#else
#define vabits_actual ((u64)VA_BITS)
#endif
extern s64 memstart_addr;
/* PHYS_OFFSET - the physical address of the start of memory. */

View File

@ -60,8 +60,7 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
* TCR_T0SZ(VA_BITS), unless system RAM is positioned very high in
* physical memory, in which case it will be smaller.
*/
extern u64 idmap_t0sz;
extern u64 idmap_ptrs_per_pgd;
extern int idmap_t0sz;
/*
* Ensure TCR.T0SZ is set to the provided value.
@ -106,13 +105,18 @@ static inline void cpu_uninstall_idmap(void)
cpu_switch_mm(mm->pgd, mm);
}
static inline void cpu_install_idmap(void)
static inline void __cpu_install_idmap(pgd_t *idmap)
{
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_idmap_tcr_t0sz();
cpu_switch_mm(lm_alias(idmap_pg_dir), &init_mm);
cpu_switch_mm(lm_alias(idmap), &init_mm);
}
static inline void cpu_install_idmap(void)
{
__cpu_install_idmap(idmap_pg_dir);
}
/*
@ -143,7 +147,7 @@ static inline void cpu_install_ttbr0(phys_addr_t ttbr0, unsigned long t0sz)
* Atomically replaces the active TTBR1_EL1 PGD with a new VA-compatible PGD,
* avoiding the possibility of conflicting TLB entries being allocated.
*/
static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
@ -166,7 +170,7 @@ static inline void __nocfi cpu_replace_ttbr1(pgd_t *pgdp)
replace_phys = (void *)__pa_symbol(function_nocfi(idmap_cpu_replace_ttbr1));
cpu_install_idmap();
__cpu_install_idmap(idmap);
replace_phys(ttbr1);
cpu_uninstall_idmap();
}

View File

@ -36,9 +36,9 @@
#define HVC_RESET_VECTORS 2
/*
* HVC_VHE_RESTART - Upgrade the CPU from EL1 to EL2, if possible
* HVC_FINALISE_EL2 - Upgrade the CPU from EL1 to EL2, if possible
*/
#define HVC_VHE_RESTART 3
#define HVC_FINALISE_EL2 3
/* Max number of HYP stub hypercalls */
#define HVC_STUB_HCALL_NR 4
@ -49,6 +49,13 @@
#define BOOT_CPU_MODE_EL1 (0xe11)
#define BOOT_CPU_MODE_EL2 (0xe12)
/*
* Flags returned together with the boot mode, but not preserved in
* __boot_cpu_mode. Used by the idreg override code to work out the
* boot state.
*/
#define BOOT_CPU_FLAG_E2H BIT_ULL(32)
#ifndef __ASSEMBLY__
#include <asm/ptrace.h>

View File

@ -64,7 +64,7 @@ obj-$(CONFIG_ACPI) += acpi.o
obj-$(CONFIG_ACPI_NUMA) += acpi_numa.o
obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o
obj-$(CONFIG_PARAVIRT) += paravirt.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o pi/
obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \

View File

@ -633,7 +633,10 @@ static const struct arm64_ftr_bits ftr_raz[] = {
__ARM64_FTR_REG_OVERRIDE(#id, id, table, &no_override)
struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64pfr1_override;
struct arm64_ftr_override __ro_after_init id_aa64zfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64smfr0_override;
struct arm64_ftr_override __ro_after_init id_aa64isar1_override;
struct arm64_ftr_override __ro_after_init id_aa64isar2_override;
@ -670,11 +673,14 @@ static const struct __ftr_reg_entry {
ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5),
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0,
&id_aa64pfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1,
&id_aa64pfr1_override),
ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
ARM64_FTR_REG(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0,
&id_aa64zfr0_override),
ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64SMFR0_EL1, ftr_id_aa64smfr0,
&id_aa64smfr0_override),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@ -3295,7 +3301,7 @@ subsys_initcall_sync(init_32bit_el0_mask);
static void __maybe_unused cpu_enable_cnp(struct arm64_cpu_capabilities const *cap)
{
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
}
/*

View File

@ -37,8 +37,6 @@
#include "efi-header.S"
#define __PHYS_OFFSET KERNEL_START
#if (PAGE_OFFSET & 0x1fffff) != 0
#error PAGE_OFFSET must be at least 2MB aligned
#endif
@ -51,9 +49,6 @@
* MMU = off, D-cache = off, I-cache = on or off,
* x0 = physical address to the FDT blob.
*
* This code is mostly position independent so you call this at
* __pa(PAGE_OFFSET).
*
* Note that the callee-saved registers are used for storing variables
* that are useful before the MMU is enabled. The allocations are described
* in the entry routines.
@ -82,25 +77,34 @@
* primary lowlevel boot path:
*
* Register Scope Purpose
* x20 primary_entry() .. __primary_switch() CPU boot mode
* x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0
* x22 create_idmap() .. start_kernel() ID map VA of the DT blob
* x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset
* x28 __create_page_tables() callee preserved temp register
* x19/x20 __primary_switch() callee preserved temp registers
* x24 __primary_switch() .. relocate_kernel() current RELR displacement
* x24 __primary_switch() linear map KASLR seed
* x25 primary_entry() .. start_kernel() supported VA size
* x28 create_idmap() callee preserved temp register
*/
SYM_CODE_START(primary_entry)
bl preserve_boot_args
bl init_kernel_el // w0=cpu_boot_mode
adrp x23, __PHYS_OFFSET
and x23, x23, MIN_KIMG_ALIGN - 1 // KASLR offset, defaults to 0
bl set_cpu_boot_mode_flag
bl __create_page_tables
mov x20, x0
bl create_idmap
/*
* The following calls CPU setup code, see arch/arm64/mm/proc.S for
* details.
* On return, the CPU will be ready for the MMU to be turned on and
* the TCR will have been set.
*/
#if VA_BITS > 48
mrs_s x0, SYS_ID_AA64MMFR2_EL1
tst x0, #0xf << ID_AA64MMFR2_LVA_SHIFT
mov x0, #VA_BITS
mov x25, #VA_BITS_MIN
csel x25, x25, x0, eq
mov x0, x25
#endif
bl __cpu_setup // initialise processor
b __primary_switch
SYM_CODE_END(primary_entry)
@ -122,28 +126,16 @@ SYM_CODE_START_LOCAL(preserve_boot_args)
b dcache_inval_poc // tail call
SYM_CODE_END(preserve_boot_args)
/*
* Macro to create a table entry to the next page.
*
* tbl: page table address
* virt: virtual address
* shift: #imm page table shift
* ptrs: #imm pointers per table page
*
* Preserves: virt
* Corrupts: ptrs, tmp1, tmp2
* Returns: tbl -> next level table page address
*/
.macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2
add \tmp1, \tbl, #PAGE_SIZE
phys_to_pte \tmp2, \tmp1
orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type
lsr \tmp1, \virt, #\shift
sub \ptrs, \ptrs, #1
and \tmp1, \tmp1, \ptrs // table index
str \tmp2, [\tbl, \tmp1, lsl #3]
add \tbl, \tbl, #PAGE_SIZE // next level table page
.endm
SYM_FUNC_START_LOCAL(clear_page_tables)
/*
* Clear the init page tables.
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
sub x2, x1, x0
mov x1, xzr
b __pi_memset // tail call
SYM_FUNC_END(clear_page_tables)
/*
* Macro to populate page table entries, these entries can be pointers to the next level
@ -179,31 +171,20 @@ SYM_CODE_END(preserve_boot_args)
* vstart: virtual address of start of range
* vend: virtual address of end of range - we map [vstart, vend]
* shift: shift used to transform virtual address into index
* ptrs: number of entries in page table
* order: #imm 2log(number of entries in page table)
* istart: index in table corresponding to vstart
* iend: index in table corresponding to vend
* count: On entry: how many extra entries were required in previous level, scales
* our end index.
* On exit: returns how many extra entries required for next page table level
*
* Preserves: vstart, vend, shift, ptrs
* Preserves: vstart, vend
* Returns: istart, iend, count
*/
.macro compute_indices, vstart, vend, shift, ptrs, istart, iend, count
lsr \iend, \vend, \shift
mov \istart, \ptrs
sub \istart, \istart, #1
and \iend, \iend, \istart // iend = (vend >> shift) & (ptrs - 1)
mov \istart, \ptrs
mul \istart, \istart, \count
add \iend, \iend, \istart // iend += count * ptrs
// our entries span multiple tables
lsr \istart, \vstart, \shift
mov \count, \ptrs
sub \count, \count, #1
and \istart, \istart, \count
.macro compute_indices, vstart, vend, shift, order, istart, iend, count
ubfx \istart, \vstart, \shift, \order
ubfx \iend, \vend, \shift, \order
add \iend, \iend, \count, lsl \order
sub \count, \iend, \istart
.endm
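As a cross-check of the four instructions above, here is a small C model of the rewritten compute_indices; the function and variable names are mine, for illustration only:

#include <stdint.h>

struct indices { uint64_t istart, iend, count; };

static struct indices compute_indices(uint64_t vstart, uint64_t vend,
                                      unsigned int shift, unsigned int order,
                                      uint64_t count_in)
{
        struct indices r;
        uint64_t mask = (order < 64) ? (1ULL << order) - 1 : ~0ULL;

        r.istart = (vstart >> shift) & mask;    /* ubfx istart, vstart, shift, order */
        r.iend   = ((vend >> shift) & mask)     /* ubfx iend, vend, shift, order     */
                 + (count_in << order);         /* add  iend, iend, count, lsl order */
        r.count  = r.iend - r.istart;           /* sub  count, iend, istart          */
        return r;
}

With count_in == 0 this simply yields the pair of table indices covering [vstart, vend]; a non-zero count_in pushes iend into the extra tables required by the previous (higher) level, which is how entries are made to span multiple consecutive tables.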
@ -218,119 +199,116 @@ SYM_CODE_END(preserve_boot_args)
* vend: virtual address of end of range - we map [vstart, vend - 1]
* flags: flags to use to map last level entries
* phys: physical address corresponding to vstart - physical memory is contiguous
* pgds: the number of pgd entries
* order: #imm 2log(number of entries in PGD table)
*
* If extra_shift is set, an extra level will be populated if the end address does
* not fit in 'extra_shift' bits. This assumes vend is in the TTBR0 range.
*
* Temporaries: istart, iend, tmp, count, sv - these need to be different registers
* Preserves: vstart, flags
* Corrupts: tbl, rtbl, vend, istart, iend, tmp, count, sv
*/
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, pgds, istart, iend, tmp, count, sv
.macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift
sub \vend, \vend, #1
add \rtbl, \tbl, #PAGE_SIZE
mov \sv, \rtbl
mov \count, #0
compute_indices \vstart, \vend, #PGDIR_SHIFT, \pgds, \istart, \iend, \count
.ifnb \extra_shift
tst \vend, #~((1 << (\extra_shift)) - 1)
b.eq .L_\@
compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
.endif
.L_\@:
compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#if SWAPPER_PGTABLE_LEVELS > 3
compute_indices \vstart, \vend, #PUD_SHIFT, #PTRS_PER_PUD, \istart, \iend, \count
compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
mov \sv, \rtbl
#endif
#if SWAPPER_PGTABLE_LEVELS > 2
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #PTRS_PER_PMD, \istart, \iend, \count
compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
mov \sv, \rtbl
populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp
mov \tbl, \sv
#endif
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #PTRS_PER_PTE, \istart, \iend, \count
bic \count, \phys, #SWAPPER_BLOCK_SIZE - 1
populate_entries \tbl, \count, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count
bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1
populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp
.endm
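The new extra_shift handling only kicks in when the (inclusive) end address does not fit in extra_shift bits; a one-line C rendering of the test performed by the tst/b.eq pair above (hypothetical helper name, for illustration):

#include <stdbool.h>
#include <stdint.h>

/* true if vend_inclusive has any bits set at or above extra_shift */
static bool needs_extra_level(uint64_t vend_inclusive, unsigned int extra_shift)
{
        return (vend_inclusive & ~((1ULL << extra_shift) - 1)) != 0;
}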
/*
* Setup the initial page tables. We only setup the barest amount which is
* required to get the kernel running. The following sections are required:
* - identity mapping to enable the MMU (low address, TTBR0)
* - first few MB of the kernel linear mapping to jump to once the MMU has
* been enabled
* Remap a subregion created with the map_memory macro with modified attributes
* or output address. The entire remapped region must have been covered in the
* invocation of map_memory.
*
* x0: last level table address (returned in first argument to map_memory)
* x1: start VA of the existing mapping
* x2: start VA of the region to update
* x3: end VA of the region to update (exclusive)
* x4: start PA associated with the region to update
* x5: attributes to set on the updated region
* x6: order of the last level mappings
*/
SYM_FUNC_START_LOCAL(__create_page_tables)
SYM_FUNC_START_LOCAL(remap_region)
sub x3, x3, #1 // make end inclusive
// Get the index offset for the start of the last level table
lsr x1, x1, x6
bfi x1, xzr, #0, #PAGE_SHIFT - 3
// Derive the start and end indexes into the last level table
// associated with the provided region
lsr x2, x2, x6
lsr x3, x3, x6
sub x2, x2, x1
sub x3, x3, x1
mov x1, #1
lsl x6, x1, x6 // block size at this level
populate_entries x0, x4, x2, x3, x5, x6, x7
ret
SYM_FUNC_END(remap_region)
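A C sketch of the index derivation remap_region performs before handing off to populate_entries; the helper name and the PAGE_SHIFT value are assumptions for illustration:

#include <stdint.h>

#define PAGE_SHIFT      12      /* assumed: 4k pages, so 512 entries per table */

static void remap_indices(uint64_t map_start,   /* x1: start VA of the existing mapping */
                          uint64_t upd_start,   /* x2: start VA of the region to update */
                          uint64_t upd_end,     /* x3: end VA of the region (exclusive) */
                          unsigned int order,   /* x6: shift of the last level mappings */
                          uint64_t *istart, uint64_t *iend)
{
        /* index offset corresponding to entry 0 of the first last-level table */
        uint64_t base = (map_start >> order) & ~((1ULL << (PAGE_SHIFT - 3)) - 1);

        *istart = (upd_start >> order) - base;
        *iend   = ((upd_end - 1) >> order) - base;      /* end made inclusive first */
}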
SYM_FUNC_START_LOCAL(create_idmap)
mov x28, lr
/*
* Invalidate the init page tables to avoid potential dirty cache lines
* being evicted. Other page tables are allocated in rodata as part of
* the kernel image, and thus are clean to the PoC per the boot
* protocol.
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
bl dcache_inval_poc
/*
* Clear the init page tables.
*/
adrp x0, init_pg_dir
adrp x1, init_pg_end
sub x1, x1, x0
1: stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
stp xzr, xzr, [x0], #16
subs x1, x1, #64
b.ne 1b
mov x7, SWAPPER_MM_MMUFLAGS
/*
* Create the identity mapping.
*/
adrp x0, idmap_pg_dir
adrp x3, __idmap_text_start // __pa(__idmap_text_start)
#ifdef CONFIG_ARM64_VA_BITS_52
mrs_s x6, SYS_ID_AA64MMFR2_EL1
and x6, x6, #(0xf << ID_AA64MMFR2_LVA_SHIFT)
mov x5, #52
cbnz x6, 1f
#endif
mov x5, #VA_BITS_MIN
1:
adr_l x6, vabits_actual
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
/*
* VA_BITS may be too small to allow for an ID mapping to be created
* that covers system RAM if that is located sufficiently high in the
* physical address space. So for the ID map, use an extended virtual
* range in that case, and configure an additional translation level
* if needed.
* The ID map carries a 1:1 mapping of the physical address range
* covered by the loaded image, which could be anywhere in DRAM. This
* means that the required size of the VA (== PA) space is decided at
* boot time, and could be more than the configured size of the VA
* space for ordinary kernel and user space mappings.
*
* Calculate the maximum allowed value for TCR_EL1.T0SZ so that the
* entire ID map region can be mapped. As T0SZ == (64 - #bits used),
* this number conveniently equals the number of leading zeroes in
* the physical address of __idmap_text_end.
* There are three cases to consider here:
* - 39 <= VA_BITS < 48, and the ID map needs up to 48 VA bits to cover
* the placement of the image. In this case, we configure one extra
* level of translation on the fly for the ID map only. (This case
* also covers 42-bit VA/52-bit PA on 64k pages).
*
* - VA_BITS == 48, and the ID map needs more than 48 VA bits. This can
* only happen when using 64k pages, in which case we need to extend
* the root level table rather than add a level. Note that we can
* treat this case as 'always extended' as long as we take care not
* to program an unsupported T0SZ value into the TCR register.
*
* - Combinations that would require two additional levels of
* translation are not supported, e.g., VA_BITS==36 on 16k pages, or
* VA_BITS==39/4k pages with 5-level paging, where the input address
* requires more than 47 or 48 bits, respectively.
*/
adrp x5, __idmap_text_end
clz x5, x5
cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
b.ge 1f // .. then skip VA range extension
adr_l x6, idmap_t0sz
str x5, [x6]
dmb sy
dc ivac, x6 // Invalidate potentially stale cache line
#if (VA_BITS < 48)
#define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT)
#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3)
#define EXTRA_PTRS (1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
/*
* If VA_BITS < 48, we have to configure an additional table level.
@ -342,36 +320,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
#if VA_BITS != EXTRA_SHIFT
#error "Mismatch between VA_BITS and page size/number of translation levels"
#endif
mov x4, EXTRA_PTRS
create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
#else
#define IDMAP_PGD_ORDER (PHYS_MASK_SHIFT - PGDIR_SHIFT)
#define EXTRA_SHIFT
/*
* If VA_BITS == 48, we don't have to configure an additional
* translation level, but the top-level table has more entries.
*/
mov x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
str_l x4, idmap_ptrs_per_pgd, x5
#endif
1:
ldr_l x4, idmap_ptrs_per_pgd
adr_l x6, __idmap_text_end // __pa(__idmap_text_end)
adrp x0, init_idmap_pg_dir
adrp x3, _text
adrp x6, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
mov x7, SWAPPER_RX_MMUFLAGS
map_memory x0, x1, x3, x6, x7, x3, x4, x10, x11, x12, x13, x14
map_memory x0, x1, x3, x6, x7, x3, IDMAP_PGD_ORDER, x10, x11, x12, x13, x14, EXTRA_SHIFT
/*
* Map the kernel image (starting with PHYS_OFFSET).
*/
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
mov x4, PTRS_PER_PGD
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
/* Remap the kernel page tables r/w in the ID map */
adrp x1, _text
adrp x2, init_pg_dir
adrp x3, init_pg_end
bic x4, x2, #SWAPPER_BLOCK_SIZE - 1
mov x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
map_memory x0, x1, x5, x6, x7, x3, x4, x10, x11, x12, x13, x14
/* Remap the FDT after the kernel image */
adrp x1, _text
adrp x22, _end + SWAPPER_BLOCK_SIZE
bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
mov x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
/*
* Since the page tables have been populated with non-cacheable
@ -380,16 +362,27 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
*/
dmb sy
adrp x0, idmap_pg_dir
adrp x1, idmap_pg_end
adrp x0, init_idmap_pg_dir
adrp x1, init_idmap_pg_end
bl dcache_inval_poc
adrp x0, init_pg_dir
adrp x1, init_pg_end
bl dcache_inval_poc
ret x28
SYM_FUNC_END(__create_page_tables)
SYM_FUNC_END(create_idmap)
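As a worked example of the EXTRA_SHIFT arithmetic used in create_idmap above, assuming a 4k-page, 39-bit VA configuration (the numbers are illustrative):

/* 4k pages, VA_BITS == 39: three levels, so PGDIR_SHIFT == 30. */
#define PAGE_SHIFT      12
#define VA_BITS         39
#define PGDIR_SHIFT     30

/* One extra level above the PGD covers PAGE_SHIFT - 3 == 9 more bits: */
#define EXTRA_SHIFT     (PGDIR_SHIFT + PAGE_SHIFT - 3)  /* == 39 == VA_BITS */

/*
 * The build-time check above (VA_BITS != EXTRA_SHIFT) relies on exactly this:
 * the extra level must start where the configured VA space ends, and it takes
 * the ID map from 39 usable bits up to 39 + 9 == 48 bits.
 */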
SYM_FUNC_START_LOCAL(create_kernel_mapping)
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
add x5, x5, x23 // add KASLR displacement
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
mov x7, SWAPPER_RW_MMUFLAGS
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
dsb ishst // sync with page table walker
ret
SYM_FUNC_END(create_kernel_mapping)
/*
* Initialize CPU registers with task-specific and cpu-specific context.
@ -420,7 +413,7 @@ SYM_FUNC_END(__create_page_tables)
/*
* The following fragment of code is executed with the MMU enabled.
*
* x0 = __PHYS_OFFSET
* x0 = __pa(KERNEL_START)
*/
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
@ -439,6 +432,9 @@ SYM_FUNC_START_LOCAL(__primary_switched)
sub x4, x4, x0 // the kernel virtual and
str_l x4, kimage_voffset, x5 // physical mappings
mov x0, x20
bl set_cpu_boot_mode_flag
// Clear BSS
adr_l x0, __bss_start
mov x1, xzr
@ -447,35 +443,30 @@ SYM_FUNC_START_LOCAL(__primary_switched)
bl __pi_memset
dsb ishst // Make zero page visible to PTW
#if VA_BITS > 48
adr_l x8, vabits_actual // Set this early so KASAN early init
str x25, [x8] // ... observes the correct value
dc civac, x8 // Make visible to booting secondaries
#endif
#ifdef CONFIG_RANDOMIZE_BASE
adrp x5, memstart_offset_seed // Save KASLR linear map seed
strh w24, [x5, :lo12:memstart_offset_seed]
#endif
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
bl kasan_early_init
#endif
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
mov x0, x20 // pass the full boot status
bl init_feature_override // Parse cpu feature overrides
#ifdef CONFIG_RANDOMIZE_BASE
tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized?
b.ne 0f
bl kaslr_early_init // parse FDT for KASLR options
cbz x0, 0f // KASLR disabled? just proceed
orr x23, x23, x0 // record KASLR offset
ldp x29, x30, [sp], #16 // we must enable KASLR, return
ret // to __primary_switch()
0:
#endif
bl switch_to_vhe // Prefer VHE if possible
mov x0, x20
bl finalise_el2 // Prefer VHE if possible
ldp x29, x30, [sp], #16
bl start_kernel
ASM_BUG()
SYM_FUNC_END(__primary_switched)
.pushsection ".rodata", "a"
SYM_DATA_START(kimage_vaddr)
.quad _text
SYM_DATA_END(kimage_vaddr)
EXPORT_SYMBOL(kimage_vaddr)
.popsection
/*
* end early head section, begin head code that is also used for
* hotplug and needs to have the same protections as the text region
@ -490,8 +481,9 @@ EXPORT_SYMBOL(kimage_vaddr)
* Since we cannot always rely on ERET synchronizing writes to sysregs (e.g. if
* SCTLR_ELx.EOS is clear), we place an ISB prior to ERET.
*
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if
* booted in EL1 or EL2 respectively.
* Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x0 if
* booted in EL1 or EL2 respectively, with the top 32 bits containing
* potential context flags. These flags are *not* stored in __boot_cpu_mode.
*/
SYM_FUNC_START(init_kernel_el)
mrs x0, CurrentEL
@ -520,6 +512,8 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
msr vbar_el2, x0
isb
mov_q x1, INIT_SCTLR_EL1_MMU_OFF
/*
* Fruity CPUs seem to have HCR_EL2.E2H set to RES1,
* making it impossible to start in nVHE mode. Is that
@ -529,34 +523,19 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL)
and x0, x0, #HCR_E2H
cbz x0, 1f
/* Switching to VHE requires a sane SCTLR_EL1 as a start */
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr_s SYS_SCTLR_EL12, x0
/*
* Force an eret into a helper "function", and let it return
* to our original caller... This makes sure that we have
* initialised the basic PSTATE state.
*/
mov x0, #INIT_PSTATE_EL2
msr spsr_el1, x0
adr x0, __cpu_stick_to_vhe
msr elr_el1, x0
eret
/* Set a sane SCTLR_EL1, the VHE way */
msr_s SYS_SCTLR_EL12, x1
mov x2, #BOOT_CPU_FLAG_E2H
b 2f
1:
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr sctlr_el1, x0
msr sctlr_el1, x1
mov x2, xzr
2:
msr elr_el2, lr
mov w0, #BOOT_CPU_MODE_EL2
orr x0, x0, x2
eret
__cpu_stick_to_vhe:
mov x0, #HVC_VHE_RESTART
hvc #0
mov x0, #BOOT_CPU_MODE_EL2
ret
SYM_FUNC_END(init_kernel_el)
/*
@ -569,52 +548,21 @@ SYM_FUNC_START_LOCAL(set_cpu_boot_mode_flag)
b.ne 1f
add x1, x1, #4
1: str w0, [x1] // Save CPU boot mode
dmb sy
dc ivac, x1 // Invalidate potentially stale cache line
ret
SYM_FUNC_END(set_cpu_boot_mode_flag)
/*
* These values are written with the MMU off, but read with the MMU on.
* Writers will invalidate the corresponding address, discarding up to a
* 'Cache Writeback Granule' (CWG) worth of data. The linker script ensures
* sufficient alignment that the CWG doesn't overlap another section.
*/
.pushsection ".mmuoff.data.write", "aw"
/*
* We need to find out the CPU boot mode long after boot, so we need to
* store it in a writable variable.
*
* This is not in .bss, because we set it sufficiently early that the boot-time
* zeroing of .bss would clobber it.
*/
SYM_DATA_START(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long BOOT_CPU_MODE_EL1
SYM_DATA_END(__boot_cpu_mode)
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*/
SYM_DATA_START(__early_cpu_boot_status)
.quad 0
SYM_DATA_END(__early_cpu_boot_status)
.popsection
/*
* This provides a "holding pen" for platforms to hold all secondary
* cores are held until we're ready for them to initialise.
*/
SYM_FUNC_START(secondary_holding_pen)
bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
mrs x0, mpidr_el1
mrs x2, mpidr_el1
mov_q x1, MPIDR_HWID_BITMASK
and x0, x0, x1
and x2, x2, x1
adr_l x3, secondary_holding_pen_release
pen: ldr x4, [x3]
cmp x4, x0
cmp x4, x2
b.eq secondary_startup
wfe
b pen
@ -626,7 +574,6 @@ SYM_FUNC_END(secondary_holding_pen)
*/
SYM_FUNC_START(secondary_entry)
bl init_kernel_el // w0=cpu_boot_mode
bl set_cpu_boot_mode_flag
b secondary_startup
SYM_FUNC_END(secondary_entry)
@ -634,16 +581,24 @@ SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
bl switch_to_vhe
mov x20, x0 // preserve boot mode
bl finalise_el2
bl __cpu_secondary_check52bitva
#if VA_BITS > 48
ldr_l x0, vabits_actual
#endif
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =__secondary_switched
br x8
SYM_FUNC_END(secondary_startup)
SYM_FUNC_START_LOCAL(__secondary_switched)
mov x0, x20
bl set_cpu_boot_mode_flag
str_l xzr, __early_cpu_boot_status, x3
adr_l x5, vectors
msr vbar_el1, x5
isb
@ -691,6 +646,7 @@ SYM_FUNC_END(__secondary_too_slow)
*
* x0 = SCTLR_EL1 value for turning on the MMU.
* x1 = TTBR1_EL1 value
* x2 = ID map root table address
*
* Returns to the caller via x30/lr. This requires the caller to be covered
* by the .idmap.text section.
@ -699,20 +655,15 @@ SYM_FUNC_END(__secondary_too_slow)
* If it isn't, park the CPU
*/
SYM_FUNC_START(__enable_mmu)
mrs x2, ID_AA64MMFR0_EL1
ubfx x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
mrs x3, ID_AA64MMFR0_EL1
ubfx x3, x3, #ID_AA64MMFR0_TGRAN_SHIFT, 4
cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
b.lt __no_granule_support
cmp x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
cmp x3, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
b.gt __no_granule_support
update_early_cpu_boot_status 0, x2, x3
adrp x2, idmap_pg_dir
phys_to_ttbr x1, x1
phys_to_ttbr x2, x2
msr ttbr0_el1, x2 // load TTBR0
offset_ttbr1 x1, x3
msr ttbr1_el1, x1 // load TTBR1
isb
load_ttbr1 x1, x1, x3
set_sctlr_el1 x0
@ -720,7 +671,7 @@ SYM_FUNC_START(__enable_mmu)
SYM_FUNC_END(__enable_mmu)
SYM_FUNC_START(__cpu_secondary_check52bitva)
#ifdef CONFIG_ARM64_VA_BITS_52
#if VA_BITS > 48
ldr_l x0, vabits_actual
cmp x0, #52
b.ne 2f
@ -755,13 +706,10 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* Iterate over each entry in the relocation table, and apply the
* relocations in place.
*/
ldr w9, =__rela_offset // offset to reloc table
ldr w10, =__rela_size // size of reloc table
adr_l x9, __rela_start
adr_l x10, __rela_end
mov_q x11, KIMAGE_VADDR // default virtual offset
add x11, x11, x23 // actual virtual offset
add x9, x9, x11 // __va(.rela)
add x10, x9, x10 // __va(.rela) + sizeof(.rela)
0: cmp x9, x10
b.hs 1f
@ -804,21 +752,9 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
* entry in x9, the address being relocated by the current address or
* bitmap entry in x13 and the address being relocated by the current
* bit in x14.
*
* Because addends are stored in place in the binary, RELR relocations
* cannot be applied idempotently. We use x24 to keep track of the
* currently applied displacement so that we can correctly relocate if
* __relocate_kernel is called twice with non-zero displacements (i.e.
* if there is both a physical misalignment and a KASLR displacement).
*/
ldr w9, =__relr_offset // offset to reloc table
ldr w10, =__relr_size // size of reloc table
add x9, x9, x11 // __va(.relr)
add x10, x9, x10 // __va(.relr) + sizeof(.relr)
sub x15, x23, x24 // delta from previous offset
cbz x15, 7f // nothing to do if unchanged
mov x24, x23 // save new offset
adr_l x9, __relr_start
adr_l x10, __relr_end
2: cmp x9, x10
b.hs 7f
@ -826,7 +762,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
tbnz x11, #0, 3f // branch to handle bitmaps
add x13, x11, x23
ldr x12, [x13] // relocate address entry
add x12, x12, x15
add x12, x12, x23
str x12, [x13], #8 // adjust to start of bitmap
b 2b
@ -835,7 +771,7 @@ SYM_FUNC_START_LOCAL(__relocate_kernel)
cbz x11, 6f
tbz x11, #0, 5f // skip bit if not set
ldr x12, [x14] // relocate bit
add x12, x12, x15
add x12, x12, x23
str x12, [x14]
5: add x14, x14, #8 // move to next bit's address
@ -856,43 +792,32 @@ SYM_FUNC_END(__relocate_kernel)
#endif
SYM_FUNC_START_LOCAL(__primary_switch)
#ifdef CONFIG_RANDOMIZE_BASE
mov x19, x0 // preserve new SCTLR_EL1 value
mrs x20, sctlr_el1 // preserve old SCTLR_EL1 value
#endif
adrp x1, init_pg_dir
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
#ifdef CONFIG_RELOCATABLE
#ifdef CONFIG_RELR
mov x24, #0 // no RELR displacement yet
#endif
bl __relocate_kernel
adrp x23, KERNEL_START
and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET
blr x8
/*
* If we return here, we have a KASLR displacement in x23 which we need
* to take into account by discarding the current kernel mapping and
* creating a new one.
*/
pre_disable_mmu_workaround
msr sctlr_el1, x20 // disable the MMU
isb
bl __create_page_tables // recreate kernel mapping
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
isb
set_sctlr_el1 x19 // re-enable the MMU
mov x0, x22
adrp x1, init_pg_end
mov sp, x1
mov x29, xzr
bl __pi_kaslr_early_init
and x24, x0, #SZ_2M - 1 // capture memstart offset seed
bic x0, x0, #SZ_2M - 1
orr x23, x23, x0 // record kernel offset
#endif
#endif
bl clear_page_tables
bl create_kernel_mapping
adrp x1, init_pg_dir
load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel
#endif
#endif
ldr x8, =__primary_switched
adrp x0, __PHYS_OFFSET
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
SYM_FUNC_END(__primary_switch)

View File

@ -16,6 +16,30 @@
#include <asm/ptrace.h>
#include <asm/virt.h>
// Warning, hardcoded register allocation
// This will clobber x1 and x2, and expect x1 to contain
// the id register value as read from the HW
.macro __check_override idreg, fld, width, pass, fail
ubfx x1, x1, #\fld, #\width
cbz x1, \fail
adr_l x1, \idreg\()_override
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
ubfx x2, x2, #\fld, #\width
ubfx x1, x1, #\fld, #\width
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
cbnz x2, \pass
b \fail
.endm
.macro check_override idreg, fld, pass, fail
mrs x1, \idreg\()_el1
__check_override \idreg \fld 4 \pass \fail
.endm
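For reference, a small C model of the decision these macros implement; the type and function names are mine, and this is a sketch rather than the kernel implementation:

#include <stdbool.h>
#include <stdint.h>

struct ftr_override_model { uint64_t val, mask; };

static bool feature_passes(uint64_t hwreg, const struct ftr_override_model *ovr,
                           unsigned int shift, unsigned int width)
{
        uint64_t fmask = (width < 64) ? (1ULL << width) - 1 : ~0ULL;
        uint64_t hw = (hwreg >> shift) & fmask;         /* field as read from the HW */
        uint64_t m  = (ovr->mask >> shift) & fmask;     /* is the field overridden?  */
        uint64_t v  = (ovr->val  >> shift) & fmask;     /* and if so, to what value? */

        if (!hw)
                return false;   /* hardware does not implement the feature */
        if (!m)
                return true;    /* no override requested: trust the hardware */
        return (v & m) != 0;    /* overridden: pass only if the value is non-zero */
}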
.text
.pushsection .hyp.text, "ax"
@ -51,8 +75,8 @@ SYM_CODE_START_LOCAL(elx_sync)
msr vbar_el2, x1
b 9f
1: cmp x0, #HVC_VHE_RESTART
b.eq mutate_to_vhe
1: cmp x0, #HVC_FINALISE_EL2
b.eq __finalise_el2
2: cmp x0, #HVC_SOFT_RESTART
b.ne 3f
@ -73,27 +97,67 @@ SYM_CODE_START_LOCAL(elx_sync)
eret
SYM_CODE_END(elx_sync)
// nVHE? No way! Give me the real thing!
SYM_CODE_START_LOCAL(mutate_to_vhe)
SYM_CODE_START_LOCAL(__finalise_el2)
check_override id_aa64pfr0 ID_AA64PFR0_SVE_SHIFT .Linit_sve .Lskip_sve
.Linit_sve: /* SVE register access */
mrs x0, cptr_el2 // Disable SVE traps
bic x0, x0, #CPTR_EL2_TZ
msr cptr_el2, x0
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
.Lskip_sve:
check_override id_aa64pfr1 ID_AA64PFR1_SME_SHIFT .Linit_sme .Lskip_sme
.Linit_sme: /* SME register access and priority mapping */
mrs x0, cptr_el2 // Disable SME traps
bic x0, x0, #CPTR_EL2_TSM
msr cptr_el2, x0
isb
mrs x1, sctlr_el2
orr x1, x1, #SCTLR_ELx_ENTP2 // Disable TPIDR2 traps
msr sctlr_el2, x1
isb
mov x0, #0 // SMCR controls
// Full FP in SM?
mrs_s x1, SYS_ID_AA64SMFR0_EL1
__check_override id_aa64smfr0 ID_AA64SMFR0_EL1_FA64_SHIFT 1 .Linit_sme_fa64 .Lskip_sme_fa64
.Linit_sme_fa64:
orr x0, x0, SMCR_ELx_FA64_MASK
.Lskip_sme_fa64:
orr x0, x0, #SMCR_ELx_LEN_MASK // Enable full SME vector
msr_s SYS_SMCR_EL2, x0 // length for EL1.
mrs_s x1, SYS_SMIDR_EL1 // Priority mapping supported?
ubfx x1, x1, #SMIDR_EL1_SMPS_SHIFT, #1
cbz x1, .Lskip_sme
msr_s SYS_SMPRIMAP_EL2, xzr // Make all priorities equal
mrs x1, id_aa64mmfr1_el1 // HCRX_EL2 present?
ubfx x1, x1, #ID_AA64MMFR1_HCX_SHIFT, #4
cbz x1, .Lskip_sme
mrs_s x1, SYS_HCRX_EL2
orr x1, x1, #HCRX_EL2_SMPME_MASK // Enable priority mapping
msr_s SYS_HCRX_EL2, x1
.Lskip_sme:
// nVHE? No way! Give me the real thing!
// Sanity check: MMU *must* be off
mrs x1, sctlr_el2
tbnz x1, #0, 1f
// Needs to be VHE capable, obviously
mrs x1, id_aa64mmfr1_el1
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cbz x1, 1f
// Check whether VHE is disabled from the command line
adr_l x1, id_aa64mmfr1_override
ldr x2, [x1, FTR_OVR_VAL_OFFSET]
ldr x1, [x1, FTR_OVR_MASK_OFFSET]
ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
ubfx x1, x1, #ID_AA64MMFR1_VHE_SHIFT, #4
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
cbnz x2, 2f
check_override id_aa64mmfr1 ID_AA64MMFR1_VHE_SHIFT 2f 1f
1: mov_q x0, HVC_STUB_ERR
eret
@ -140,10 +204,10 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
msr spsr_el1, x0
b enter_vhe
SYM_CODE_END(mutate_to_vhe)
SYM_CODE_END(__finalise_el2)
// At the point where we reach enter_vhe(), we run with
// the MMU off (which is enforced by mutate_to_vhe()).
// the MMU off (which is enforced by __finalise_el2()).
// We thus need to be in the idmap, or everything will
// explode when enabling the MMU.
@ -222,12 +286,12 @@ SYM_FUNC_START(__hyp_reset_vectors)
SYM_FUNC_END(__hyp_reset_vectors)
/*
* Entry point to switch to VHE if deemed capable
* Entry point to finalise EL2 and switch to VHE if deemed capable
*
* w0: boot mode, as returned by init_kernel_el()
*/
SYM_FUNC_START(switch_to_vhe)
SYM_FUNC_START(finalise_el2)
// Need to have booted at EL2
adr_l x1, __boot_cpu_mode
ldr w0, [x1]
cmp w0, #BOOT_CPU_MODE_EL2
b.ne 1f
@ -236,9 +300,8 @@ SYM_FUNC_START(switch_to_vhe)
cmp x0, #CurrentEL_EL1
b.ne 1f
// Turn the world upside down
mov x0, #HVC_VHE_RESTART
mov x0, #HVC_FINALISE_EL2
hvc #0
1:
ret
SYM_FUNC_END(switch_to_vhe)
SYM_FUNC_END(finalise_el2)

View File

@ -19,16 +19,21 @@
#define FTR_ALIAS_NAME_LEN 30
#define FTR_ALIAS_OPTION_LEN 116
static u64 __boot_status __initdata;
struct ftr_set_desc {
char name[FTR_DESC_NAME_LEN];
struct arm64_ftr_override *override;
struct {
char name[FTR_DESC_FIELD_LEN];
u8 shift;
u8 width;
bool (*filter)(u64 val);
} fields[];
};
#define FIELD(n, s, f) { .name = n, .shift = s, .width = 4, .filter = f }
static bool __init mmfr1_vh_filter(u64 val)
{
/*
@ -37,24 +42,65 @@ static bool __init mmfr1_vh_filter(u64 val)
* the user was trying to force nVHE on us, proceed with
* attitude adjustment.
*/
return !(is_kernel_in_hyp_mode() && val == 0);
return !(__boot_status == (BOOT_CPU_FLAG_E2H | BOOT_CPU_MODE_EL2) &&
val == 0);
}
static const struct ftr_set_desc mmfr1 __initconst = {
.name = "id_aa64mmfr1",
.override = &id_aa64mmfr1_override,
.fields = {
{ "vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter },
FIELD("vh", ID_AA64MMFR1_VHE_SHIFT, mmfr1_vh_filter),
{}
},
};
static bool __init pfr0_sve_filter(u64 val)
{
/*
* Disabling SVE also means disabling all the features that
* are associated with it. The easiest way to do it is just to
* override id_aa64zfr0_el1 to be 0.
*/
if (!val) {
id_aa64zfr0_override.val = 0;
id_aa64zfr0_override.mask = GENMASK(63, 0);
}
return true;
}
static const struct ftr_set_desc pfr0 __initconst = {
.name = "id_aa64pfr0",
.override = &id_aa64pfr0_override,
.fields = {
FIELD("sve", ID_AA64PFR0_SVE_SHIFT, pfr0_sve_filter),
{}
},
};
static bool __init pfr1_sme_filter(u64 val)
{
/*
* Similarly to SVE, disabling SME also means disabling all
* the features that are associated with it. Just set
* id_aa64smfr0_el1 to 0 and don't look back.
*/
if (!val) {
id_aa64smfr0_override.val = 0;
id_aa64smfr0_override.mask = GENMASK(63, 0);
}
return true;
}
static const struct ftr_set_desc pfr1 __initconst = {
.name = "id_aa64pfr1",
.override = &id_aa64pfr1_override,
.fields = {
{ "bt", ID_AA64PFR1_BT_SHIFT },
{ "mte", ID_AA64PFR1_MTE_SHIFT},
FIELD("bt", ID_AA64PFR1_BT_SHIFT, NULL ),
FIELD("mte", ID_AA64PFR1_MTE_SHIFT, NULL),
FIELD("sme", ID_AA64PFR1_SME_SHIFT, pfr1_sme_filter),
{}
},
};
@ -63,10 +109,10 @@ static const struct ftr_set_desc isar1 __initconst = {
.name = "id_aa64isar1",
.override = &id_aa64isar1_override,
.fields = {
{ "gpi", ID_AA64ISAR1_EL1_GPI_SHIFT },
{ "gpa", ID_AA64ISAR1_EL1_GPA_SHIFT },
{ "api", ID_AA64ISAR1_EL1_API_SHIFT },
{ "apa", ID_AA64ISAR1_EL1_APA_SHIFT },
FIELD("gpi", ID_AA64ISAR1_EL1_GPI_SHIFT, NULL),
FIELD("gpa", ID_AA64ISAR1_EL1_GPA_SHIFT, NULL),
FIELD("api", ID_AA64ISAR1_EL1_API_SHIFT, NULL),
FIELD("apa", ID_AA64ISAR1_EL1_APA_SHIFT, NULL),
{}
},
};
@ -75,8 +121,18 @@ static const struct ftr_set_desc isar2 __initconst = {
.name = "id_aa64isar2",
.override = &id_aa64isar2_override,
.fields = {
{ "gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT },
{ "apa3", ID_AA64ISAR2_EL1_APA3_SHIFT },
FIELD("gpa3", ID_AA64ISAR2_EL1_GPA3_SHIFT, NULL),
FIELD("apa3", ID_AA64ISAR2_EL1_APA3_SHIFT, NULL),
{}
},
};
static const struct ftr_set_desc smfr0 __initconst = {
.name = "id_aa64smfr0",
.override = &id_aa64smfr0_override,
.fields = {
/* FA64 is a one bit field... :-/ */
{ "fa64", ID_AA64SMFR0_EL1_FA64_SHIFT, 1, },
{}
},
};
@ -89,16 +145,18 @@ static const struct ftr_set_desc kaslr __initconst = {
.override = &kaslr_feature_override,
#endif
.fields = {
{ "disabled", 0 },
FIELD("disabled", 0, NULL),
{}
},
};
static const struct ftr_set_desc * const regs[] __initconst = {
&mmfr1,
&pfr0,
&pfr1,
&isar1,
&isar2,
&smfr0,
&kaslr,
};
@ -108,6 +166,8 @@ static const struct {
} aliases[] __initconst = {
{ "kvm-arm.mode=nvhe", "id_aa64mmfr1.vh=0" },
{ "kvm-arm.mode=protected", "id_aa64mmfr1.vh=0" },
{ "arm64.nosve", "id_aa64pfr0.sve=0 id_aa64pfr1.sme=0" },
{ "arm64.nosme", "id_aa64pfr1.sme=0" },
{ "arm64.nobti", "id_aa64pfr1.bt=0" },
{ "arm64.nopauth",
"id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 "
@ -144,7 +204,8 @@ static void __init match_options(const char *cmdline)
for (f = 0; strlen(regs[i]->fields[f].name); f++) {
u64 shift = regs[i]->fields[f].shift;
u64 mask = 0xfUL << shift;
u64 width = regs[i]->fields[f].width ?: 4;
u64 mask = GENMASK_ULL(shift + width - 1, shift);
u64 v;
if (find_field(cmdline, regs[i], f, &v))
@ -152,7 +213,7 @@ static void __init match_options(const char *cmdline)
/*
* If an override gets filtered out, advertise
* it by setting the value to 0xf, but
* it by setting the value to the all-ones while
* clearing the mask... Yes, this is fragile.
*/
if (regs[i]->fields[f].filter &&
@ -234,9 +295,9 @@ static __init void parse_cmdline(void)
}
/* Keep checkers quiet */
void init_feature_override(void);
void init_feature_override(u64 boot_status);
asmlinkage void __init init_feature_override(void)
asmlinkage void __init init_feature_override(u64 boot_status)
{
int i;
@ -247,6 +308,8 @@ asmlinkage void __init init_feature_override(void)
}
}
__boot_status = boot_status;
parse_cmdline();
for (i = 0; i < ARRAY_SIZE(regs); i++) {

View File

@ -10,11 +10,8 @@
#error This file should only be included in vmlinux.lds.S
#endif
#ifdef CONFIG_EFI
__efistub_kernel_size = _edata - _text;
__efistub_primary_entry_offset = primary_entry - _text;
PROVIDE(__efistub_kernel_size = _edata - _text);
PROVIDE(__efistub_primary_entry_offset = primary_entry - _text);
/*
* The EFI stub has its own symbol namespace prefixed by __efistub_, to
@ -25,31 +22,37 @@ __efistub_primary_entry_offset = primary_entry - _text;
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
__efistub_memcmp = __pi_memcmp;
__efistub_memchr = __pi_memchr;
__efistub_memcpy = __pi_memcpy;
__efistub_memmove = __pi_memmove;
__efistub_memset = __pi_memset;
__efistub_strlen = __pi_strlen;
__efistub_strnlen = __pi_strnlen;
__efistub_strcmp = __pi_strcmp;
__efistub_strncmp = __pi_strncmp;
__efistub_strrchr = __pi_strrchr;
__efistub_dcache_clean_poc = __pi_dcache_clean_poc;
PROVIDE(__efistub_memcmp = __pi_memcmp);
PROVIDE(__efistub_memchr = __pi_memchr);
PROVIDE(__efistub_memcpy = __pi_memcpy);
PROVIDE(__efistub_memmove = __pi_memmove);
PROVIDE(__efistub_memset = __pi_memset);
PROVIDE(__efistub_strlen = __pi_strlen);
PROVIDE(__efistub_strnlen = __pi_strnlen);
PROVIDE(__efistub_strcmp = __pi_strcmp);
PROVIDE(__efistub_strncmp = __pi_strncmp);
PROVIDE(__efistub_strrchr = __pi_strrchr);
PROVIDE(__efistub_dcache_clean_poc = __pi_dcache_clean_poc);
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
__efistub___memcpy = __pi_memcpy;
__efistub___memmove = __pi_memmove;
__efistub___memset = __pi_memset;
#endif
PROVIDE(__efistub__text = _text);
PROVIDE(__efistub__end = _end);
PROVIDE(__efistub__edata = _edata);
PROVIDE(__efistub_screen_info = screen_info);
PROVIDE(__efistub__ctype = _ctype);
__efistub__text = _text;
__efistub__end = _end;
__efistub__edata = _edata;
__efistub_screen_info = screen_info;
__efistub__ctype = _ctype;
/*
* The __ prefixed memcpy/memset/memmove symbols are provided by KASAN, which
* instruments the conventional ones. Therefore, any references from the EFI
* stub or other position independent, low level C code should be redirected to
* the non-instrumented versions as well.
*/
PROVIDE(__efistub___memcpy = __pi_memcpy);
PROVIDE(__efistub___memmove = __pi_memmove);
PROVIDE(__efistub___memset = __pi_memset);
#endif
PROVIDE(__pi___memcpy = __pi_memcpy);
PROVIDE(__pi___memmove = __pi_memmove);
PROVIDE(__pi___memset = __pi_memset);
#ifdef CONFIG_KVM

View File

@ -13,7 +13,6 @@
#include <linux/pgtable.h>
#include <linux/random.h>
#include <asm/cacheflush.h>
#include <asm/fixmap.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
@ -21,128 +20,45 @@
#include <asm/sections.h>
#include <asm/setup.h>
enum kaslr_status {
KASLR_ENABLED,
KASLR_DISABLED_CMDLINE,
KASLR_DISABLED_NO_SEED,
KASLR_DISABLED_FDT_REMAP,
};
static enum kaslr_status __initdata kaslr_status;
u64 __ro_after_init module_alloc_base;
u16 __initdata memstart_offset_seed;
static __init u64 get_kaslr_seed(void *fdt)
{
int node, len;
fdt64_t *prop;
u64 ret;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
if (!prop || len != sizeof(u64))
return 0;
ret = fdt64_to_cpu(*prop);
*prop = 0;
return ret;
}
struct arm64_ftr_override kaslr_feature_override __initdata;
/*
* This routine will be executed with the kernel mapped at its default virtual
* address, and if it returns successfully, the kernel will be remapped, and
* start_kernel() will be executed from a randomized virtual offset. The
* relocation will result in all absolute references (e.g., static variables
* containing function pointers) to be reinitialized, and zero-initialized
* .bss variables will be reset to 0.
*/
u64 __init kaslr_early_init(void)
static int __init kaslr_init(void)
{
void *fdt;
u64 seed, offset, mask, module_range;
unsigned long raw;
u64 module_range;
u32 seed;
/*
* Set a reasonable default for module_alloc_base in case
* we end up running with module randomization disabled.
*/
module_alloc_base = (u64)_etext - MODULES_VSIZE;
dcache_clean_inval_poc((unsigned long)&module_alloc_base,
(unsigned long)&module_alloc_base +
sizeof(module_alloc_base));
/*
* Try to map the FDT early. If this fails, we simply bail,
* and proceed with KASLR disabled. We will make another
* attempt at mapping the FDT in setup_machine()
*/
fdt = get_early_fdt_ptr();
if (!fdt) {
kaslr_status = KASLR_DISABLED_FDT_REMAP;
return 0;
}
/*
* Retrieve (and wipe) the seed from the FDT
*/
seed = get_kaslr_seed(fdt);
/*
* Check if 'nokaslr' appears on the command line, and
* return 0 if that is the case.
*/
if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) {
kaslr_status = KASLR_DISABLED_CMDLINE;
pr_info("KASLR disabled on command line\n");
return 0;
}
/*
* Mix in any entropy obtainable architecturally if enabled
* and supported.
*/
if (arch_get_random_seed_long_early(&raw))
seed ^= raw;
if (!seed) {
kaslr_status = KASLR_DISABLED_NO_SEED;
if (!kaslr_offset()) {
pr_warn("KASLR disabled due to lack of seed\n");
return 0;
}
pr_info("KASLR enabled\n");
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
* middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
* the lower and upper quarters to avoid colliding with other
* allocations.
* Even if we could randomize at page granularity for 16k and 64k pages,
* let's always round to 2 MB so we don't interfere with the ability to
* map using contiguous PTEs
* KASAN without KASAN_VMALLOC does not expect the module region to
* intersect the vmalloc region, since shadow memory is allocated for
* each module at load time, whereas the vmalloc region will already be
* shadowed by KASAN zero pages.
*/
mask = ((1UL << (VA_BITS_MIN - 2)) - 1) & ~(SZ_2M - 1);
offset = BIT(VA_BITS_MIN - 3) + (seed & mask);
BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) ||
IS_ENABLED(CONFIG_KASAN_SW_TAGS)) &&
!IS_ENABLED(CONFIG_KASAN_VMALLOC));
/* use the top 16 bits to randomize the linear region */
memstart_offset_seed = seed >> 48;
if (!IS_ENABLED(CONFIG_KASAN_VMALLOC) &&
(IS_ENABLED(CONFIG_KASAN_GENERIC) ||
IS_ENABLED(CONFIG_KASAN_SW_TAGS)))
/*
* KASAN without KASAN_VMALLOC does not expect the module region
* to intersect the vmalloc region, since shadow memory is
* allocated for each module at load time, whereas the vmalloc
* region is shadowed by KASAN zero pages. So keep modules
* out of the vmalloc region if KASAN is enabled without
* KASAN_VMALLOC, and put the kernel well within 4 GB of the
* module region.
*/
return offset % SZ_2G;
seed = get_random_u32();
if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) {
/*
@ -154,8 +70,7 @@ u64 __init kaslr_early_init(void)
* resolved normally.)
*/
module_range = SZ_2G - (u64)(_end - _stext);
module_alloc_base = max((u64)_end + offset - SZ_2G,
(u64)MODULES_VADDR);
module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR);
} else {
/*
* Randomize the module region by setting module_alloc_base to
@ -167,40 +82,12 @@ u64 __init kaslr_early_init(void)
* when ARM64_MODULE_PLTS is enabled.
*/
module_range = MODULES_VSIZE - (u64)(_etext - _stext);
module_alloc_base = (u64)_etext + offset - MODULES_VSIZE;
}
/* use the lower 21 bits to randomize the base of the module region */
module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21;
module_alloc_base &= PAGE_MASK;
dcache_clean_inval_poc((unsigned long)&module_alloc_base,
(unsigned long)&module_alloc_base +
sizeof(module_alloc_base));
dcache_clean_inval_poc((unsigned long)&memstart_offset_seed,
(unsigned long)&memstart_offset_seed +
sizeof(memstart_offset_seed));
return offset;
}
static int __init kaslr_init(void)
{
switch (kaslr_status) {
case KASLR_ENABLED:
pr_info("KASLR enabled\n");
break;
case KASLR_DISABLED_CMDLINE:
pr_info("KASLR disabled on command line\n");
break;
case KASLR_DISABLED_NO_SEED:
pr_warn("KASLR disabled due to lack of seed\n");
break;
case KASLR_DISABLED_FDT_REMAP:
pr_warn("KASLR disabled due to FDT remapping failure\n");
break;
}
return 0;
}
core_initcall(kaslr_init)
subsys_initcall(kaslr_init)

View File

@ -0,0 +1,33 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2022 Google LLC
KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
-Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
$(call cc-option,-mbranch-protection=none) \
-I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
-include $(srctree)/include/linux/hidden.h \
-D__DISABLE_EXPORTS -ffreestanding -D__NO_FORTIFY \
$(call cc-option,-fno-addrsig)
# remove SCS flags from all objects in this directory
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
# disable LTO
KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
GCOV_PROFILE := n
KASAN_SANITIZE := n
KCSAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
--remove-section=.note.gnu.property \
--prefix-alloc-sections=.init
$(obj)/%.pi.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
$(call if_changed_rule,cc_o_c)
obj-y := kaslr_early.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
extra-y := $(patsubst %.pi.o,%.o,$(obj-y))

View File

@ -0,0 +1,112 @@
// SPDX-License-Identifier: GPL-2.0-only
// Copyright 2022 Google LLC
// Author: Ard Biesheuvel <ardb@google.com>
// NOTE: code in this file runs *very* early, and is not permitted to use
// global variables or anything that relies on absolute addressing.
#include <linux/libfdt.h>
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/sizes.h>
#include <linux/string.h>
#include <asm/archrandom.h>
#include <asm/memory.h>
/* taken from lib/string.c */
static char *__strstr(const char *s1, const char *s2)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
static bool cmdline_contains_nokaslr(const u8 *cmdline)
{
const u8 *str;
str = __strstr(cmdline, "nokaslr");
return str == cmdline || (str > cmdline && *(str - 1) == ' ');
}
static bool is_kaslr_disabled_cmdline(void *fdt)
{
if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
int node;
const u8 *prop;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
goto out;
prop = fdt_getprop(fdt, node, "bootargs", NULL);
if (!prop)
goto out;
if (cmdline_contains_nokaslr(prop))
return true;
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND))
goto out;
return false;
}
out:
return cmdline_contains_nokaslr(CONFIG_CMDLINE);
}
static u64 get_kaslr_seed(void *fdt)
{
int node, len;
fdt64_t *prop;
u64 ret;
node = fdt_path_offset(fdt, "/chosen");
if (node < 0)
return 0;
prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
if (!prop || len != sizeof(u64))
return 0;
ret = fdt64_to_cpu(*prop);
*prop = 0;
return ret;
}
asmlinkage u64 kaslr_early_init(void *fdt)
{
u64 seed;
if (is_kaslr_disabled_cmdline(fdt))
return 0;
seed = get_kaslr_seed(fdt);
if (!seed) {
#ifdef CONFIG_ARCH_RANDOM
if (!__early_cpu_has_rndr() ||
!__arm64_rndr((unsigned long *)&seed))
#endif
return 0;
}
/*
* OK, so we are proceeding with KASLR enabled. Calculate a suitable
* kernel image offset from the seed. Let's place the kernel in the
* middle half of the VMALLOC area (VA_BITS_MIN - 2), and stay clear of
* the lower and upper quarters to avoid colliding with other
* allocations.
*/
return BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0));
}
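To put numbers on the calculation above, assuming VA_BITS_MIN == 48 (illustrative only): the returned offset lies in [2^45, 2^45 + 2^46), and the caller in head.S strips the low 21 bits before applying it, keeping them as the linear-map seed. A quick sketch:

#include <stdint.h>
#include <stdio.h>

#define VA_BITS_MIN     48ULL   /* assumed for this example */

static uint64_t kaslr_offset_from_seed(uint64_t seed)
{
        /* BIT(VA_BITS_MIN - 3) + (seed & GENMASK(VA_BITS_MIN - 3, 0)) */
        return (1ULL << (VA_BITS_MIN - 3)) +
               (seed & ((1ULL << (VA_BITS_MIN - 2)) - 1));
}

int main(void)
{
        uint64_t off = kaslr_offset_from_seed(0x123456789abcdefULL);

        /* __primary_switch rounds down to 2 MiB before recording the offset */
        printf("raw offset   = %#llx\n", (unsigned long long)off);
        printf("2MiB-aligned = %#llx\n", (unsigned long long)(off & ~((1ULL << 21) - 1)));
        return 0;
}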

View File

@ -100,10 +100,11 @@ SYM_FUNC_END(__cpu_suspend_enter)
.pushsection ".idmap.text", "awx"
SYM_CODE_START(cpu_resume)
bl init_kernel_el
bl switch_to_vhe
bl finalise_el2
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir
adrp x2, idmap_pg_dir
bl __enable_mmu
ldr x8, =_cpu_resume
br x8

View File

@ -52,7 +52,7 @@ void notrace __cpu_suspend_exit(void)
/* Restore CnP bit in TTBR1_EL1 */
if (system_supports_cnp())
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
/*
* PSTATE was not saved over suspend/resume, re-enable any detected

View File

@ -199,8 +199,7 @@ SECTIONS
}
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
idmap_pg_end = .;
. += PAGE_SIZE;
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
tramp_pg_dir = .;
@ -236,6 +235,10 @@ SECTIONS
__inittext_end = .;
__initdata_begin = .;
init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
init_idmap_pg_end = .;
.init.data : {
INIT_DATA
INIT_SETUP(16)
@ -254,21 +257,17 @@ SECTIONS
HYPERVISOR_RELOC_SECTION
.rela.dyn : ALIGN(8) {
__rela_start = .;
*(.rela .rela*)
__rela_end = .;
}
__rela_offset = ABSOLUTE(ADDR(.rela.dyn) - KIMAGE_VADDR);
__rela_size = SIZEOF(.rela.dyn);
#ifdef CONFIG_RELR
.relr.dyn : ALIGN(8) {
__relr_start = .;
*(.relr.dyn)
__relr_end = .;
}
__relr_offset = ABSOLUTE(ADDR(.relr.dyn) - KIMAGE_VADDR);
__relr_size = SIZEOF(.relr.dyn);
#endif
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;

View File

@ -236,7 +236,7 @@ static void __init kasan_init_shadow(void)
*/
memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(tmp_pg_dir));
dsb(ishst);
cpu_replace_ttbr1(lm_alias(tmp_pg_dir));
cpu_replace_ttbr1(lm_alias(tmp_pg_dir), idmap_pg_dir);
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
@ -280,7 +280,7 @@ static void __init kasan_init_shadow(void)
PAGE_KERNEL_RO));
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir);
}
static void __init kasan_init_depth(void)

View File

@ -43,15 +43,27 @@
#define NO_CONT_MAPPINGS BIT(1)
#define NO_EXEC_MAPPINGS BIT(2) /* assumes FEAT_HPDS is not used */
u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
int idmap_t0sz __ro_after_init;
u64 __section(".mmuoff.data.write") vabits_actual;
#if VA_BITS > 48
u64 vabits_actual __ro_after_init = VA_BITS_MIN;
EXPORT_SYMBOL(vabits_actual);
#endif
u64 kimage_vaddr __ro_after_init = (u64)&_text;
EXPORT_SYMBOL(kimage_vaddr);
u64 kimage_voffset __ro_after_init;
EXPORT_SYMBOL(kimage_voffset);
u32 __boot_cpu_mode[] = { BOOT_CPU_MODE_EL2, BOOT_CPU_MODE_EL1 };
/*
* The booting CPU updates the failed status @__early_cpu_boot_status,
* with MMU turned off.
*/
long __section(".mmuoff.data.write") __early_cpu_boot_status;
/*
* Empty_zero_page is a special page that is used for zero-initialized data
* and COW.
@ -763,22 +775,57 @@ static void __init map_kernel(pgd_t *pgdp)
kasan_copy_shadow(pgdp);
}
static void __init create_idmap(void)
{
u64 start = __pa_symbol(__idmap_text_start);
u64 size = __pa_symbol(__idmap_text_end) - start;
pgd_t *pgd = idmap_pg_dir;
u64 pgd_phys;
/* check if we need an additional level of translation */
if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
set_pgd(&idmap_pg_dir[start >> VA_BITS],
__pgd(pgd_phys | P4D_TYPE_TABLE));
pgd = __va(pgd_phys);
}
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
early_pgtable_alloc, 0);
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
extern u32 __idmap_kpti_flag;
u64 pa = __pa_symbol(&__idmap_kpti_flag);
/*
* The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map.
*/
__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
early_pgtable_alloc, 0);
}
}
void __init paging_init(void)
{
pgd_t *pgdp = pgd_set_fixmap(__pa_symbol(swapper_pg_dir));
extern pgd_t init_idmap_pg_dir[];
idmap_t0sz = 63UL - __fls(__pa_symbol(_end) | GENMASK(VA_BITS_MIN - 1, 0));
map_kernel(pgdp);
map_mem(pgdp);
pgd_clear_fixmap();
cpu_replace_ttbr1(lm_alias(swapper_pg_dir));
cpu_replace_ttbr1(lm_alias(swapper_pg_dir), init_idmap_pg_dir);
init_mm.pgd = swapper_pg_dir;
memblock_phys_free(__pa_symbol(init_pg_dir),
__pa_symbol(init_pg_end) - __pa_symbol(init_pg_dir));
memblock_allow_resize();
create_idmap();
}
/*

View File

@ -249,8 +249,10 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1)
*
* Called exactly once from stop_machine context by each CPU found during boot.
*/
__idmap_kpti_flag:
.long 1
.pushsection ".data", "aw", %progbits
SYM_DATA(__idmap_kpti_flag, .long 1)
.popsection
SYM_FUNC_START(idmap_kpti_install_ng_mappings)
cpu .req w0
temp_pte .req x0
@ -273,7 +275,7 @@ SYM_FUNC_START(idmap_kpti_install_ng_mappings)
mov x5, x3 // preserve temp_pte arg
mrs swapper_ttb, ttbr1_el1
adr flag_ptr, __idmap_kpti_flag
adr_l flag_ptr, __idmap_kpti_flag
cbnz cpu, __idmap_kpti_secondary
@ -396,6 +398,8 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings)
*
* Initialise the processor for turning the MMU on.
*
* Input:
* x0 - actual number of VA bits (ignored unless VA_BITS > 48)
* Output:
* Return in x0 the value of the SCTLR_EL1 register.
*/
@ -465,12 +469,11 @@ SYM_FUNC_START(__cpu_setup)
tcr_clear_errata_bits tcr, x9, x5
#ifdef CONFIG_ARM64_VA_BITS_52
ldr_l x9, vabits_actual
sub x9, xzr, x9
sub x9, xzr, x0
add x9, x9, #64
tcr_set_t1sz tcr, x9
#else
ldr_l x9, idmap_t0sz
idmap_get_t0sz x9
#endif
tcr_set_t0sz tcr, x9