linux/arch/arm64/mm/init.c
Linus Torvalds 588ab3f9af arm64 updates for 4.6:
- Initial page table creation reworked to avoid breaking large block
   mappings (huge pages) into smaller ones. The ARM architecture requires
   break-before-make in such cases to avoid TLB conflicts but that's not
   always possible on live page tables
 
 - Kernel virtual memory layout: the kernel image is no longer linked to
   the bottom of the linear mapping (PAGE_OFFSET) but at the bottom of
   the vmalloc space, allowing the kernel to be loaded (nearly) anywhere
   in physical RAM
 
 - Kernel ASLR: position independent kernel Image and modules being
   randomly mapped in the vmalloc space with the randomness is provided
   by UEFI (efi_get_random_bytes() patches merged via the arm64 tree,
   acked by Matt Fleming)
 
 - Implement relative exception tables for arm64, required by KASLR
   (initial code for ARCH_HAS_RELATIVE_EXTABLE added to lib/extable.c but
   actual x86 conversion to deferred to 4.7 because of the merge
   dependencies)
 
 - Support for the User Access Override feature of ARMv8.2: this allows
   uaccess functions (get_user etc.) to be implemented using LDTR/STTR
   instructions. Such instructions, when run by the kernel, perform
   unprivileged accesses adding an extra level of protection. The
   set_fs() macro is used to "upgrade" such instruction to privileged
   accesses via the UAO bit
 
 - Half-precision floating point support (part of ARMv8.2)
 
 - Optimisations for CPUs with or without a hardware prefetcher (using
   run-time code patching)
 
 - copy_page performance improvement to deal with 128 bytes at a time
 
 - Sanity checks on the CPU capabilities (via CPUID) to prevent
   incompatible secondary CPUs from being brought up (e.g. weird
   big.LITTLE configurations)
 
 - valid_user_regs() reworked for better sanity check of the sigcontext
   information (restored pstate information)
 
 - ACPI parking protocol implementation
 
 - CONFIG_DEBUG_RODATA enabled by default
 
 - VDSO code marked as read-only
 
 - DEBUG_PAGEALLOC support
 
 - ARCH_HAS_UBSAN_SANITIZE_ALL enabled
 
 - Erratum workaround Cavium ThunderX SoC
 
 - set_pte_at() fix for PROT_NONE mappings
 
 - Code clean-ups
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJW6u95AAoJEGvWsS0AyF7xMyoP/3x2O6bgreSQ84BdO4JChN4+
 RQ9OVdX8u2ItO9sgaCY2AA6KoiBuEjGmPl/XRuK0I7DpODTtRjEXQHuNNhz8AelC
 hn4AEVqamY6Z5BzHFIjs8G9ydEbq+OXcKWEdwSsBhP/cMvI7ss3dps1f5iNPT5Vv
 50E/kUz+aWYy7pKlB18VDV7TUOA3SuYuGknWV8+bOY5uPb8hNT3Y3fHOg/EuNNN3
 DIuYH1V7XQkXtF+oNVIGxzzJCXULBE7egMcWAm1ydSOHK0JwkZAiL7OhI7ceVD0x
 YlDxBnqmi4cgzfBzTxITAhn3OParwN6udQprdF1WGtFF6fuY2eRDSH/L/iZoE4DY
 OulL951OsBtF8YC3+RKLk908/0bA2Uw8ftjCOFJTYbSnZBj1gWK41VkCYMEXiHQk
 EaN8+2Iw206iYIoyvdjGCLw7Y0oakDoVD9vmv12SOaHeQljTkjoN8oIlfjjKTeP7
 3AXj5v9BDMDVh40nkVayysRNvqe48Kwt9Wn0rhVTLxwdJEiFG/OIU6HLuTkretdN
 dcCNFSQrRieSFHpBK9G0vKIpIss1ZwLm8gjocVXH7VK4Mo/TNQe4p2/wAF29mq4r
 xu1UiXmtU3uWxiqZnt72LOYFCarQ0sFA5+pMEvF5W+NrVB0wGpXhcwm+pGsIi4IM
 LepccTgykiUBqW5TRzPz
 =/oS+
 -----END PGP SIGNATURE-----

Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 updates from Catalin Marinas:
 "Here are the main arm64 updates for 4.6.  There are some relatively
  intrusive changes to support KASLR, the reworking of the kernel
  virtual memory layout and initial page table creation.

  Summary:

   - Initial page table creation reworked to avoid breaking large block
     mappings (huge pages) into smaller ones.  The ARM architecture
     requires break-before-make in such cases to avoid TLB conflicts but
     that's not always possible on live page tables

   - Kernel virtual memory layout: the kernel image is no longer linked
     to the bottom of the linear mapping (PAGE_OFFSET) but at the bottom
     of the vmalloc space, allowing the kernel to be loaded (nearly)
     anywhere in physical RAM

   - Kernel ASLR: position independent kernel Image and modules being
     randomly mapped in the vmalloc space with the randomness is
     provided by UEFI (efi_get_random_bytes() patches merged via the
     arm64 tree, acked by Matt Fleming)

   - Implement relative exception tables for arm64, required by KASLR
     (initial code for ARCH_HAS_RELATIVE_EXTABLE added to lib/extable.c
     but actual x86 conversion to deferred to 4.7 because of the merge
     dependencies)

   - Support for the User Access Override feature of ARMv8.2: this
     allows uaccess functions (get_user etc.) to be implemented using
     LDTR/STTR instructions.  Such instructions, when run by the kernel,
     perform unprivileged accesses adding an extra level of protection.
     The set_fs() macro is used to "upgrade" such instruction to
     privileged accesses via the UAO bit

   - Half-precision floating point support (part of ARMv8.2)

   - Optimisations for CPUs with or without a hardware prefetcher (using
     run-time code patching)

   - copy_page performance improvement to deal with 128 bytes at a time

   - Sanity checks on the CPU capabilities (via CPUID) to prevent
     incompatible secondary CPUs from being brought up (e.g.  weird
     big.LITTLE configurations)

   - valid_user_regs() reworked for better sanity check of the
     sigcontext information (restored pstate information)

   - ACPI parking protocol implementation

   - CONFIG_DEBUG_RODATA enabled by default

   - VDSO code marked as read-only

   - DEBUG_PAGEALLOC support

   - ARCH_HAS_UBSAN_SANITIZE_ALL enabled

   - Erratum workaround Cavium ThunderX SoC

   - set_pte_at() fix for PROT_NONE mappings

   - Code clean-ups"

* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (99 commits)
  arm64: kasan: Fix zero shadow mapping overriding kernel image shadow
  arm64: kasan: Use actual memory node when populating the kernel image shadow
  arm64: Update PTE_RDONLY in set_pte_at() for PROT_NONE permission
  arm64: Fix misspellings in comments.
  arm64: efi: add missing frame pointer assignment
  arm64: make mrs_s prefixing implicit in read_cpuid
  arm64: enable CONFIG_DEBUG_RODATA by default
  arm64: Rework valid_user_regs
  arm64: mm: check at build time that PAGE_OFFSET divides the VA space evenly
  arm64: KVM: Move kvm_call_hyp back to its original localtion
  arm64: mm: treat memstart_addr as a signed quantity
  arm64: mm: list kernel sections in order
  arm64: lse: deal with clobbered IP registers after branch via PLT
  arm64: mm: dump: Use VA_START directly instead of private LOWEST_ADDR
  arm64: kconfig: add submenu for 8.2 architectural features
  arm64: kernel: acpi: fix ioremap in ACPI parking protocol cpu_postboot
  arm64: Add support for Half precision floating point
  arm64: Remove fixmap include fragility
  arm64: Add workaround for Cavium erratum 27456
  arm64: mm: Mark .rodata as RO
  ...
2016-03-17 20:03:47 -07:00

473 lines
12 KiB
C

/*
* Based on arch/arm/mm/init.c
*
* Copyright (C) 1995-2005 Russell King
* Copyright (C) 2012 ARM Ltd.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/swap.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>
#include <linux/initrd.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/sort.h>
#include <linux/of_fdt.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/efi.h>
#include <linux/swiotlb.h>
#include <asm/boot.h>
#include <asm/fixmap.h>
#include <asm/kasan.h>
#include <asm/kernel-pgtable.h>
#include <asm/memory.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>
#include <asm/alternative.h>
#include "mm.h"
/*
* We need to be able to catch inadvertent references to memstart_addr
* that occur (potentially in generic code) before arm64_memblock_init()
* executes, which assigns it its actual value. So use a default value
* that cannot be mistaken for a real physical address.
*/
s64 memstart_addr __read_mostly = -1;
phys_addr_t arm64_dma_phys_limit __read_mostly;
#ifdef CONFIG_BLK_DEV_INITRD
static int __init early_initrd(char *p)
{
unsigned long start, size;
char *endp;
start = memparse(p, &endp);
if (*endp == ',') {
size = memparse(endp + 1, NULL);
initrd_start = start;
initrd_end = start + size;
}
return 0;
}
early_param("initrd", early_initrd);
#endif
/*
* Return the maximum physical address for ZONE_DMA (DMA_BIT_MASK(32)). It
* currently assumes that for memory starting above 4G, 32-bit devices will
* use a DMA offset.
*/
static phys_addr_t __init max_zone_dma_phys(void)
{
phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32);
return min(offset + (1ULL << 32), memblock_end_of_DRAM());
}
static void __init zone_sizes_init(unsigned long min, unsigned long max)
{
struct memblock_region *reg;
unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES];
unsigned long max_dma = min;
memset(zone_size, 0, sizeof(zone_size));
/* 4GB maximum for 32-bit only capable devices */
#ifdef CONFIG_ZONE_DMA
max_dma = PFN_DOWN(arm64_dma_phys_limit);
zone_size[ZONE_DMA] = max_dma - min;
#endif
zone_size[ZONE_NORMAL] = max - max_dma;
memcpy(zhole_size, zone_size, sizeof(zhole_size));
for_each_memblock(memory, reg) {
unsigned long start = memblock_region_memory_base_pfn(reg);
unsigned long end = memblock_region_memory_end_pfn(reg);
if (start >= max)
continue;
#ifdef CONFIG_ZONE_DMA
if (start < max_dma) {
unsigned long dma_end = min(end, max_dma);
zhole_size[ZONE_DMA] -= dma_end - start;
}
#endif
if (end > max_dma) {
unsigned long normal_end = min(end, max);
unsigned long normal_start = max(start, max_dma);
zhole_size[ZONE_NORMAL] -= normal_end - normal_start;
}
}
free_area_init_node(0, zone_size, min, zhole_size);
}
#ifdef CONFIG_HAVE_ARCH_PFN_VALID
int pfn_valid(unsigned long pfn)
{
return memblock_is_map_memory(pfn << PAGE_SHIFT);
}
EXPORT_SYMBOL(pfn_valid);
#endif
#ifndef CONFIG_SPARSEMEM
static void __init arm64_memory_present(void)
{
}
#else
static void __init arm64_memory_present(void)
{
struct memblock_region *reg;
for_each_memblock(memory, reg)
memory_present(0, memblock_region_memory_base_pfn(reg),
memblock_region_memory_end_pfn(reg));
}
#endif
static phys_addr_t memory_limit = (phys_addr_t)ULLONG_MAX;
/*
* Limit the memory size that was specified via FDT.
*/
static int __init early_mem(char *p)
{
if (!p)
return 1;
memory_limit = memparse(p, &p) & PAGE_MASK;
pr_notice("Memory limited to %lldMB\n", memory_limit >> 20);
return 0;
}
early_param("mem", early_mem);
void __init arm64_memblock_init(void)
{
const s64 linear_region_size = -(s64)PAGE_OFFSET;
/*
* Ensure that the linear region takes up exactly half of the kernel
* virtual address space. This way, we can distinguish a linear address
* from a kernel/module/vmalloc address by testing a single bit.
*/
BUILD_BUG_ON(linear_region_size != BIT(VA_BITS - 1));
/*
* Select a suitable value for the base of physical memory.
*/
memstart_addr = round_down(memblock_start_of_DRAM(),
ARM64_MEMSTART_ALIGN);
/*
* Remove the memory that we will not be able to cover with the
* linear mapping. Take care not to clip the kernel which may be
* high in memory.
*/
memblock_remove(max_t(u64, memstart_addr + linear_region_size, __pa(_end)),
ULLONG_MAX);
if (memblock_end_of_DRAM() > linear_region_size)
memblock_remove(0, memblock_end_of_DRAM() - linear_region_size);
/*
* Apply the memory limit if it was set. Since the kernel may be loaded
* high up in memory, add back the kernel region that must be accessible
* via the linear mapping.
*/
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
memblock_enforce_memory_limit(memory_limit);
memblock_add(__pa(_text), (u64)(_end - _text));
}
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
extern u16 memstart_offset_seed;
u64 range = linear_region_size -
(memblock_end_of_DRAM() - memblock_start_of_DRAM());
/*
* If the size of the linear region exceeds, by a sufficient
* margin, the size of the region that the available physical
* memory spans, randomize the linear region as well.
*/
if (memstart_offset_seed > 0 && range >= ARM64_MEMSTART_ALIGN) {
range = range / ARM64_MEMSTART_ALIGN + 1;
memstart_addr -= ARM64_MEMSTART_ALIGN *
((range * memstart_offset_seed) >> 16);
}
}
/*
* Register the kernel text, kernel data, initrd, and initial
* pagetables with memblock.
*/
memblock_reserve(__pa(_text), _end - _text);
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start) {
memblock_reserve(initrd_start, initrd_end - initrd_start);
/* the generic initrd code expects virtual addresses */
initrd_start = __phys_to_virt(initrd_start);
initrd_end = __phys_to_virt(initrd_end);
}
#endif
early_init_fdt_scan_reserved_mem();
/* 4GB maximum for 32-bit only capable devices */
if (IS_ENABLED(CONFIG_ZONE_DMA))
arm64_dma_phys_limit = max_zone_dma_phys();
else
arm64_dma_phys_limit = PHYS_MASK + 1;
dma_contiguous_reserve(arm64_dma_phys_limit);
memblock_allow_resize();
memblock_dump_all();
}
void __init bootmem_init(void)
{
unsigned long min, max;
min = PFN_UP(memblock_start_of_DRAM());
max = PFN_DOWN(memblock_end_of_DRAM());
early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
/*
* Sparsemem tries to allocate bootmem in memory_present(), so must be
* done after the fixed reservations.
*/
arm64_memory_present();
sparse_init();
zone_sizes_init(min, max);
high_memory = __va((max << PAGE_SHIFT) - 1) + 1;
max_pfn = max_low_pfn = max;
}
#ifndef CONFIG_SPARSEMEM_VMEMMAP
static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
{
struct page *start_pg, *end_pg;
unsigned long pg, pgend;
/*
* Convert start_pfn/end_pfn to a struct page pointer.
*/
start_pg = pfn_to_page(start_pfn - 1) + 1;
end_pg = pfn_to_page(end_pfn - 1) + 1;
/*
* Convert to physical addresses, and round start upwards and end
* downwards.
*/
pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;
/*
* If there are free pages between these, free the section of the
* memmap array.
*/
if (pg < pgend)
free_bootmem(pg, pgend - pg);
}
/*
* The mem_map array can get very big. Free the unused area of the memory map.
*/
static void __init free_unused_memmap(void)
{
unsigned long start, prev_end = 0;
struct memblock_region *reg;
for_each_memblock(memory, reg) {
start = __phys_to_pfn(reg->base);
#ifdef CONFIG_SPARSEMEM
/*
* Take care not to free memmap entries that don't exist due
* to SPARSEMEM sections which aren't present.
*/
start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
#endif
/*
* If we had a previous bank, and there is a space between the
* current bank and the previous, free it.
*/
if (prev_end && prev_end < start)
free_memmap(prev_end, start);
/*
* Align up here since the VM subsystem insists that the
* memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES.
*/
prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
MAX_ORDER_NR_PAGES);
}
#ifdef CONFIG_SPARSEMEM
if (!IS_ALIGNED(prev_end, PAGES_PER_SECTION))
free_memmap(prev_end, ALIGN(prev_end, PAGES_PER_SECTION));
#endif
}
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
/*
* mem_init() marks the free areas in the mem_map and tells us how much memory
* is free. This is done after various parts of the system have claimed their
* memory after the kernel image.
*/
void __init mem_init(void)
{
swiotlb_init(1);
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
#ifndef CONFIG_SPARSEMEM_VMEMMAP
free_unused_memmap();
#endif
/* this will put all unused low memory onto the freelists */
free_all_bootmem();
mem_init_print_info(NULL);
#define MLK(b, t) b, t, ((t) - (b)) >> 10
#define MLM(b, t) b, t, ((t) - (b)) >> 20
#define MLG(b, t) b, t, ((t) - (b)) >> 30
#define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)
pr_notice("Virtual kernel memory layout:\n"
#ifdef CONFIG_KASAN
" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n"
#endif
" modules : 0x%16lx - 0x%16lx (%6ld MB)\n"
" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n"
" .text : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .init : 0x%p" " - 0x%p" " (%6ld KB)\n"
" .data : 0x%p" " - 0x%p" " (%6ld KB)\n"
#ifdef CONFIG_SPARSEMEM_VMEMMAP
" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n"
" 0x%16lx - 0x%16lx (%6ld MB actual)\n"
#endif
" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n"
" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n"
" memory : 0x%16lx - 0x%16lx (%6ld MB)\n",
#ifdef CONFIG_KASAN
MLG(KASAN_SHADOW_START, KASAN_SHADOW_END),
#endif
MLM(MODULES_VADDR, MODULES_END),
MLG(VMALLOC_START, VMALLOC_END),
MLK_ROUNDUP(_text, __start_rodata),
MLK_ROUNDUP(__start_rodata, _etext),
MLK_ROUNDUP(__init_begin, __init_end),
MLK_ROUNDUP(_sdata, _edata),
#ifdef CONFIG_SPARSEMEM_VMEMMAP
MLG(VMEMMAP_START,
VMEMMAP_START + VMEMMAP_SIZE),
MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()),
(unsigned long)virt_to_page(high_memory)),
#endif
MLK(FIXADDR_START, FIXADDR_TOP),
MLM(PCI_IO_START, PCI_IO_END),
MLM(__phys_to_virt(memblock_start_of_DRAM()),
(unsigned long)high_memory));
#undef MLK
#undef MLM
#undef MLK_ROUNDUP
/*
* Check boundaries twice: Some fundamental inconsistencies can be
* detected at build time already.
*/
#ifdef CONFIG_COMPAT
BUILD_BUG_ON(TASK_SIZE_32 > TASK_SIZE_64);
#endif
if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
extern int sysctl_overcommit_memory;
/*
* On a machine this small we won't get anywhere without
* overcommit, so turn it on by default.
*/
sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
}
}
void free_initmem(void)
{
free_initmem_default(0);
fixup_init();
}
#ifdef CONFIG_BLK_DEV_INITRD
static int keep_initrd __initdata;
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd)
free_reserved_area((void *)start, (void *)end, 0, "initrd");
}
static int __init keepinitrd_setup(char *__unused)
{
keep_initrd = 1;
return 1;
}
__setup("keepinitrd", keepinitrd_setup);
#endif
/*
* Dump out memory limit information on panic.
*/
static int dump_mem_limit(struct notifier_block *self, unsigned long v, void *p)
{
if (memory_limit != (phys_addr_t)ULLONG_MAX) {
pr_emerg("Memory Limit: %llu MB\n", memory_limit >> 20);
} else {
pr_emerg("Memory Limit: none\n");
}
return 0;
}
static struct notifier_block mem_limit_notifier = {
.notifier_call = dump_mem_limit,
};
static int __init register_mem_limit_dumper(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
&mem_limit_notifier);
return 0;
}
__initcall(register_mem_limit_dumper);