Merge branch 'x86-bootmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-bootmem-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (30 commits)
  early_res: Need to save the allocation name in drop_range_partial()
  sparsemem: Fix compilation on PowerPC
  early_res: Add free_early_partial()
  x86: Fix non-bootmem compilation on PowerPC
  core: Move early_res from arch/x86 to kernel/
  x86: Add find_fw_memmap_area
  Move round_up/down to kernel.h
  x86: Make 32bit support NO_BOOTMEM
  early_res: Enhance check_and_double_early_res
  x86: Move back find_e820_area to e820.c
  x86: Add find_early_area_size
  x86: Separate early_res related code from e820.c
  x86: Move bios page reserve early to head32/64.c
  sparsemem: Put mem map for one node together.
  sparsemem: Put usemap for one node together
  x86: Make 64 bit use early_res instead of bootmem before slab
  x86: Only call dma32_reserve_bootmem 64bit !CONFIG_NUMA
  x86: Make early_node_mem get mem > 4 GB if possible
  x86: Dynamically increase early_res array size
  x86: Introduce max_early_res and early_res_count
  ...
This commit is contained in:
commit
a626b46e17
@ -184,6 +184,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING
|
||||
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||
def_bool y
|
||||
|
||||
config HAVE_EARLY_RES
|
||||
def_bool y
|
||||
|
||||
config HAVE_INTEL_TXT
|
||||
def_bool y
|
||||
depends on EXPERIMENTAL && DMAR && ACPI
|
||||
@ -569,6 +572,18 @@ config PARAVIRT_DEBUG
|
||||
Enable to debug paravirt_ops internals. Specifically, BUG if
|
||||
a paravirt_op is missing when it is called.
|
||||
|
||||
config NO_BOOTMEM
|
||||
default y
|
||||
bool "Disable Bootmem code"
|
||||
---help---
|
||||
Use early_res directly instead of bootmem before slab is ready.
|
||||
- allocator (buddy) [generic]
|
||||
- early allocator (bootmem) [generic]
|
||||
- very early allocator (reserve_early*()) [x86]
|
||||
- very very early allocator (early brk model) [x86]
|
||||
This removes one layer between the early allocator and the final allocator.
|
||||
|
||||
|
||||
config MEMTEST
|
||||
bool "Memtest"
|
||||
---help---
|
||||
|
@ -111,11 +111,8 @@ extern unsigned long end_user_pfn;
|
||||
|
||||
extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align);
|
||||
extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align);
|
||||
extern void reserve_early(u64 start, u64 end, char *name);
|
||||
extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
|
||||
extern void free_early(u64 start, u64 end);
|
||||
extern void early_res_to_bootmem(u64 start, u64 end);
|
||||
extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align);
|
||||
#include <linux/early_res.h>
|
||||
|
||||
extern unsigned long e820_end_of_ram_pfn(void);
|
||||
extern unsigned long e820_end_of_low_ram_pfn(void);
|
||||
|
@ -124,6 +124,8 @@ extern void pci_iommu_alloc(void);
|
||||
#include "pci_64.h"
|
||||
#endif
|
||||
|
||||
void dma32_reserve_bootmem(void);
|
||||
|
||||
/* implement the pci_ DMA API in terms of the generic device dma_ one */
|
||||
#include <asm-generic/pci-dma-compat.h>
|
||||
|
||||
|
@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
|
||||
extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
|
||||
int reg, int len, u32 value);
|
||||
|
||||
extern void dma32_reserve_bootmem(void);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _ASM_X86_PCI_64_H */
|
||||
|
@ -23,14 +23,4 @@ extern int reboot_force;
|
||||
|
||||
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
|
||||
|
||||
/*
|
||||
* This looks more complex than it should be. But we need to
|
||||
* get the type for the ~ right in round_down (it needs to be
|
||||
* as wide as the result!), and we want to evaluate the macro
|
||||
* arguments just once each.
|
||||
*/
|
||||
#define __round_mask(x,y) ((__typeof__(x))((y)-1))
|
||||
#define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1)
|
||||
#define round_down(x,y) ((x) & ~__round_mask(x,y))
|
||||
|
||||
#endif /* _ASM_X86_PROTO_H */
|
||||
|
@ -22,10 +22,10 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/kvm_para.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include <asm/processor.h>
|
||||
#include <asm/e820.h>
|
||||
@ -34,11 +34,6 @@
|
||||
|
||||
#include "mtrr.h"
|
||||
|
||||
struct res_range {
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
};
|
||||
|
||||
struct var_mtrr_range_state {
|
||||
unsigned long base_pfn;
|
||||
unsigned long size_pfn;
|
||||
@ -56,7 +51,7 @@ struct var_mtrr_state {
|
||||
/* Should be related to MTRR_VAR_RANGES nums */
|
||||
#define RANGE_NUM 256
|
||||
|
||||
static struct res_range __initdata range[RANGE_NUM];
|
||||
static struct range __initdata range[RANGE_NUM];
|
||||
static int __initdata nr_range;
|
||||
|
||||
static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
|
||||
@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
|
||||
static int __initdata debug_print;
|
||||
#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
|
||||
|
||||
|
||||
static int __init
|
||||
add_range(struct res_range *range, int nr_range,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
/* Out of slots: */
|
||||
if (nr_range >= RANGE_NUM)
|
||||
return nr_range;
|
||||
|
||||
range[nr_range].start = start;
|
||||
range[nr_range].end = end;
|
||||
|
||||
nr_range++;
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
static int __init
|
||||
add_range_with_merge(struct res_range *range, int nr_range,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Try to merge it with old one: */
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
unsigned long final_start, final_end;
|
||||
unsigned long common_start, common_end;
|
||||
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
|
||||
common_start = max(range[i].start, start);
|
||||
common_end = min(range[i].end, end);
|
||||
if (common_start > common_end + 1)
|
||||
continue;
|
||||
|
||||
final_start = min(range[i].start, start);
|
||||
final_end = max(range[i].end, end);
|
||||
|
||||
range[i].start = final_start;
|
||||
range[i].end = final_end;
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
/* Need to add it: */
|
||||
return add_range(range, nr_range, start, end);
|
||||
}
|
||||
|
||||
static void __init
|
||||
subtract_range(struct res_range *range, unsigned long start, unsigned long end)
|
||||
{
|
||||
int i, j;
|
||||
|
||||
for (j = 0; j < RANGE_NUM; j++) {
|
||||
if (!range[j].end)
|
||||
continue;
|
||||
|
||||
if (start <= range[j].start && end >= range[j].end) {
|
||||
range[j].start = 0;
|
||||
range[j].end = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start <= range[j].start && end < range[j].end &&
|
||||
range[j].start < end + 1) {
|
||||
range[j].start = end + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (start > range[j].start && end >= range[j].end &&
|
||||
range[j].end > start - 1) {
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start > range[j].start && end < range[j].end) {
|
||||
/* Find the new spare: */
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (range[i].end == 0)
|
||||
break;
|
||||
}
|
||||
if (i < RANGE_NUM) {
|
||||
range[i].end = range[j].end;
|
||||
range[i].start = end + 1;
|
||||
} else {
|
||||
printk(KERN_ERR "run of slot in ranges\n");
|
||||
}
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int __init cmp_range(const void *x1, const void *x2)
|
||||
{
|
||||
const struct res_range *r1 = x1;
|
||||
const struct res_range *r2 = x2;
|
||||
long start1, start2;
|
||||
|
||||
start1 = r1->start;
|
||||
start2 = r2->start;
|
||||
|
||||
return start1 - start2;
|
||||
}
|
||||
|
||||
static int __init clean_sort_range(struct res_range *range, int az)
|
||||
{
|
||||
int i, j, k = az - 1, nr_range = 0;
|
||||
|
||||
for (i = 0; i < k; i++) {
|
||||
if (range[i].end)
|
||||
continue;
|
||||
for (j = k; j > i; j--) {
|
||||
if (range[j].end) {
|
||||
k = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == i)
|
||||
break;
|
||||
range[i].start = range[k].start;
|
||||
range[i].end = range[k].end;
|
||||
range[k].start = 0;
|
||||
range[k].end = 0;
|
||||
k--;
|
||||
}
|
||||
/* count it */
|
||||
for (i = 0; i < az; i++) {
|
||||
if (!range[i].end) {
|
||||
nr_range = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* sort them */
|
||||
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
#define BIOS_BUG_MSG KERN_WARNING \
|
||||
"WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
|
||||
|
||||
static int __init
|
||||
x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
|
||||
x86_get_mtrr_mem_range(struct range *range, int nr_range,
|
||||
unsigned long extra_remove_base,
|
||||
unsigned long extra_remove_size)
|
||||
{
|
||||
@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
|
||||
continue;
|
||||
base = range_state[i].base_pfn;
|
||||
size = range_state[i].size_pfn;
|
||||
nr_range = add_range_with_merge(range, nr_range, base,
|
||||
base + size - 1);
|
||||
nr_range = add_range_with_merge(range, RANGE_NUM, nr_range,
|
||||
base, base + size);
|
||||
}
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After WB checking\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
/* Take out UC ranges: */
|
||||
@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
|
||||
size -= (1<<(20-PAGE_SHIFT)) - base;
|
||||
base = 1<<(20-PAGE_SHIFT);
|
||||
}
|
||||
subtract_range(range, base, base + size - 1);
|
||||
subtract_range(range, RANGE_NUM, base, base + size);
|
||||
}
|
||||
if (extra_remove_size)
|
||||
subtract_range(range, extra_remove_base,
|
||||
extra_remove_base + extra_remove_size - 1);
|
||||
subtract_range(range, RANGE_NUM, extra_remove_base,
|
||||
extra_remove_base + extra_remove_size);
|
||||
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After UC checking\n");
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
}
|
||||
|
||||
@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
|
||||
if (debug_print) {
|
||||
printk(KERN_DEBUG "After sorting\n");
|
||||
for (i = 0; i < nr_range; i++)
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n",
|
||||
range[i].start, range[i].end + 1);
|
||||
printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n",
|
||||
range[i].start, range[i].end);
|
||||
}
|
||||
|
||||
/* clear those is not used */
|
||||
for (i = nr_range; i < RANGE_NUM; i++)
|
||||
memset(&range[i], 0, sizeof(range[i]));
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MTRR_SANITIZER
|
||||
|
||||
static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
|
||||
static unsigned long __init sum_ranges(struct range *range, int nr_range)
|
||||
{
|
||||
unsigned long sum = 0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nr_range; i++)
|
||||
sum += range[i].end + 1 - range[i].start;
|
||||
sum += range[i].end - range[i].start;
|
||||
|
||||
return sum;
|
||||
}
|
||||
@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg)
|
||||
early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
|
||||
|
||||
static int __init
|
||||
x86_setup_var_mtrrs(struct res_range *range, int nr_range,
|
||||
x86_setup_var_mtrrs(struct range *range, int nr_range,
|
||||
u64 chunk_size, u64 gran_size)
|
||||
{
|
||||
struct var_mtrr_state var_state;
|
||||
@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
|
||||
/* Write the range: */
|
||||
for (i = 0; i < nr_range; i++) {
|
||||
set_var_mtrr_range(&var_state, range[i].start,
|
||||
range[i].end - range[i].start + 1);
|
||||
range[i].end - range[i].start);
|
||||
}
|
||||
|
||||
/* Write the last range: */
|
||||
@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
|
||||
unsigned long x_remove_base,
|
||||
unsigned long x_remove_size, int i)
|
||||
{
|
||||
static struct res_range range_new[RANGE_NUM];
|
||||
static struct range range_new[RANGE_NUM];
|
||||
unsigned long range_sums_new;
|
||||
static int nr_range_new;
|
||||
int num_reg;
|
||||
@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits)
|
||||
* [0, 1M) should always be covered by var mtrr with WB
|
||||
* and fixed mtrrs should take effect before var mtrr for it:
|
||||
*/
|
||||
nr_range = add_range_with_merge(range, nr_range, 0,
|
||||
(1ULL<<(20 - PAGE_SHIFT)) - 1);
|
||||
nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0,
|
||||
1ULL<<(20 - PAGE_SHIFT));
|
||||
/* Sort the ranges: */
|
||||
sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
|
||||
sort_range(range, nr_range);
|
||||
|
||||
range_sums = sum_ranges(range, nr_range);
|
||||
printk(KERN_INFO "total RAM covered: %ldM\n",
|
||||
@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
||||
nr_range = 0;
|
||||
if (mtrr_tom2) {
|
||||
range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT));
|
||||
range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1;
|
||||
if (highest_pfn < range[nr_range].end + 1)
|
||||
highest_pfn = range[nr_range].end + 1;
|
||||
range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT;
|
||||
if (highest_pfn < range[nr_range].end)
|
||||
highest_pfn = range[nr_range].end;
|
||||
nr_range++;
|
||||
}
|
||||
nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
|
||||
@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
|
||||
|
||||
/* Check the holes: */
|
||||
for (i = 0; i < nr_range - 1; i++) {
|
||||
if (range[i].end + 1 < range[i+1].start)
|
||||
total_trim_size += real_trim_memory(range[i].end + 1,
|
||||
if (range[i].end < range[i+1].start)
|
||||
total_trim_size += real_trim_memory(range[i].end,
|
||||
range[i+1].start);
|
||||
}
|
||||
|
||||
/* Check the top: */
|
||||
i = nr_range - 1;
|
||||
if (range[i].end + 1 < end_pfn)
|
||||
total_trim_size += real_trim_memory(range[i].end + 1,
|
||||
if (range[i].end < end_pfn)
|
||||
total_trim_size += real_trim_memory(range[i].end,
|
||||
end_pfn);
|
||||
|
||||
if (total_trim_size) {
|
||||
|
@ -12,21 +12,13 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/kexec.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/suspend.h>
|
||||
#include <linux/firmware-map.h>
|
||||
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/e820.h>
|
||||
#include <asm/proto.h>
|
||||
#include <asm/setup.h>
|
||||
#include <asm/trampoline.h>
|
||||
|
||||
/*
|
||||
* The e820 map is the map that gets modified e.g. with command line parameters
|
||||
@ -729,288 +721,6 @@ static int __init e820_mark_nvs_memory(void)
|
||||
core_initcall(e820_mark_nvs_memory);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Early reserved memory areas.
|
||||
*/
|
||||
#define MAX_EARLY_RES 32
|
||||
|
||||
struct early_res {
|
||||
u64 start, end;
|
||||
char name[16];
|
||||
char overlap_ok;
|
||||
};
|
||||
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
||||
{ 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
|
||||
#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
|
||||
/*
|
||||
* But first pinch a few for the stack/trampoline stuff
|
||||
* FIXME: Don't need the extra page at 4K, but need to fix
|
||||
* trampoline before removing it. (see the GDT stuff)
|
||||
*/
|
||||
{ PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
|
||||
#endif
|
||||
|
||||
{}
|
||||
};
|
||||
|
||||
static int __init find_overlapped_early(u64 start, u64 end)
|
||||
{
|
||||
int i;
|
||||
struct early_res *r;
|
||||
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
r = &early_res[i];
|
||||
if (end > r->start && start < r->end)
|
||||
break;
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the i-th range from the early reservation map,
|
||||
* by copying any higher ranges down one over it, and
|
||||
* clearing what had been the last slot.
|
||||
*/
|
||||
static void __init drop_range(int i)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
|
||||
;
|
||||
|
||||
memmove(&early_res[i], &early_res[i + 1],
|
||||
(j - 1 - i) * sizeof(struct early_res));
|
||||
|
||||
early_res[j - 1].end = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Split any existing ranges that:
|
||||
* 1) are marked 'overlap_ok', and
|
||||
* 2) overlap with the stated range [start, end)
|
||||
* into whatever portion (if any) of the existing range is entirely
|
||||
* below or entirely above the stated range. Drop the portion
|
||||
* of the existing range that overlaps with the stated range,
|
||||
* which will allow the caller of this routine to then add that
|
||||
* stated range without conflicting with any existing range.
|
||||
*/
|
||||
static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
|
||||
{
|
||||
int i;
|
||||
struct early_res *r;
|
||||
u64 lower_start, lower_end;
|
||||
u64 upper_start, upper_end;
|
||||
char name[16];
|
||||
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
r = &early_res[i];
|
||||
|
||||
/* Continue past non-overlapping ranges */
|
||||
if (end <= r->start || start >= r->end)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Leave non-ok overlaps as is; let caller
|
||||
* panic "Overlapping early reservations"
|
||||
* when it hits this overlap.
|
||||
*/
|
||||
if (!r->overlap_ok)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We have an ok overlap. We will drop it from the early
|
||||
* reservation map, and add back in any non-overlapping
|
||||
* portions (lower or upper) as separate, overlap_ok,
|
||||
* non-overlapping ranges.
|
||||
*/
|
||||
|
||||
/* 1. Note any non-overlapping (lower or upper) ranges. */
|
||||
strncpy(name, r->name, sizeof(name) - 1);
|
||||
|
||||
lower_start = lower_end = 0;
|
||||
upper_start = upper_end = 0;
|
||||
if (r->start < start) {
|
||||
lower_start = r->start;
|
||||
lower_end = start;
|
||||
}
|
||||
if (r->end > end) {
|
||||
upper_start = end;
|
||||
upper_end = r->end;
|
||||
}
|
||||
|
||||
/* 2. Drop the original ok overlapping range */
|
||||
drop_range(i);
|
||||
|
||||
i--; /* resume for-loop on copied down entry */
|
||||
|
||||
/* 3. Add back in any non-overlapping ranges. */
|
||||
if (lower_end)
|
||||
reserve_early_overlap_ok(lower_start, lower_end, name);
|
||||
if (upper_end)
|
||||
reserve_early_overlap_ok(upper_start, upper_end, name);
|
||||
}
|
||||
}
|
||||
|
||||
static void __init __reserve_early(u64 start, u64 end, char *name,
|
||||
int overlap_ok)
|
||||
{
|
||||
int i;
|
||||
struct early_res *r;
|
||||
|
||||
i = find_overlapped_early(start, end);
|
||||
if (i >= MAX_EARLY_RES)
|
||||
panic("Too many early reservations");
|
||||
r = &early_res[i];
|
||||
if (r->end)
|
||||
panic("Overlapping early reservations "
|
||||
"%llx-%llx %s to %llx-%llx %s\n",
|
||||
start, end - 1, name?name:"", r->start,
|
||||
r->end - 1, r->name);
|
||||
r->start = start;
|
||||
r->end = end;
|
||||
r->overlap_ok = overlap_ok;
|
||||
if (name)
|
||||
strncpy(r->name, name, sizeof(r->name) - 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* A few early reservations come here.
|
||||
*
|
||||
* The 'overlap_ok' in the name of this routine does -not- mean it
|
||||
* is ok for these reservations to overlap an earlier reservation.
|
||||
* Rather it means that it is ok for subsequent reservations to
|
||||
* overlap this one.
|
||||
*
|
||||
* Use this entry point to reserve early ranges when you are doing
|
||||
* so out of "Paranoia", reserving perhaps more memory than you need,
|
||||
* just in case, and don't mind a subsequent overlapping reservation
|
||||
* that is known to be needed.
|
||||
*
|
||||
* The drop_overlaps_that_are_ok() call here isn't really needed.
|
||||
* It would be needed if we had two colliding 'overlap_ok'
|
||||
* reservations, so that the second such would not panic on the
|
||||
* overlap with the first. We don't have any such as of this
|
||||
* writing, but might as well tolerate such if it happens in
|
||||
* the future.
|
||||
*/
|
||||
void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
|
||||
{
|
||||
drop_overlaps_that_are_ok(start, end);
|
||||
__reserve_early(start, end, name, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Most early reservations come here.
|
||||
*
|
||||
* We first have drop_overlaps_that_are_ok() drop any pre-existing
|
||||
* 'overlap_ok' ranges, so that we can then reserve this memory
|
||||
* range without risk of panic'ing on an overlapping overlap_ok
|
||||
* early reservation.
|
||||
*/
|
||||
void __init reserve_early(u64 start, u64 end, char *name)
|
||||
{
|
||||
if (start >= end)
|
||||
return;
|
||||
|
||||
drop_overlaps_that_are_ok(start, end);
|
||||
__reserve_early(start, end, name, 0);
|
||||
}
|
||||
|
||||
void __init free_early(u64 start, u64 end)
|
||||
{
|
||||
struct early_res *r;
|
||||
int i;
|
||||
|
||||
i = find_overlapped_early(start, end);
|
||||
r = &early_res[i];
|
||||
if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
|
||||
panic("free_early on not reserved area: %llx-%llx!",
|
||||
start, end - 1);
|
||||
|
||||
drop_range(i);
|
||||
}
|
||||
|
||||
void __init early_res_to_bootmem(u64 start, u64 end)
|
||||
{
|
||||
int i, count;
|
||||
u64 final_start, final_end;
|
||||
|
||||
count = 0;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
|
||||
count++;
|
||||
|
||||
printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
|
||||
count, start, end);
|
||||
for (i = 0; i < count; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
|
||||
r->start, r->end, r->name);
|
||||
final_start = max(start, r->start);
|
||||
final_end = min(end, r->end);
|
||||
if (final_start >= final_end) {
|
||||
printk(KERN_CONT "\n");
|
||||
continue;
|
||||
}
|
||||
printk(KERN_CONT " ==> [%010llx - %010llx]\n",
|
||||
final_start, final_end);
|
||||
reserve_bootmem_generic(final_start, final_end - final_start,
|
||||
BOOTMEM_DEFAULT);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
|
||||
{
|
||||
int i;
|
||||
u64 addr = *addrp;
|
||||
int changed = 0;
|
||||
struct early_res *r;
|
||||
again:
|
||||
i = find_overlapped_early(addr, addr + size);
|
||||
r = &early_res[i];
|
||||
if (i < MAX_EARLY_RES && r->end) {
|
||||
*addrp = addr = round_up(r->end, align);
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
|
||||
{
|
||||
int i;
|
||||
u64 addr = *addrp, last;
|
||||
u64 size = *sizep;
|
||||
int changed = 0;
|
||||
again:
|
||||
last = addr + size;
|
||||
for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
if (last > r->start && addr < r->start) {
|
||||
size = r->start - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last > r->end && addr < r->end) {
|
||||
addr = round_up(r->end, align);
|
||||
size = last - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last <= r->end && addr >= r->start) {
|
||||
(*sizep)++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (changed) {
|
||||
*addrp = addr;
|
||||
*sizep = size;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a free area with specified alignment in a specific range.
|
||||
*/
|
||||
@ -1020,29 +730,36 @@ u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
u64 addr, last;
|
||||
u64 ei_last;
|
||||
u64 addr;
|
||||
u64 ei_start, ei_last;
|
||||
|
||||
if (ei->type != E820_RAM)
|
||||
continue;
|
||||
addr = round_up(ei->addr, align);
|
||||
|
||||
ei_last = ei->addr + ei->size;
|
||||
if (addr < start)
|
||||
addr = round_up(start, align);
|
||||
if (addr >= ei_last)
|
||||
continue;
|
||||
while (bad_addr(&addr, size, align) && addr+size <= ei_last)
|
||||
;
|
||||
last = addr + size;
|
||||
if (last > ei_last)
|
||||
continue;
|
||||
if (last > end)
|
||||
continue;
|
||||
return addr;
|
||||
ei_start = ei->addr;
|
||||
addr = find_early_area(ei_start, ei_last, start, end,
|
||||
size, align);
|
||||
|
||||
if (addr != -1ULL)
|
||||
return addr;
|
||||
}
|
||||
return -1ULL;
|
||||
}
|
||||
|
||||
u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
|
||||
{
|
||||
return find_e820_area(start, end, size, align);
|
||||
}
|
||||
|
||||
u64 __init get_max_mapped(void)
|
||||
{
|
||||
u64 end = max_pfn_mapped;
|
||||
|
||||
end <<= PAGE_SHIFT;
|
||||
|
||||
return end;
|
||||
}
|
||||
/*
|
||||
* Find next free range after *start
|
||||
*/
|
||||
@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
|
||||
|
||||
for (i = 0; i < e820.nr_map; i++) {
|
||||
struct e820entry *ei = &e820.map[i];
|
||||
u64 addr, last;
|
||||
u64 ei_last;
|
||||
u64 addr;
|
||||
u64 ei_start, ei_last;
|
||||
|
||||
if (ei->type != E820_RAM)
|
||||
continue;
|
||||
addr = round_up(ei->addr, align);
|
||||
|
||||
ei_last = ei->addr + ei->size;
|
||||
if (addr < start)
|
||||
addr = round_up(start, align);
|
||||
if (addr >= ei_last)
|
||||
continue;
|
||||
*sizep = ei_last - addr;
|
||||
while (bad_addr_size(&addr, sizep, align) &&
|
||||
addr + *sizep <= ei_last)
|
||||
;
|
||||
last = addr + *sizep;
|
||||
if (last > ei_last)
|
||||
continue;
|
||||
return addr;
|
||||
ei_start = ei->addr;
|
||||
addr = find_early_area_size(ei_start, ei_last, start,
|
||||
sizep, align);
|
||||
|
||||
if (addr != -1ULL)
|
||||
return addr;
|
||||
}
|
||||
|
||||
return -1ULL;
|
||||
@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void)
|
||||
end = MAX_RESOURCE_SIZE;
|
||||
if (start >= end)
|
||||
continue;
|
||||
printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
|
||||
start, end);
|
||||
reserve_region_with_split(&iomem_resource, start, end,
|
||||
"RAM buffer");
|
||||
}
|
||||
|
@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void)
|
||||
|
||||
void __init i386_start_kernel(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_TRAMPOLINE
|
||||
/*
|
||||
* But first pinch a few for the stack/trampoline stuff
|
||||
* FIXME: Don't need the extra page at 4K, but need to fix
|
||||
* trampoline before removing it. (see the GDT stuff)
|
||||
*/
|
||||
reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
|
||||
"EX TRAMPOLINE");
|
||||
#endif
|
||||
|
||||
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_INITRD
|
||||
|
@ -7,6 +7,8 @@
|
||||
#include <linux/string.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/dmi.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include <asm/pci-direct.h>
|
||||
#include <linux/sort.h>
|
||||
#include <asm/io.h>
|
||||
@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = {
|
||||
{ 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 },
|
||||
};
|
||||
|
||||
struct range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
static int __cpuinit cmp_range(const void *x1, const void *x2)
|
||||
{
|
||||
const struct range *r1 = x1;
|
||||
|
@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask)
|
||||
}
|
||||
EXPORT_SYMBOL(dma_set_mask);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA)
|
||||
static __initdata void *dma32_bootmem_ptr;
|
||||
static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
|
||||
|
||||
@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void)
|
||||
dma32_bootmem_ptr = NULL;
|
||||
dma32_bootmem_size = 0;
|
||||
}
|
||||
#else
|
||||
void __init dma32_reserve_bootmem(void)
|
||||
{
|
||||
}
|
||||
static void __init dma32_free_bootmem(void)
|
||||
{
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
void __init pci_iommu_alloc(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* free the range so iommu could get some range less than 4G */
|
||||
dma32_free_bootmem();
|
||||
#endif
|
||||
|
||||
if (pci_swiotlb_detect())
|
||||
goto out;
|
||||
|
||||
|
@ -969,16 +969,12 @@ void __init setup_arch(char **cmdline_p)
|
||||
#endif
|
||||
|
||||
initmem_init(0, max_pfn, acpi, k8);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* dma32_reserve_bootmem() allocates bootmem which may conflict
|
||||
* with the crashkernel command line, so do that after
|
||||
* reserve_crashkernel()
|
||||
*/
|
||||
dma32_reserve_bootmem();
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
|
||||
#endif
|
||||
|
||||
dma32_reserve_bootmem();
|
||||
|
||||
reserve_ibft_region();
|
||||
|
||||
#ifdef CONFIG_KVM_CLOCK
|
||||
|
@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
|
||||
|
||||
static void __init pcpu_fc_free(void *ptr, size_t size)
|
||||
{
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
u64 start = __pa(ptr);
|
||||
u64 end = start + size;
|
||||
free_early_partial(start, end);
|
||||
#else
|
||||
free_bootmem(__pa(ptr), size);
|
||||
#endif
|
||||
}
|
||||
|
||||
static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
|
||||
|
@ -750,6 +750,7 @@ static void __init zone_sizes_init(void)
|
||||
free_area_init_nodes(max_zone_pfns);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
static unsigned long __init setup_node_bootmem(int nodeid,
|
||||
unsigned long start_pfn,
|
||||
unsigned long end_pfn,
|
||||
@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid,
|
||||
printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
|
||||
nodeid, bootmap, bootmap + bootmap_size);
|
||||
free_bootmem_with_active_regions(nodeid, end_pfn);
|
||||
early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
|
||||
|
||||
return bootmap + bootmap_size;
|
||||
}
|
||||
#endif
|
||||
|
||||
void __init setup_bootmem_allocator(void)
|
||||
{
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
int nodeid;
|
||||
unsigned long bootmap_size, bootmap;
|
||||
/*
|
||||
@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void)
|
||||
if (bootmap == -1L)
|
||||
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
||||
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
|
||||
#endif
|
||||
|
||||
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
|
||||
max_pfn_mapped<<PAGE_SHIFT);
|
||||
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
for_each_online_node(nodeid) {
|
||||
unsigned long start_pfn, end_pfn;
|
||||
|
||||
@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void)
|
||||
bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
|
||||
bootmap);
|
||||
}
|
||||
#endif
|
||||
|
||||
after_bootmem = 1;
|
||||
}
|
||||
|
@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start,
|
||||
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
|
||||
int acpi, int k8)
|
||||
{
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
unsigned long bootmap_size, bootmap;
|
||||
|
||||
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
||||
@ -579,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
|
||||
PAGE_SIZE);
|
||||
if (bootmap == -1L)
|
||||
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
||||
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
|
||||
/* don't touch min_low_pfn */
|
||||
bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
|
||||
0, end_pfn);
|
||||
e820_register_active_regions(0, start_pfn, end_pfn);
|
||||
free_bootmem_with_active_regions(0, end_pfn);
|
||||
early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
|
||||
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
|
||||
#else
|
||||
e820_register_active_regions(0, start_pfn, end_pfn);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -974,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
|
||||
if (pmd_none(*pmd)) {
|
||||
pte_t entry;
|
||||
|
||||
p = vmemmap_alloc_block(PMD_SIZE, node);
|
||||
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
|
||||
if (!p)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
|
||||
|
||||
for_each_online_node(nid) {
|
||||
memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
|
||||
NODE_DATA(nid)->node_id = nid;
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
NODE_DATA(nid)->bdata = &bootmem_node_data[nid];
|
||||
#endif
|
||||
}
|
||||
|
||||
setup_bootmem_allocator();
|
||||
|
@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start,
|
||||
unsigned long end, unsigned long size,
|
||||
unsigned long align)
|
||||
{
|
||||
unsigned long mem = find_e820_area(start, end, size, align);
|
||||
void *ptr;
|
||||
unsigned long mem;
|
||||
|
||||
/*
|
||||
* put it on high as possible
|
||||
* something will go with NODE_DATA
|
||||
*/
|
||||
if (start < (MAX_DMA_PFN<<PAGE_SHIFT))
|
||||
start = MAX_DMA_PFN<<PAGE_SHIFT;
|
||||
if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) &&
|
||||
end > (MAX_DMA32_PFN<<PAGE_SHIFT))
|
||||
start = MAX_DMA32_PFN<<PAGE_SHIFT;
|
||||
mem = find_e820_area(start, end, size, align);
|
||||
if (mem != -1L)
|
||||
return __va(mem);
|
||||
|
||||
ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS));
|
||||
if (ptr == NULL) {
|
||||
printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
|
||||
/* extend the search scope */
|
||||
end = max_pfn_mapped << PAGE_SHIFT;
|
||||
if (end > (MAX_DMA32_PFN<<PAGE_SHIFT))
|
||||
start = MAX_DMA32_PFN<<PAGE_SHIFT;
|
||||
else
|
||||
start = MAX_DMA_PFN<<PAGE_SHIFT;
|
||||
mem = find_e820_area(start, end, size, align);
|
||||
if (mem != -1L)
|
||||
return __va(mem);
|
||||
|
||||
printk(KERN_ERR "Cannot find %lu bytes in node %d\n",
|
||||
size, nodeid);
|
||||
return NULL;
|
||||
}
|
||||
return ptr;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Initialize bootmem allocator for a node */
|
||||
void __init
|
||||
setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
{
|
||||
unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size;
|
||||
unsigned long start_pfn, last_pfn, nodedata_phys;
|
||||
const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE);
|
||||
unsigned long bootmap_start, nodedata_phys;
|
||||
void *bootmap;
|
||||
int nid;
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
unsigned long bootmap_start, bootmap_pages, bootmap_size;
|
||||
void *bootmap;
|
||||
#endif
|
||||
|
||||
if (!end)
|
||||
return;
|
||||
@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
|
||||
start = roundup(start, ZONE_ALIGN);
|
||||
|
||||
printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid,
|
||||
printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid,
|
||||
start, end);
|
||||
|
||||
start_pfn = start >> PAGE_SHIFT;
|
||||
@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
if (node_data[nodeid] == NULL)
|
||||
return;
|
||||
nodedata_phys = __pa(node_data[nodeid]);
|
||||
reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
|
||||
printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
|
||||
nodedata_phys + pgdat_size - 1);
|
||||
nid = phys_to_nid(nodedata_phys);
|
||||
if (nid != nodeid)
|
||||
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
|
||||
|
||||
memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t));
|
||||
NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
|
||||
NODE_DATA(nodeid)->node_id = nodeid;
|
||||
NODE_DATA(nodeid)->node_start_pfn = start_pfn;
|
||||
NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn;
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid];
|
||||
|
||||
/*
|
||||
* Find a place for the bootmem map
|
||||
* nodedata_phys could be on other nodes by alloc_bootmem,
|
||||
@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
* of alloc_bootmem, that could clash with reserved range
|
||||
*/
|
||||
bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn);
|
||||
nid = phys_to_nid(nodedata_phys);
|
||||
if (nid == nodeid)
|
||||
bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
|
||||
else
|
||||
bootmap_start = roundup(start, PAGE_SIZE);
|
||||
bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE);
|
||||
/*
|
||||
* SMP_CACHE_BYTES could be enough, but init_bootmem_node like
|
||||
* to use that to align to PAGE_SIZE
|
||||
@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
bootmap = early_node_mem(nodeid, bootmap_start, end,
|
||||
bootmap_pages<<PAGE_SHIFT, PAGE_SIZE);
|
||||
if (bootmap == NULL) {
|
||||
if (nodedata_phys < start || nodedata_phys >= end) {
|
||||
/*
|
||||
* only need to free it if it is from other node
|
||||
* bootmem
|
||||
*/
|
||||
if (nid != nodeid)
|
||||
free_bootmem(nodedata_phys, pgdat_size);
|
||||
}
|
||||
free_early(nodedata_phys, nodedata_phys + pgdat_size);
|
||||
node_data[nodeid] = NULL;
|
||||
return;
|
||||
}
|
||||
bootmap_start = __pa(bootmap);
|
||||
reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT),
|
||||
"BOOTMAP");
|
||||
|
||||
bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
|
||||
bootmap_start >> PAGE_SHIFT,
|
||||
@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
|
||||
printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n",
|
||||
bootmap_start, bootmap_start + bootmap_size - 1,
|
||||
bootmap_pages);
|
||||
|
||||
free_bootmem_with_active_regions(nodeid, end);
|
||||
|
||||
/*
|
||||
* convert early reserve to bootmem reserve earlier
|
||||
* otherwise early_node_mem could use early reserved mem
|
||||
* on previous node
|
||||
*/
|
||||
early_res_to_bootmem(start, end);
|
||||
|
||||
/*
|
||||
* in some case early_node_mem could use alloc_bootmem
|
||||
* to get range on other node, don't reserve that again
|
||||
*/
|
||||
if (nid != nodeid)
|
||||
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
|
||||
else
|
||||
reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
|
||||
pgdat_size, BOOTMEM_DEFAULT);
|
||||
nid = phys_to_nid(bootmap_start);
|
||||
if (nid != nodeid)
|
||||
printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
|
||||
else
|
||||
reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
|
||||
bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
|
||||
|
||||
free_bootmem_with_active_regions(nodeid, end);
|
||||
#endif
|
||||
|
||||
node_set_online(nodeid);
|
||||
}
|
||||
@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void)
|
||||
for_each_online_node(i)
|
||||
pages += free_all_bootmem_node(NODE_DATA(i));
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
pages += free_all_memory_core_early(MAX_NUMNODES);
|
||||
#endif
|
||||
|
||||
return pages;
|
||||
}
|
||||
|
||||
|
@ -14,8 +14,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o
|
||||
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
|
||||
|
||||
obj-y += common.o early.o
|
||||
obj-y += amd_bus.o
|
||||
obj-$(CONFIG_X86_64) += bus_numa.o
|
||||
obj-y += amd_bus.o bus_numa.o
|
||||
|
||||
ifeq ($(CONFIG_PCI_DEBUG),y)
|
||||
EXTRA_CFLAGS += -DDEBUG
|
||||
|
@ -2,11 +2,11 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/topology.h>
|
||||
#include <linux/cpu.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include <asm/pci_x86.h>
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#include <asm/pci-direct.h>
|
||||
#endif
|
||||
|
||||
#include "bus_numa.h"
|
||||
|
||||
@ -15,60 +15,6 @@
|
||||
* also get peer root bus resource for io,mmio
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#define RANGE_NUM 16
|
||||
|
||||
struct res_range {
|
||||
size_t start;
|
||||
size_t end;
|
||||
};
|
||||
|
||||
static void __init update_range(struct res_range *range, size_t start,
|
||||
size_t end)
|
||||
{
|
||||
int i;
|
||||
int j;
|
||||
|
||||
for (j = 0; j < RANGE_NUM; j++) {
|
||||
if (!range[j].end)
|
||||
continue;
|
||||
|
||||
if (start <= range[j].start && end >= range[j].end) {
|
||||
range[j].start = 0;
|
||||
range[j].end = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
|
||||
range[j].start = end + 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (start > range[j].start && end < range[j].end) {
|
||||
/* find the new spare */
|
||||
for (i = 0; i < RANGE_NUM; i++) {
|
||||
if (range[i].end == 0)
|
||||
break;
|
||||
}
|
||||
if (i < RANGE_NUM) {
|
||||
range[i].end = range[j].end;
|
||||
range[i].start = end + 1;
|
||||
} else {
|
||||
printk(KERN_ERR "run of slot in ranges\n");
|
||||
}
|
||||
range[j].end = start - 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct pci_hostbridge_probe {
|
||||
u32 bus;
|
||||
u32 slot;
|
||||
@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void)
|
||||
fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1;
|
||||
}
|
||||
|
||||
#define RANGE_NUM 16
|
||||
|
||||
/**
|
||||
* early_fill_mp_bus_info()
|
||||
* called before pcibios_scan_root and pci_scan_bus
|
||||
@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void)
|
||||
struct pci_root_info *info;
|
||||
u32 reg;
|
||||
struct resource *res;
|
||||
size_t start;
|
||||
size_t end;
|
||||
struct res_range range[RANGE_NUM];
|
||||
u64 start;
|
||||
u64 end;
|
||||
struct range range[RANGE_NUM];
|
||||
u64 val;
|
||||
u32 address;
|
||||
bool found;
|
||||
|
||||
if (!early_pci_allowed())
|
||||
return -1;
|
||||
|
||||
found_all_numa_early = 0;
|
||||
found = false;
|
||||
for (i = 0; i < ARRAY_SIZE(pci_probes); i++) {
|
||||
u32 id;
|
||||
u16 device;
|
||||
@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void)
|
||||
device = (id>>16) & 0xffff;
|
||||
if (pci_probes[i].vendor == vendor &&
|
||||
pci_probes[i].device == device) {
|
||||
found_all_numa_early = 1;
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!found_all_numa_early)
|
||||
if (!found)
|
||||
return 0;
|
||||
|
||||
pci_root_num = 0;
|
||||
@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void)
|
||||
def_link = (reg >> 8) & 0x03;
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
range[0].end = 0xffff;
|
||||
add_range(range, RANGE_NUM, 0, 0, 0xffff + 1);
|
||||
/* io port resource */
|
||||
for (i = 0; i < 4; i++) {
|
||||
reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3));
|
||||
@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void)
|
||||
|
||||
info = &pci_root_info[j];
|
||||
printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n",
|
||||
node, link, (u64)start, (u64)end);
|
||||
node, link, start, end);
|
||||
|
||||
/* kernel only handle 16 bit only */
|
||||
if (end > 0xffff)
|
||||
end = 0xffff;
|
||||
update_res(info, start, end, IORESOURCE_IO, 1);
|
||||
update_range(range, start, end);
|
||||
subtract_range(range, RANGE_NUM, start, end + 1);
|
||||
}
|
||||
/* add left over io port range to def node/link, [0, 0xffff] */
|
||||
/* find the position */
|
||||
@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void)
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
|
||||
update_res(info, range[i].start, range[i].end,
|
||||
update_res(info, range[i].start, range[i].end - 1,
|
||||
IORESOURCE_IO, 1);
|
||||
}
|
||||
}
|
||||
|
||||
memset(range, 0, sizeof(range));
|
||||
/* 0xfd00000000-0xffffffffff for HT */
|
||||
range[0].end = (0xfdULL<<32) - 1;
|
||||
end = cap_resource((0xfdULL<<32) - 1);
|
||||
end++;
|
||||
add_range(range, RANGE_NUM, 0, 0, end);
|
||||
|
||||
/* need to take out [0, TOM) for RAM*/
|
||||
address = MSR_K8_TOP_MEM1;
|
||||
rdmsrl(address, val);
|
||||
end = (val & 0xffffff800000ULL);
|
||||
printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20);
|
||||
printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20);
|
||||
if (end < (1ULL<<32))
|
||||
update_range(range, 0, end - 1);
|
||||
subtract_range(range, RANGE_NUM, 0, end);
|
||||
|
||||
/* get mmconfig */
|
||||
get_pci_mmcfg_amd_fam10h_range();
|
||||
/* need to take out mmconf range */
|
||||
if (fam10h_mmconf_end) {
|
||||
printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end);
|
||||
update_range(range, fam10h_mmconf_start, fam10h_mmconf_end);
|
||||
subtract_range(range, RANGE_NUM, fam10h_mmconf_start,
|
||||
fam10h_mmconf_end + 1);
|
||||
}
|
||||
|
||||
/* mmio resource */
|
||||
@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void)
|
||||
info = &pci_root_info[j];
|
||||
|
||||
printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]",
|
||||
node, link, (u64)start, (u64)end);
|
||||
node, link, start, end);
|
||||
/*
|
||||
* some sick allocation would have range overlap with fam10h
|
||||
* mmconf range, so need to update start and end.
|
||||
@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void)
|
||||
/* we got a hole */
|
||||
endx = fam10h_mmconf_start - 1;
|
||||
update_res(info, start, endx, IORESOURCE_MEM, 0);
|
||||
update_range(range, start, endx);
|
||||
printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx);
|
||||
subtract_range(range, RANGE_NUM, start,
|
||||
endx + 1);
|
||||
printk(KERN_CONT " ==> [%llx, %llx]", start, endx);
|
||||
start = fam10h_mmconf_end + 1;
|
||||
changed = 1;
|
||||
}
|
||||
if (changed) {
|
||||
if (start <= end) {
|
||||
printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end);
|
||||
printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end);
|
||||
} else {
|
||||
printk(KERN_CONT "%s\n", endx?"":" ==> none");
|
||||
continue;
|
||||
@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void)
|
||||
}
|
||||
}
|
||||
|
||||
update_res(info, start, end, IORESOURCE_MEM, 1);
|
||||
update_range(range, start, end);
|
||||
update_res(info, cap_resource(start), cap_resource(end),
|
||||
IORESOURCE_MEM, 1);
|
||||
subtract_range(range, RANGE_NUM, start, end + 1);
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void)
|
||||
address = MSR_K8_TOP_MEM2;
|
||||
rdmsrl(address, val);
|
||||
end = (val & 0xffffff800000ULL);
|
||||
printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20);
|
||||
update_range(range, 1ULL<<32, end - 1);
|
||||
printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20);
|
||||
subtract_range(range, RANGE_NUM, 1ULL<<32, end);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void)
|
||||
if (!range[i].end)
|
||||
continue;
|
||||
|
||||
update_res(info, range[i].start, range[i].end,
|
||||
update_res(info, cap_resource(range[i].start),
|
||||
cap_resource(range[i].end - 1),
|
||||
IORESOURCE_MEM, 1);
|
||||
}
|
||||
}
|
||||
@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void)
|
||||
info->bus_min, info->bus_max, info->node, info->link);
|
||||
for (j = 0; j < res_num; j++) {
|
||||
res = &info->res[j];
|
||||
printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n",
|
||||
busnum, j,
|
||||
(res->flags & IORESOURCE_IO)?"io port":"mmio",
|
||||
res->start, res->end);
|
||||
printk(KERN_DEBUG "bus: %02x index %x %pR\n",
|
||||
busnum, j, res);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else /* !CONFIG_X86_64 */
|
||||
|
||||
static int __init early_fill_mp_bus_info(void) { return 0; }
|
||||
|
||||
#endif /* !CONFIG_X86_64 */
|
||||
|
||||
/* common 32/64 bit code */
|
||||
|
||||
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
|
||||
|
||||
static void enable_pci_io_ecs(void *unused)
|
||||
|
@ -1,11 +1,11 @@
|
||||
#include <linux/init.h>
|
||||
#include <linux/pci.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include "bus_numa.h"
|
||||
|
||||
int pci_root_num;
|
||||
struct pci_root_info pci_root_info[PCI_ROOT_NR];
|
||||
int found_all_numa_early;
|
||||
|
||||
void x86_pci_root_bus_res_quirks(struct pci_bus *b)
|
||||
{
|
||||
@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
|
||||
if (!pci_root_num)
|
||||
return;
|
||||
|
||||
/* for amd, if only one root bus, don't need to do anything */
|
||||
if (pci_root_num < 2 && found_all_numa_early)
|
||||
return;
|
||||
|
||||
for (i = 0; i < pci_root_num; i++) {
|
||||
if (pci_root_info[i].bus_min == b->number)
|
||||
break;
|
||||
@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b)
|
||||
}
|
||||
}
|
||||
|
||||
void __devinit update_res(struct pci_root_info *info, size_t start,
|
||||
size_t end, unsigned long flags, int merge)
|
||||
void __devinit update_res(struct pci_root_info *info, resource_size_t start,
|
||||
resource_size_t end, unsigned long flags, int merge)
|
||||
{
|
||||
int i;
|
||||
struct resource *res;
|
||||
@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start,
|
||||
if (start > end)
|
||||
return;
|
||||
|
||||
if (start == MAX_RESOURCE)
|
||||
return;
|
||||
|
||||
if (!merge)
|
||||
goto addit;
|
||||
|
||||
/* try to merge it with old one */
|
||||
for (i = 0; i < info->res_num; i++) {
|
||||
size_t final_start, final_end;
|
||||
size_t common_start, common_end;
|
||||
resource_size_t final_start, final_end;
|
||||
resource_size_t common_start, common_end;
|
||||
|
||||
res = &info->res[i];
|
||||
if (res->flags != flags)
|
||||
continue;
|
||||
|
||||
common_start = max((size_t)res->start, start);
|
||||
common_end = min((size_t)res->end, end);
|
||||
common_start = max(res->start, start);
|
||||
common_end = min(res->end, end);
|
||||
if (common_start > common_end + 1)
|
||||
continue;
|
||||
|
||||
final_start = min((size_t)res->start, start);
|
||||
final_end = max((size_t)res->end, end);
|
||||
final_start = min(res->start, start);
|
||||
final_end = max(res->end, end);
|
||||
|
||||
res->start = final_start;
|
||||
res->end = final_end;
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifdef CONFIG_X86_64
|
||||
|
||||
#ifndef __BUS_NUMA_H
|
||||
#define __BUS_NUMA_H
|
||||
/*
|
||||
* sub bus (transparent) will use entries from 3 to store extra from
|
||||
* root, so need to make sure we have enough slot there.
|
||||
@ -19,8 +19,7 @@ struct pci_root_info {
|
||||
#define PCI_ROOT_NR 4
|
||||
extern int pci_root_num;
|
||||
extern struct pci_root_info pci_root_info[PCI_ROOT_NR];
|
||||
extern int found_all_numa_early;
|
||||
|
||||
extern void update_res(struct pci_root_info *info, size_t start,
|
||||
size_t end, unsigned long flags, int merge);
|
||||
extern void update_res(struct pci_root_info *info, resource_size_t start,
|
||||
resource_size_t end, unsigned long flags, int merge);
|
||||
#endif
|
||||
|
@ -255,10 +255,6 @@ void __init pcibios_resource_survey(void)
|
||||
*/
|
||||
fs_initcall(pcibios_assign_resources);
|
||||
|
||||
void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b)
|
||||
{
|
||||
}
|
||||
|
||||
/*
|
||||
* If we set up a device for bus mastering, we need to check the latency
|
||||
* timer as certain crappy BIOSes forget to set it properly.
|
||||
|
@ -40,7 +40,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno);
|
||||
static int once_over (void);
|
||||
static int remove_ranges (struct bus_node *, struct bus_node *);
|
||||
static int update_bridge_ranges (struct bus_node **);
|
||||
static int add_range (int type, struct range_node *, struct bus_node *);
|
||||
static int add_bus_range (int type, struct range_node *, struct bus_node *);
|
||||
static void fix_resources (struct bus_node *);
|
||||
static struct bus_node *find_bus_wprev (u8, struct bus_node **, u8);
|
||||
|
||||
@ -133,7 +133,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node
|
||||
newrange->rangeno = 1;
|
||||
else {
|
||||
/* need to insert our range */
|
||||
add_range (flag, newrange, newbus);
|
||||
add_bus_range (flag, newrange, newbus);
|
||||
debug ("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end);
|
||||
}
|
||||
|
||||
@ -384,7 +384,7 @@ int __init ibmphp_rsrc_init (void)
|
||||
* Input: type of the resource, range to add, current bus
|
||||
* Output: 0 or -1, bus and range ptrs
|
||||
********************************************************************************/
|
||||
static int add_range (int type, struct range_node *range, struct bus_node *bus_cur)
|
||||
static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur)
|
||||
{
|
||||
struct range_node *range_cur = NULL;
|
||||
struct range_node *range_prev;
|
||||
@ -455,7 +455,7 @@ static int add_range (int type, struct range_node *range, struct bus_node *bus_c
|
||||
|
||||
/*******************************************************************************
|
||||
* This routine goes through the list of resources of type 'type' and updates
|
||||
* the range numbers that they correspond to. It was called from add_range fnc
|
||||
* the range numbers that they correspond to. It was called from add_bus_range fnc
|
||||
*
|
||||
* Input: bus, type of the resource, the rangeno starting from which to update
|
||||
******************************************************************************/
|
||||
@ -1999,7 +1999,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
|
||||
|
||||
if (bus_sec->noIORanges > 0) {
|
||||
if (!range_exists_already (range, bus_sec, IO)) {
|
||||
add_range (IO, range, bus_sec);
|
||||
add_bus_range (IO, range, bus_sec);
|
||||
++bus_sec->noIORanges;
|
||||
} else {
|
||||
kfree (range);
|
||||
@ -2048,7 +2048,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
|
||||
|
||||
if (bus_sec->noMemRanges > 0) {
|
||||
if (!range_exists_already (range, bus_sec, MEM)) {
|
||||
add_range (MEM, range, bus_sec);
|
||||
add_bus_range (MEM, range, bus_sec);
|
||||
++bus_sec->noMemRanges;
|
||||
} else {
|
||||
kfree (range);
|
||||
@ -2102,7 +2102,7 @@ static int __init update_bridge_ranges (struct bus_node **bus)
|
||||
|
||||
if (bus_sec->noPFMemRanges > 0) {
|
||||
if (!range_exists_already (range, bus_sec, PFMEM)) {
|
||||
add_range (PFMEM, range, bus_sec);
|
||||
add_bus_range (PFMEM, range, bus_sec);
|
||||
++bus_sec->noPFMemRanges;
|
||||
} else {
|
||||
kfree (range);
|
||||
|
@ -23,6 +23,7 @@ extern unsigned long max_pfn;
|
||||
extern unsigned long saved_max_pfn;
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
/*
|
||||
* node_bootmem_map is a map pointer - the bits represent all physical
|
||||
* memory pages (including holes) on the node.
|
||||
@ -37,6 +38,7 @@ typedef struct bootmem_data {
|
||||
} bootmem_data_t;
|
||||
|
||||
extern bootmem_data_t bootmem_node_data[];
|
||||
#endif
|
||||
|
||||
extern unsigned long bootmem_bootmap_pages(unsigned long);
|
||||
|
||||
@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat,
|
||||
unsigned long endpfn);
|
||||
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
|
||||
|
||||
unsigned long free_all_memory_core_early(int nodeid);
|
||||
extern unsigned long free_all_bootmem_node(pg_data_t *pgdat);
|
||||
extern unsigned long free_all_bootmem(void);
|
||||
|
||||
@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat,
|
||||
unsigned long size,
|
||||
unsigned long align,
|
||||
unsigned long goal);
|
||||
void *__alloc_bootmem_node_high(pg_data_t *pgdat,
|
||||
unsigned long size,
|
||||
unsigned long align,
|
||||
unsigned long goal);
|
||||
extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
|
||||
unsigned long size,
|
||||
unsigned long align,
|
||||
|
23
include/linux/early_res.h
Normal file
23
include/linux/early_res.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef _LINUX_EARLY_RES_H
|
||||
#define _LINUX_EARLY_RES_H
|
||||
#ifdef __KERNEL__
|
||||
|
||||
extern void reserve_early(u64 start, u64 end, char *name);
|
||||
extern void reserve_early_overlap_ok(u64 start, u64 end, char *name);
|
||||
extern void free_early(u64 start, u64 end);
|
||||
void free_early_partial(u64 start, u64 end);
|
||||
extern void early_res_to_bootmem(u64 start, u64 end);
|
||||
|
||||
void reserve_early_without_check(u64 start, u64 end, char *name);
|
||||
u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
|
||||
u64 size, u64 align);
|
||||
u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
|
||||
u64 *sizep, u64 align);
|
||||
u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align);
|
||||
u64 get_max_mapped(void);
|
||||
#include <linux/range.h>
|
||||
int get_free_all_memory_range(struct range **rangep, int nodeid);
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _LINUX_EARLY_RES_H */
|
@ -44,6 +44,16 @@ extern const char linux_proc_banner[];
|
||||
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
|
||||
|
||||
/*
|
||||
* This looks more complex than it should be. But we need to
|
||||
* get the type for the ~ right in round_down (it needs to be
|
||||
* as wide as the result!), and we want to evaluate the macro
|
||||
* arguments just once each.
|
||||
*/
|
||||
#define __round_mask(x, y) ((__typeof__(x))((y)-1))
|
||||
#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
|
||||
#define round_down(x, y) ((x) & ~__round_mask(x, y))
|
||||
|
||||
#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f))
|
||||
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
|
||||
#define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y))
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <linux/prio_tree.h>
|
||||
#include <linux/debug_locks.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
struct mempolicy;
|
||||
struct anon_vma;
|
||||
@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid,
|
||||
extern unsigned long find_min_pfn_with_active_regions(void);
|
||||
extern void free_bootmem_with_active_regions(int nid,
|
||||
unsigned long max_low_pfn);
|
||||
int add_from_early_node_map(struct range *range, int az,
|
||||
int nr_range, int nid);
|
||||
void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
|
||||
u64 goal, u64 limit);
|
||||
typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
|
||||
extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
|
||||
extern void sparse_memory_present_with_active_regions(int nid);
|
||||
@ -1317,12 +1322,19 @@ extern int randomize_va_space;
|
||||
const char * arch_vma_name(struct vm_area_struct *vma);
|
||||
void print_vma_addr(char *prefix, unsigned long rip);
|
||||
|
||||
void sparse_mem_maps_populate_node(struct page **map_map,
|
||||
unsigned long pnum_begin,
|
||||
unsigned long pnum_end,
|
||||
unsigned long map_count,
|
||||
int nodeid);
|
||||
|
||||
struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
|
||||
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
|
||||
pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
|
||||
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
|
||||
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
|
||||
void *vmemmap_alloc_block(unsigned long size, int node);
|
||||
void *vmemmap_alloc_block_buf(unsigned long size, int node);
|
||||
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
|
||||
int vmemmap_populate_basepages(struct page *start_page,
|
||||
unsigned long pages, int node);
|
||||
|
@ -612,7 +612,9 @@ typedef struct pglist_data {
|
||||
struct page_cgroup *node_page_cgroup;
|
||||
#endif
|
||||
#endif
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
struct bootmem_data *bdata;
|
||||
#endif
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Must be held any time you expect node_start_pfn, node_present_pages
|
||||
|
30
include/linux/range.h
Normal file
30
include/linux/range.h
Normal file
@ -0,0 +1,30 @@
|
||||
#ifndef _LINUX_RANGE_H
|
||||
#define _LINUX_RANGE_H
|
||||
|
||||
/*
 * A span of 64-bit values described by a start and an end, operated on by
 * the add_range()/subtract_range()/sort_range() helpers declared below.
 *
 * NOTE(review): the PCI caller in this change passes an exclusive end
 * (add_range(..., 0, 0xffff + 1)) and converts back with "end - 1", and
 * treats end == 0 as an unused slot -- confirm against kernel/range.c.
 */
struct range {
|
||||
u64 start;
|
||||
u64 end;
|
||||
};
|
||||
|
||||
int add_range(struct range *range, int az, int nr_range,
|
||||
u64 start, u64 end);
|
||||
|
||||
|
||||
int add_range_with_merge(struct range *range, int az, int nr_range,
|
||||
u64 start, u64 end);
|
||||
|
||||
void subtract_range(struct range *range, int az, u64 start, u64 end);
|
||||
|
||||
int clean_sort_range(struct range *range, int az);
|
||||
|
||||
void sort_range(struct range *range, int nr_range);
|
||||
|
||||
#define MAX_RESOURCE ((resource_size_t)~0)
|
||||
/*
 * Narrow a 64-bit value to resource_size_t: anything above MAX_RESOURCE
 * is clamped to MAX_RESOURCE, everything else is returned unchanged.
 */
static inline resource_size_t cap_resource(u64 val)
{
	return (val > MAX_RESOURCE) ? MAX_RESOURCE : val;
}
|
||||
#endif
|
@ -10,7 +10,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \
|
||||
kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
|
||||
hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
|
||||
notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \
|
||||
async.o
|
||||
async.o range.o
|
||||
obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o
|
||||
obj-y += groups.o
|
||||
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
|
578
kernel/early_res.c
Normal file
578
kernel/early_res.c
Normal file
@ -0,0 +1,578 @@
|
||||
/*
|
||||
* early_res, could be used to replace bootmem
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/early_res.h>
|
||||
|
||||
/*
|
||||
* Early reserved memory areas.
|
||||
*/
|
||||
/*
|
||||
* need to make sure this one is big enough before
|
||||
* find_fw_memmap_area could be used
|
||||
*/
|
||||
#define MAX_EARLY_RES_X 32
|
||||
|
||||
/*
 * One entry of the early reservation table.
 *
 * start/end bound the reserved area; the overlap test in
 * find_overlapped_early() treats it as the half-open interval [start, end).
 * An entry whose end is 0 marks the end of the in-use part of the table
 * (the table walkers stop there and drop_range() clears .end to retire a
 * slot).  name is a short label for the reservation, and overlap_ok flags
 * entries that may be split when a later reservation overlaps them.
 */
struct early_res {
|
||||
u64 start, end;
|
||||
char name[15];
|
||||
char overlap_ok;
|
||||
};
|
||||
static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata;
|
||||
|
||||
static int max_early_res __initdata = MAX_EARLY_RES_X;
|
||||
static struct early_res *early_res __initdata = &early_res_x[0];
|
||||
static int early_res_count __initdata;
|
||||
|
||||
/*
 * Scan the in-use part of the early reservation table for the first entry
 * that intersects [start, end).
 *
 * Returns the index of that entry.  When nothing overlaps, the return value
 * is the index at which the scan stopped: the first slot with .end == 0, or
 * max_early_res if the table is full.
 */
static int __init find_overlapped_early(u64 start, u64 end)
{
	int idx = 0;

	while (idx < max_early_res && early_res[idx].end) {
		struct early_res *cur = &early_res[idx];

		/* half-open intervals intersect iff each starts before the other ends */
		if (start < cur->end && cur->start < end)
			return idx;

		idx++;
	}

	return idx;
}
|
||||
|
||||
/*
|
||||
* Drop the i-th range from the early reservation map,
|
||||
* by copying any higher ranges down one over it, and
|
||||
* clearing what had been the last slot.
|
||||
*/
|
||||
static void __init drop_range(int i)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j = i + 1; j < max_early_res && early_res[j].end; j++)
|
||||
;
|
||||
|
||||
memmove(&early_res[i], &early_res[i + 1],
|
||||
(j - 1 - i) * sizeof(struct early_res));
|
||||
|
||||
early_res[j - 1].end = 0;
|
||||
early_res_count--;
|
||||
}
|
||||
|
||||
/*
 * Remove the part of entry i that intersects [start, end).
 *
 * Depending on where the intersection lies the entry is shortened at its
 * tail, shortened at its head, split in two (the upper remainder is added
 * as a new entry carrying the same name), or dropped altogether.
 * Nothing happens when the entry does not intersect the range.
 */
static void __init drop_range_partial(int i, u64 start, u64 end)
{
	u64 common_start, common_end;
	u64 old_start, old_end;

	old_start = early_res[i].start;
	old_end = early_res[i].end;
	common_start = max(old_start, start);
	common_end = min(old_end, end);

	/* no overlap ? */
	if (common_start >= common_end)
		return;

	if (old_start < common_start) {
		/* make head segment */
		early_res[i].end = common_start;
		if (old_end > common_end) {
			/*
			 * Zero-filled so the copy below is always
			 * NUL-terminated: strncpy() writes at most
			 * sizeof(name) - 1 bytes and never the last one.
			 */
			char name[sizeof(early_res[i].name)] = "";

			/*
			 * Save a local copy of the name, since the
			 * early_res array could get resized inside
			 * reserve_early_without_check() ->
			 * __check_and_double_early_res(), which would
			 * make the current name pointer invalid.
			 */
			strncpy(name, early_res[i].name, sizeof(name) - 1);
			/* add another for left over on tail */
			reserve_early_without_check(common_end, old_end, name);
		}
		return;
	}

	if (old_end > common_end) {
		/* reuse the entry for tail left */
		early_res[i].start = common_end;
		return;
	}

	/* all covered */
	drop_range(i);
}
|
||||
|
||||
/*
|
||||
* Split any existing ranges that:
|
||||
* 1) are marked 'overlap_ok', and
|
||||
* 2) overlap with the stated range [start, end)
|
||||
* into whatever portion (if any) of the existing range is entirely
|
||||
* below or entirely above the stated range. Drop the portion
|
||||
* of the existing range that overlaps with the stated range,
|
||||
* which will allow the caller of this routine to then add that
|
||||
* stated range without conflicting with any existing range.
|
||||
*/
|
||||
static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
|
||||
{
|
||||
int i;
|
||||
struct early_res *r;
|
||||
u64 lower_start, lower_end;
|
||||
u64 upper_start, upper_end;
|
||||
char name[15];
|
||||
|
||||
for (i = 0; i < max_early_res && early_res[i].end; i++) {
|
||||
r = &early_res[i];
|
||||
|
||||
/* Continue past non-overlapping ranges */
|
||||
if (end <= r->start || start >= r->end)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Leave non-ok overlaps as is; let caller
|
||||
* panic "Overlapping early reservations"
|
||||
* when it hits this overlap.
|
||||
*/
|
||||
if (!r->overlap_ok)
|
||||
return;
|
||||
|
||||
/*
|
||||
* We have an ok overlap. We will drop it from the early
|
||||
* reservation map, and add back in any non-overlapping
|
||||
* portions (lower or upper) as separate, overlap_ok,
|
||||
* non-overlapping ranges.
|
||||
*/
|
||||
|
||||
/* 1. Note any non-overlapping (lower or upper) ranges. */
|
||||
strncpy(name, r->name, sizeof(name) - 1);
|
||||
|
||||
lower_start = lower_end = 0;
|
||||
upper_start = upper_end = 0;
|
||||
if (r->start < start) {
|
||||
lower_start = r->start;
|
||||
lower_end = start;
|
||||
}
|
||||
if (r->end > end) {
|
||||
upper_start = end;
|
||||
upper_end = r->end;
|
||||
}
|
||||
|
||||
/* 2. Drop the original ok overlapping range */
|
||||
drop_range(i);
|
||||
|
||||
i--; /* resume for-loop on copied down entry */
|
||||
|
||||
/* 3. Add back in any non-overlapping ranges. */
|
||||
if (lower_end)
|
||||
reserve_early_overlap_ok(lower_start, lower_end, name);
|
||||
if (upper_end)
|
||||
reserve_early_overlap_ok(upper_start, upper_end, name);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Record [start, end) in the first unused slot of the table.
 *
 * Panics if the table has no unused slot, or if the range intersects an
 * entry that is still present (callers are expected to have removed any
 * 'overlap_ok' entries beforehand).
 *
 * @name may be NULL; the stored name is then empty.
 * @overlap_ok is stored as-is in the new entry.
 */
static void __init __reserve_early(u64 start, u64 end, char *name,
				   int overlap_ok)
{
	int i;
	struct early_res *r;

	i = find_overlapped_early(start, end);
	if (i >= max_early_res)
		panic("Too many early reservations");
	r = &early_res[i];
	if (r->end)
		panic("Overlapping early reservations "
		      "%llx-%llx %s to %llx-%llx %s\n",
		      start, end - 1, name ? name : "", r->start,
		      r->end - 1, r->name);
	r->start = start;
	r->end = end;
	r->overlap_ok = overlap_ok;
	/*
	 * drop_range() only clears the end field of a freed slot, so a
	 * recycled slot may still hold an older name.  Wipe it so that a
	 * NULL or shorter name never exposes stale text.
	 */
	memset(r->name, 0, sizeof(r->name));
	if (name)
		strncpy(r->name, name, sizeof(r->name) - 1);
	early_res_count++;
}
|
||||
|
||||
/*
|
||||
* A few early reservtations come here.
|
||||
*
|
||||
* The 'overlap_ok' in the name of this routine does -not- mean it
|
||||
* is ok for these reservations to overlap an earlier reservation.
|
||||
* Rather it means that it is ok for subsequent reservations to
|
||||
* overlap this one.
|
||||
*
|
||||
* Use this entry point to reserve early ranges when you are doing
|
||||
* so out of "Paranoia", reserving perhaps more memory than you need,
|
||||
* just in case, and don't mind a subsequent overlapping reservation
|
||||
* that is known to be needed.
|
||||
*
|
||||
* The drop_overlaps_that_are_ok() call here isn't really needed.
|
||||
* It would be needed if we had two colliding 'overlap_ok'
|
||||
* reservations, so that the second such would not panic on the
|
||||
* overlap with the first. We don't have any such as of this
|
||||
* writing, but might as well tolerate such if it happens in
|
||||
* the future.
|
||||
*/
|
||||
void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
|
||||
{
|
||||
drop_overlaps_that_are_ok(start, end);
|
||||
__reserve_early(start, end, name, 1);
|
||||
}
|
||||
|
||||
/*
 * Double the capacity of the early_res table when it is nearly full.
 *
 * Nothing is done while more than max(max_early_res / 8, 2) slots are
 * still free.  Otherwise a new table of twice the size is looked up with
 * find_fw_memmap_area(), first below @ex_start and, failing that,
 * between @ex_end and get_max_mapped().  Panics if no room is found.
 *
 * Slot 0 of the new table describes the table's own memory.  When the
 * old table was itself a grown one, its slot 0 (its own memory) is not
 * carried over.  The old table is zeroed afterwards.
 *
 * @ex_start, @ex_end: range the caller is about to reserve; the new
 * table is searched for outside of it.
 */
static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end)
{
	u64 start, end, size, mem;
	struct early_res *new;

	/* do we have enough slots left ? */
	if ((max_early_res - early_res_count) > max(max_early_res/8, 2))
		return;

	/* double it */
	mem = -1ULL;
	size = sizeof(struct early_res) * max_early_res * 2;
	if (early_res == early_res_x)
		start = 0;
	else
		start = early_res[0].end;
	end = ex_start;
	if (start + size < end)
		mem = find_fw_memmap_area(start, end, size,
					  sizeof(struct early_res));
	if (mem == -1ULL) {
		start = ex_end;
		end = get_max_mapped();
		if (start + size < end)
			mem = find_fw_memmap_area(start, end, size,
						  sizeof(struct early_res));
	}
	if (mem == -1ULL)
		panic("can not find more space for early_res array");

	new = __va(mem);
	/*
	 * save the first one for own
	 *
	 * The new memory is not known to be zeroed; clear the whole slot
	 * first so that its name is an empty string rather than garbage.
	 */
	memset(&new[0], 0, sizeof(new[0]));
	new[0].start = mem;
	new[0].end = mem + size;
	new[0].overlap_ok = 0;
	/* copy old to new */
	if (early_res == early_res_x) {
		/* static table: keep every entry, shifted up by one */
		memcpy(&new[1], &early_res[0],
		       sizeof(struct early_res) * max_early_res);
		memset(&new[max_early_res+1], 0,
		       sizeof(struct early_res) * (max_early_res - 1));
		early_res_count++;
	} else {
		/* grown table: old slot 0 is replaced by the new slot 0 */
		memcpy(&new[1], &early_res[1],
		       sizeof(struct early_res) * (max_early_res - 1));
		memset(&new[max_early_res], 0,
		       sizeof(struct early_res) * max_early_res);
	}
	memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res);
	early_res = new;
	max_early_res *= 2;
	printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n",
	       max_early_res, mem, mem + size - 1);
}
|
||||
|
||||
/*
|
||||
* Most early reservations come here.
|
||||
*
|
||||
* We first have drop_overlaps_that_are_ok() drop any pre-existing
|
||||
* 'overlap_ok' ranges, so that we can then reserve this memory
|
||||
* range without risk of panic'ing on an overlapping overlap_ok
|
||||
* early reservation.
|
||||
*/
|
||||
void __init reserve_early(u64 start, u64 end, char *name)
|
||||
{
|
||||
if (start >= end)
|
||||
return;
|
||||
|
||||
__check_and_double_early_res(start, end);
|
||||
|
||||
drop_overlaps_that_are_ok(start, end);
|
||||
__reserve_early(start, end, name, 0);
|
||||
}
|
||||
|
||||
/*
 * Append [start, end) to the table without looking for overlaps.
 *
 * Empty or inverted ranges are ignored.  The table is grown first if it
 * is running short of slots; the entry is then written at index
 * early_res_count and is never marked 'overlap_ok'.
 * @name may be NULL; the stored name is then empty.
 */
void __init reserve_early_without_check(u64 start, u64 end, char *name)
{
	struct early_res *r;

	if (start >= end)
		return;

	__check_and_double_early_res(start, end);

	r = &early_res[early_res_count];

	r->start = start;
	r->end = end;
	r->overlap_ok = 0;
	/*
	 * drop_range() only clears the end field of a freed slot, so this
	 * slot may still hold an older name.  Wipe it so that a NULL or
	 * shorter name never exposes stale text.
	 */
	memset(r->name, 0, sizeof(r->name));
	if (name)
		strncpy(r->name, name, sizeof(r->name) - 1);
	early_res_count++;
}
|
||||
|
||||
/*
 * Release a reservation.  [start, end) must match an existing entry
 * exactly; anything else is treated as a fatal error.
 */
void __init free_early(u64 start, u64 end)
{
	int slot = find_overlapped_early(start, end);
	int exact = 0;

	if (slot < max_early_res) {
		const struct early_res *hit = &early_res[slot];

		exact = hit->end == end && hit->start == start;
	}

	if (!exact)
		panic("free_early on not reserved area: %llx-%llx!",
		      start, end - 1);

	drop_range(slot);
}
|
||||
|
||||
/*
 * Release whatever part of [start, end) is currently reserved.
 *
 * Unlike free_early() the range need not match an entry exactly: every
 * entry intersecting the range is trimmed, split or dropped as needed.
 * Parts of the range that are not reserved are silently ignored.
 */
void __init free_early_partial(u64 start, u64 end)
{
	struct early_res *r;
	int contained;
	int i;

	for (;;) {
		i = find_overlapped_early(start, end);
		/*
		 * find_overlapped_early() returns the first unused slot
		 * when nothing overlaps.  That case must end the loop:
		 * drop_range_partial() is a no-op on an unused slot, so
		 * retrying would spin forever.
		 */
		if (i >= max_early_res || !early_res[i].end)
			return;

		r = &early_res[i];
		/*
		 * hole ?  Evaluate before the entry is modified; r must
		 * not be used afterwards since the table may be resized.
		 */
		contained = r->end >= end && r->start <= start;

		drop_range_partial(i, start, end);

		/* the whole range sat inside this one entry: done */
		if (contained)
			return;
	}
}
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
/*
 * Remove every early reservation from the PFN range array @range, which
 * has @az slots.  Each reservation is widened to whole pages
 * (PFN_DOWN of its start, PFN_UP of its end) before being subtracted.
 * When the table has been grown, slot 0 is skipped.
 */
static void __init subtract_early_res(struct range *range, int az)
{
	int nr_used = 0;
	int first;
	int n;

	while (nr_used < max_early_res && early_res[nr_used].end)
		nr_used++;

	/* need to skip first one ?*/
	first = (early_res != early_res_x) ? 1 : 0;

#define DEBUG_PRINT_EARLY_RES 1

#if DEBUG_PRINT_EARLY_RES
	printk(KERN_INFO "Subtract (%d early reservations)\n", nr_used);
#endif
	for (n = first; n < nr_used; n++) {
		struct early_res *res = &early_res[n];
		u64 pfn_lo, pfn_hi;

#if DEBUG_PRINT_EARLY_RES
		printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", n,
		       res->start, res->end, res->name);
#endif
		pfn_lo = PFN_DOWN(res->start);
		pfn_hi = PFN_UP(res->end);
		if (pfn_lo < pfn_hi)
			subtract_range(range, az, pfn_lo, pfn_hi);
	}
}
|
||||
|
||||
/*
 * Build a sorted array of PFN ranges that are active for @nodeid and not
 * covered by any early reservation.
 *
 * The array has room for twice the number of current reservations and is
 * placed in memory located with find_fw_memmap_area() (above
 * MAX_DMA32_PFN when that is defined and mapped memory reaches beyond it).
 * Panics if no room is found.
 *
 * When @nodeid is MAX_NUMNODES the early_res table is wiped and detached
 * afterwards.
 *
 * Stores the array in *@rangep and returns the number of valid entries.
 */
int __init get_free_all_memory_range(struct range **rangep, int nodeid)
{
	struct range *table;
	u64 lo = 0, hi;
	u64 bytes;
	u64 phys;
	int slots = 0;
	int nr_range;

	while (slots < max_early_res && early_res[slots].end)
		slots++;

	slots *= 2;

	bytes = sizeof(struct range) * slots;
	hi = get_max_mapped();
#ifdef MAX_DMA32_PFN
	if (hi > (MAX_DMA32_PFN << PAGE_SHIFT))
		lo = MAX_DMA32_PFN << PAGE_SHIFT;
#endif
	phys = find_fw_memmap_area(lo, hi, bytes, sizeof(struct range));
	if (phys == -1ULL)
		panic("can not find more space for range free");

	table = __va(phys);
	/* use early_node_map[] and early_res to get range array at first */
	memset(table, 0, bytes);

	/* need to go over early_node_map to find out good range for node */
	nr_range = add_from_early_node_map(table, slots, 0, nodeid);
#ifdef CONFIG_X86_32
	subtract_range(table, slots, max_low_pfn, -1ULL);
#endif
	subtract_early_res(table, slots);
	nr_range = clean_sort_range(table, slots);

	/* need to clear it ? */
	if (nodeid == MAX_NUMNODES) {
		memset(early_res, 0,
		       sizeof(struct early_res) * max_early_res);
		early_res = NULL;
		max_early_res = 0;
	}

	*rangep = table;
	return nr_range;
}
|
||||
#else
|
||||
/*
 * Hand the early reservations over to bootmem.
 *
 * Every reservation is clipped to [start, end) and the non-empty
 * remainder is reserved with reserve_bootmem_generic().  When the table
 * has been grown, slot 0 is skipped.  Afterwards the table is wiped and
 * detached.
 */
void __init early_res_to_bootmem(u64 start, u64 end)
{
	int nr_used = 0;
	int first;
	int n;

	while (nr_used < max_early_res && early_res[nr_used].end)
		nr_used++;

	/* need to skip first one ?*/
	first = (early_res != early_res_x) ? 1 : 0;

	printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n",
	       nr_used - first, max_early_res, start, end);
	for (n = first; n < nr_used; n++) {
		struct early_res *res = &early_res[n];
		u64 clip_lo, clip_hi;

		printk(KERN_INFO " #%d [%010llx - %010llx] %16s", n,
		       res->start, res->end, res->name);
		clip_lo = max(start, res->start);
		clip_hi = min(end, res->end);
		if (clip_lo < clip_hi) {
			printk(KERN_CONT " ==> [%010llx - %010llx]\n",
			       clip_lo, clip_hi);
			reserve_bootmem_generic(clip_lo, clip_hi - clip_lo,
						BOOTMEM_DEFAULT);
		} else {
			/* entirely outside the window: just end the line */
			printk(KERN_CONT "\n");
		}
	}
	/* clear them */
	memset(early_res, 0, sizeof(struct early_res) * max_early_res);
	early_res = NULL;
	max_early_res = 0;
	early_res_count = 0;
}
|
||||
#endif
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
|
||||
{
|
||||
int i;
|
||||
u64 addr = *addrp;
|
||||
int changed = 0;
|
||||
struct early_res *r;
|
||||
again:
|
||||
i = find_overlapped_early(addr, addr + size);
|
||||
r = &early_res[i];
|
||||
if (i < max_early_res && r->end) {
|
||||
*addrp = addr = round_up(r->end, align);
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/* Check for already reserved areas */
|
||||
static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
|
||||
{
|
||||
int i;
|
||||
u64 addr = *addrp, last;
|
||||
u64 size = *sizep;
|
||||
int changed = 0;
|
||||
again:
|
||||
last = addr + size;
|
||||
for (i = 0; i < max_early_res && early_res[i].end; i++) {
|
||||
struct early_res *r = &early_res[i];
|
||||
if (last > r->start && addr < r->start) {
|
||||
size = r->start - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last > r->end && addr < r->end) {
|
||||
addr = round_up(r->end, align);
|
||||
size = last - addr;
|
||||
changed = 1;
|
||||
goto again;
|
||||
}
|
||||
if (last <= r->end && addr >= r->start) {
|
||||
(*sizep)++;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (changed) {
|
||||
*addrp = addr;
|
||||
*sizep = size;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a free area with specified alignment in a specific range.
|
||||
* only with the area.between start to end is active range from early_node_map
|
||||
* so they are good as RAM
|
||||
*/
|
||||
u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end,
|
||||
u64 size, u64 align)
|
||||
{
|
||||
u64 addr, last;
|
||||
|
||||
addr = round_up(ei_start, align);
|
||||
if (addr < start)
|
||||
addr = round_up(start, align);
|
||||
if (addr >= ei_last)
|
||||
goto out;
|
||||
while (bad_addr(&addr, size, align) && addr+size <= ei_last)
|
||||
;
|
||||
last = addr + size;
|
||||
if (last > ei_last)
|
||||
goto out;
|
||||
if (last > end)
|
||||
goto out;
|
||||
|
||||
return addr;
|
||||
|
||||
out:
|
||||
return -1ULL;
|
||||
}
|
||||
|
||||
/*
 * Find a free area inside [ei_start, ei_last) at or above @start,
 * letting the size shrink to fit between early reservations.
 *
 * *@sizep starts as everything from the aligned start address up to
 * @ei_last and is then adjusted by bad_addr_size().
 *
 * Returns the address found with *@sizep holding its size, or -1ULL if
 * the resulting area would run past @ei_last.
 */
u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start,
			 u64 *sizep, u64 align)
{
	u64 addr;

	addr = round_up(ei_start, align);
	if (addr < start)
		addr = round_up(start, align);
	if (addr >= ei_last)
		return -1ULL;

	*sizep = ei_last - addr;
	while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last)
		;

	if (addr + *sizep > ei_last)
		return -1ULL;

	return addr;
}
|
163
kernel/range.c
Normal file
163
kernel/range.c
Normal file
@ -0,0 +1,163 @@
|
||||
/*
|
||||
* Range add and subtract
|
||||
*/
|
||||
#include <linux/module.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include <linux/range.h>
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
/*
 * Append [start, end) to @range, which has @az slots of which the first
 * @nr_range are in use.
 *
 * Empty ranges are ignored, and the request is silently dropped when no
 * slot is left.  Returns the new number of slots in use.
 */
int add_range(struct range *range, int az, int nr_range, u64 start, u64 end)
{
	if (start < end && nr_range < az) {
		struct range *slot = &range[nr_range];

		slot->start = start;
		slot->end = end;
		nr_range++;
	}

	return nr_range;
}
|
||||
|
||||
/*
 * Add [start, end) to @range, merging it into the first existing entry
 * that it overlaps or touches; otherwise it is appended via add_range().
 *
 * Only one entry is ever extended, so the result may still contain
 * entries that overlap each other.  Returns the number of slots in use.
 */
int add_range_with_merge(struct range *range, int az, int nr_range,
		     u64 start, u64 end)
{
	int idx;

	if (start >= end)
		return nr_range;

	/* Try to merge it with old one: */
	for (idx = 0; idx < nr_range; idx++) {
		struct range *cur = &range[idx];

		/* skip unused slots */
		if (!cur->end)
			continue;

		/* disjoint and not even adjacent */
		if (max(cur->start, start) > min(cur->end, end))
			continue;

		cur->start = min(cur->start, start);
		cur->end = max(cur->end, end);
		return nr_range;
	}

	/* Need to add it: */
	return add_range(range, az, nr_range, start, end);
}
|
||||
|
||||
/*
 * Remove [start, end) from every entry of @range (@az slots).
 *
 * An entry is cleared when fully covered, trimmed when covered at one
 * end, and split in two when the removed part lies strictly inside it.
 * A split needs a spare (end == 0) slot; if none is left an error is
 * logged and only the lower part of the entry is kept.
 */
void subtract_range(struct range *range, int az, u64 start, u64 end)
{
	int spare, j;

	if (start >= end)
		return;

	for (j = 0; j < az; j++) {
		struct range *cur = &range[j];

		if (!cur->end)
			continue;

		if (start <= cur->start) {
			if (end >= cur->end) {
				/* fully covered */
				cur->start = 0;
				cur->end = 0;
			} else if (cur->start < end) {
				/* head covered */
				cur->start = end;
			}
			continue;
		}

		/* from here on: start > cur->start */
		if (end >= cur->end) {
			/* tail covered, if they intersect at all */
			if (cur->end > start)
				cur->end = start;
			continue;
		}

		/* strictly inside: the upper remainder needs a spare slot */
		for (spare = 0; spare < az; spare++) {
			if (range[spare].end == 0)
				break;
		}
		if (spare < az) {
			range[spare].end = cur->end;
			range[spare].start = end;
		} else {
			printk(KERN_ERR "run of slot in ranges\n");
		}
		cur->end = start;
	}
}
|
||||
|
||||
static int cmp_range(const void *x1, const void *x2)
|
||||
{
|
||||
const struct range *r1 = x1;
|
||||
const struct range *r2 = x2;
|
||||
s64 start1, start2;
|
||||
|
||||
start1 = r1->start;
|
||||
start2 = r2->start;
|
||||
|
||||
return start1 - start2;
|
||||
}
|
||||
|
||||
int clean_sort_range(struct range *range, int az)
|
||||
{
|
||||
int i, j, k = az - 1, nr_range = 0;
|
||||
|
||||
for (i = 0; i < k; i++) {
|
||||
if (range[i].end)
|
||||
continue;
|
||||
for (j = k; j > i; j--) {
|
||||
if (range[j].end) {
|
||||
k = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == i)
|
||||
break;
|
||||
range[i].start = range[k].start;
|
||||
range[i].end = range[k].end;
|
||||
range[k].start = 0;
|
||||
range[k].end = 0;
|
||||
k--;
|
||||
}
|
||||
/* count it */
|
||||
for (i = 0; i < az; i++) {
|
||||
if (!range[i].end) {
|
||||
nr_range = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* sort them */
|
||||
sort(range, nr_range, sizeof(struct range), cmp_range, NULL);
|
||||
|
||||
return nr_range;
|
||||
}
|
||||
|
||||
void sort_range(struct range *range, int nr_range)
|
||||
{
|
||||
/* sort them */
|
||||
sort(range, nr_range, sizeof(struct range), cmp_range, NULL);
|
||||
}
|
@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME
|
||||
config SPARSEMEM_VMEMMAP_ENABLE
|
||||
bool
|
||||
|
||||
config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
def_bool y
|
||||
depends on SPARSEMEM && X86_64
|
||||
|
||||
config SPARSEMEM_VMEMMAP
|
||||
bool "Sparse Memory virtual memmap"
|
||||
depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE
|
||||
|
195
mm/bootmem.c
195
mm/bootmem.c
@ -13,6 +13,7 @@
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/kmemleak.h>
|
||||
#include <linux/range.h>
|
||||
|
||||
#include <asm/bug.h>
|
||||
#include <asm/io.h>
|
||||
@ -32,6 +33,7 @@ unsigned long max_pfn;
|
||||
unsigned long saved_max_pfn;
|
||||
#endif
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata;
|
||||
|
||||
static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list);
|
||||
@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
|
||||
min_low_pfn = start;
|
||||
return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
|
||||
}
|
||||
|
||||
#endif
|
||||
/*
|
||||
* free_bootmem_late - free bootmem pages directly to page allocator
|
||||
* @addr: starting address of the range
|
||||
@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
/*
 * Release the page frames [start, end) through __free_pages_bootmem().
 *
 * The part of the range aligned to BITS_PER_LONG page frames is freed in
 * blocks of order ilog2(BITS_PER_LONG); the unaligned head and tail, or
 * the whole range if it contains no aligned block, are freed one page
 * at a time.
 */
static void __init __free_pages_memory(unsigned long start, unsigned long end)
{
	/* same type as the bounds, so large frame numbers are not truncated */
	unsigned long i;
	unsigned long start_aligned, end_aligned;
	int order = ilog2(BITS_PER_LONG);

	start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1);
	end_aligned = end & ~(BITS_PER_LONG - 1);

	if (end_aligned <= start_aligned) {
#if 1
		printk(KERN_DEBUG " %lx - %lx\n", start, end);
#endif
		for (i = start; i < end; i++)
			__free_pages_bootmem(pfn_to_page(i), 0);

		return;
	}

#if 1
	printk(KERN_DEBUG " %lx %lx - %lx %lx\n",
	       start, start_aligned, end_aligned, end);
#endif
	/* unaligned head */
	for (i = start; i < start_aligned; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);

	/* aligned middle, one block at a time */
	for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG)
		__free_pages_bootmem(pfn_to_page(i), order);

	/* unaligned tail */
	for (i = end_aligned; i < end; i++)
		__free_pages_bootmem(pfn_to_page(i), 0);
}
|
||||
|
||||
/*
 * Release every range reported by get_free_all_memory_range() for
 * @nodeid and return the total number of page frames in those ranges.
 */
unsigned long __init free_all_memory_core_early(int nodeid)
{
	struct range *free_ranges = NULL;
	unsigned long total = 0;
	int nr, idx;

	nr = get_free_all_memory_range(&free_ranges, nodeid);

	for (idx = 0; idx < nr; idx++) {
		u64 first_pfn = free_ranges[idx].start;
		u64 end_pfn = free_ranges[idx].end;

		total += end_pfn - first_pfn;
		__free_pages_memory(first_pfn, end_pfn);
	}

	return total;
}
|
||||
#else
|
||||
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
|
||||
{
|
||||
int aligned;
|
||||
@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
|
||||
|
||||
return count;
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* free_all_bootmem_node - release a node's free pages to the buddy allocator
|
||||
@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
|
||||
unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
{
	register_page_bootmem_info_node(pgdat);
#ifndef CONFIG_NO_BOOTMEM
	return free_all_bootmem_core(pgdat->bdata);
#else
	/*
	 * Nothing is released per node here:
	 * free_all_memory_core_early(MAX_NUMNODES) will be called later
	 */
	return 0;
#endif
}
|
||||
|
||||
/**
|
||||
@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
|
||||
*/
|
||||
unsigned long __init free_all_bootmem(void)
{
#ifdef CONFIG_NO_BOOTMEM
	/*
	 * Use MAX_NUMNODES rather than node 0's id: the per-node variant
	 * frees nothing in this configuration and defers to a
	 * MAX_NUMNODES call, and get_free_all_memory_range() only
	 * retires the early_res table for that value.
	 */
	return free_all_memory_core_early(MAX_NUMNODES);
#else
	return free_all_bootmem_core(NODE_DATA(0)->bdata);
#endif
}
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
static void __init __free(bootmem_data_t *bdata,
|
||||
unsigned long sidx, unsigned long eidx)
|
||||
{
|
||||
@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
|
||||
}
|
||||
BUG();
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* free_bootmem_node - mark a page range as usable
|
||||
@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end,
|
||||
void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
|
||||
unsigned long size)
|
||||
{
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
free_early(physaddr, physaddr + size);
|
||||
#if 0
|
||||
printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
|
||||
#endif
|
||||
#else
|
||||
unsigned long start, end;
|
||||
|
||||
kmemleak_free_part(__va(physaddr), size);
|
||||
@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
|
||||
end = PFN_DOWN(physaddr + size);
|
||||
|
||||
mark_bootmem_node(pgdat->bdata, start, end, 0, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
|
||||
*/
|
||||
void __init free_bootmem(unsigned long addr, unsigned long size)
|
||||
{
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
free_early(addr, addr + size);
|
||||
#if 0
|
||||
printk(KERN_DEBUG "free %lx %lx\n", addr, size);
|
||||
#endif
|
||||
#else
|
||||
unsigned long start, end;
|
||||
|
||||
kmemleak_free_part(__va(addr), size);
|
||||
@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
|
||||
end = PFN_DOWN(addr + size);
|
||||
|
||||
mark_bootmem(start, end, 0, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
|
||||
int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
				 unsigned long size, int flags)
{
#ifndef CONFIG_NO_BOOTMEM
	/* reserve every page frame the byte range touches */
	unsigned long first_pfn = PFN_DOWN(physaddr);
	unsigned long end_pfn = PFN_UP(physaddr + size);

	return mark_bootmem_node(pgdat->bdata, first_pfn, end_pfn, 1, flags);
#else
	/* there is no bootmem allocator to reserve from */
	panic("no bootmem");
	return 0;
#endif
}
|
||||
|
||||
/**
|
||||
@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
|
||||
int __init reserve_bootmem(unsigned long addr, unsigned long size,
			    int flags)
{
#ifndef CONFIG_NO_BOOTMEM
	/* reserve every page frame the byte range touches */
	unsigned long first_pfn = PFN_DOWN(addr);
	unsigned long end_pfn = PFN_UP(addr + size);

	return mark_bootmem(first_pfn, end_pfn, 1, flags);
#else
	/* there is no bootmem allocator to reserve from */
	panic("no bootmem");
	return 0;
#endif
}
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
static unsigned long __init align_idx(struct bootmem_data *bdata,
|
||||
unsigned long idx, unsigned long step)
|
||||
{
|
||||
@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata,
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void * __init ___alloc_bootmem_nopanic(unsigned long size,
|
||||
unsigned long align,
|
||||
unsigned long goal,
|
||||
unsigned long limit)
|
||||
{
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
void *ptr;
|
||||
|
||||
if (WARN_ON_ONCE(slab_is_available()))
|
||||
return kzalloc(size, GFP_NOWAIT);
|
||||
|
||||
restart:
|
||||
|
||||
ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit);
|
||||
|
||||
if (ptr)
|
||||
return ptr;
|
||||
|
||||
if (goal != 0) {
|
||||
goal = 0;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
#else
|
||||
bootmem_data_t *bdata;
|
||||
void *region;
|
||||
|
||||
@ -613,6 +727,7 @@ restart:
|
||||
}
|
||||
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@ -631,7 +746,13 @@ restart:
|
||||
void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align,
|
||||
unsigned long goal)
|
||||
{
|
||||
return ___alloc_bootmem_nopanic(size, align, goal, 0);
|
||||
unsigned long limit = 0;
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
limit = -1UL;
|
||||
#endif
|
||||
|
||||
return ___alloc_bootmem_nopanic(size, align, goal, limit);
|
||||
}
|
||||
|
||||
static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
|
||||
@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align,
|
||||
void * __init __alloc_bootmem(unsigned long size, unsigned long align,
|
||||
unsigned long goal)
|
||||
{
|
||||
return ___alloc_bootmem(size, align, goal, 0);
|
||||
unsigned long limit = 0;
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
limit = -1UL;
|
||||
#endif
|
||||
|
||||
return ___alloc_bootmem(size, align, goal, limit);
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
|
||||
unsigned long size, unsigned long align,
|
||||
unsigned long goal, unsigned long limit)
|
||||
@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata,
|
||||
|
||||
return ___alloc_bootmem(size, align, goal, limit);
|
||||
}
|
||||
#endif
|
||||
|
||||
/**
|
||||
* __alloc_bootmem_node - allocate boot memory from a specific node
|
||||
@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
|
||||
if (WARN_ON_ONCE(slab_is_available()))
|
||||
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
return __alloc_memory_core_early(pgdat->node_id, size, align,
|
||||
goal, -1ULL);
|
||||
#else
|
||||
return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Allocate boot memory for @pgdat, trying memory at or above
 * MAX_DMA32_PFN first when the node extends beyond it and @goal lies
 * below it.  Falls back to __alloc_bootmem_node() with the original
 * @goal if that attempt fails or does not apply.
 *
 * NOTE(review): "128 >> (20 - PAGE_SHIFT)" evaluates to 0 for any
 * PAGE_SHIFT <= 13; it looks like a 128MB margin
 * ("128 << (20 - PAGE_SHIFT)") was intended -- confirm before changing.
 */
void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size,
				   unsigned long align, unsigned long goal)
{
#ifdef MAX_DMA32_PFN
	unsigned long node_end_pfn;

	if (WARN_ON_ONCE(slab_is_available()))
		return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);

	/* update goal according ...MAX_DMA32_PFN */
	node_end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages;

	if (node_end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) &&
	    (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) {
		unsigned long high_goal = MAX_DMA32_PFN << PAGE_SHIFT;
		void *mem;

#ifdef CONFIG_NO_BOOTMEM
		mem = __alloc_memory_core_early(pgdat->node_id, size, align,
						high_goal, -1ULL);
#else
		mem = alloc_bootmem_core(pgdat->bdata, size, align,
					 high_goal, 0);
#endif
		if (mem)
			return mem;
	}
#endif

	return __alloc_bootmem_node(pgdat, size, align, goal);
}
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM
|
||||
@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size,
|
||||
void * __init alloc_bootmem_section(unsigned long size,
|
||||
unsigned long section_nr)
|
||||
{
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
unsigned long pfn, goal, limit;
|
||||
|
||||
pfn = section_nr_to_pfn(section_nr);
|
||||
goal = pfn << PAGE_SHIFT;
|
||||
limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT;
|
||||
|
||||
return __alloc_memory_core_early(early_pfn_to_nid(pfn), size,
|
||||
SMP_CACHE_BYTES, goal, limit);
|
||||
#else
|
||||
bootmem_data_t *bdata;
|
||||
unsigned long pfn, goal, limit;
|
||||
|
||||
@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size,
|
||||
bdata = &bootmem_node_data[early_pfn_to_nid(pfn)];
|
||||
|
||||
return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size,
|
||||
if (WARN_ON_ONCE(slab_is_available()))
|
||||
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
ptr = __alloc_memory_core_early(pgdat->node_id, size, align,
|
||||
goal, -1ULL);
|
||||
#else
|
||||
ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0);
|
||||
if (ptr)
|
||||
return ptr;
|
||||
|
||||
ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
|
||||
#endif
|
||||
if (ptr)
|
||||
return ptr;
|
||||
|
||||
@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
|
||||
if (WARN_ON_ONCE(slab_is_available()))
|
||||
return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id);
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
return __alloc_memory_core_early(pgdat->node_id, size, align,
|
||||
goal, ARCH_LOW_ADDRESS_LIMIT);
|
||||
#else
|
||||
return ___alloc_bootmem_node(pgdat->bdata, size, align,
|
||||
goal, ARCH_LOW_ADDRESS_LIMIT);
|
||||
#endif
|
||||
}
|
||||
|
@ -3374,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid,
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Feed the active early_node_map[] entries of node @nid into @range.
 *
 * Every entry visited by for_each_active_range_index_in_nid() has its
 * start_pfn/end_pfn pair handed to add_range() together with @az and the
 * running entry count.
 *
 * Returns the count produced by the last add_range() call, or @nr_range
 * unchanged when no entry was visited.
 */
int __init add_from_early_node_map(struct range *range, int az,
				   int nr_range, int nid)
{
	int idx;

	/* need to go over early_node_map to find out good range for node */
	for_each_active_range_index_in_nid(idx, nid) {
		u64 first_pfn = early_node_map[idx].start_pfn;
		u64 last_pfn = early_node_map[idx].end_pfn;

		nr_range = add_range(range, az, nr_range, first_pfn, last_pfn);
	}

	return nr_range;
}
|
||||
|
||||
#ifdef CONFIG_NO_BOOTMEM
|
||||
/*
 * Carve a zeroed allocation for node @nid out of its early_node_map[] ranges.
 *
 * The active ranges of @nid are tried in iteration order.  For each one,
 * find_early_area() is asked for @size bytes aligned to @align inside the
 * range, with @goal and @limit passed through unchanged.  The first range
 * that yields an address other than -1ULL wins: the area is cleared through
 * its phys_to_virt() mapping, registered via reserve_early_without_check()
 * under the name "BOOTMEM", and returned.
 *
 * Returns the virtual address of the area, or NULL when no range produced one.
 */
void * __init __alloc_memory_core_early(int nid, u64 size, u64 align,
					u64 goal, u64 limit)
{
	int idx;

	/* need to go over early_node_map to find out good range for node */
	for_each_active_range_index_in_nid(idx, nid) {
		u64 range_start, range_end;
		u64 phys;
		void *mem;

		/* store the pfns in u64 first, then shift them to byte addresses */
		range_end = early_node_map[idx].end_pfn;
		range_end <<= PAGE_SHIFT;
		range_start = early_node_map[idx].start_pfn;
		range_start <<= PAGE_SHIFT;

		phys = find_early_area(range_start, range_end,
				       goal, limit, size, align);

		/* nothing suitable in this range, try the next one */
		if (phys == -1ULL)
			continue;

#if 0
		printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n",
				nid,
				range_start, range_end, goal, limit, size,
				align, phys);
#endif

		/* zero the area before recording the reservation */
		mem = phys_to_virt(phys);
		memset(mem, 0, size);
		reserve_early_without_check(phys, phys + size, "BOOTMEM");

		return mem;
	}

	return NULL;
}
|
||||
#endif
|
||||
|
||||
|
||||
void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data)
|
||||
{
|
||||
int i;
|
||||
@ -4406,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve)
|
||||
}
|
||||
|
||||
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
||||
struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] };
|
||||
struct pglist_data __refdata contig_page_data = {
|
||||
#ifndef CONFIG_NO_BOOTMEM
|
||||
.bdata = &bootmem_node_data[0]
|
||||
#endif
|
||||
};
|
||||
EXPORT_SYMBOL(contig_page_data);
|
||||
#endif
|
||||
|
||||
|
@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node,
|
||||
unsigned long align,
|
||||
unsigned long goal)
|
||||
{
|
||||
return __alloc_bootmem_node(NODE_DATA(node), size, align, goal);
|
||||
return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal);
|
||||
}
|
||||
|
||||
static void *vmemmap_buf;
|
||||
static void *vmemmap_buf_end;
|
||||
|
||||
void * __meminit vmemmap_alloc_block(unsigned long size, int node)
|
||||
{
|
||||
@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
|
||||
__pa(MAX_DMA_ADDRESS));
|
||||
}
|
||||
|
||||
/* need to make sure size is all the same during early stage */
|
||||
/*
 * Serve a @size byte block from the vmemmap buffer when one is installed.
 *
 * While vmemmap_buf is non-NULL the next chunk is taken at the cursor rounded
 * up with ALIGN(cursor, size); if that chunk fits below vmemmap_buf_end the
 * cursor moves past it and the chunk is returned.  With no buffer installed,
 * or not enough room left, the request is forwarded to vmemmap_alloc_block().
 */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
	void *chunk;

	if (vmemmap_buf) {
		/* take the next size-aligned chunk from the buffer */
		chunk = (void *)ALIGN((unsigned long)vmemmap_buf, size);

		if (chunk + size <= vmemmap_buf_end) {
			vmemmap_buf = chunk + size;
			return chunk;
		}
	}

	/* no buffer installed, or the chunk would run past its end */
	return vmemmap_alloc_block(size, node);
}
|
||||
|
||||
void __meminit vmemmap_verify(pte_t *pte, int node,
|
||||
unsigned long start, unsigned long end)
|
||||
{
|
||||
@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
|
||||
pte_t *pte = pte_offset_kernel(pmd, addr);
|
||||
if (pte_none(*pte)) {
|
||||
pte_t entry;
|
||||
void *p = vmemmap_alloc_block(PAGE_SIZE, node);
|
||||
void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
|
||||
if (!p)
|
||||
return NULL;
|
||||
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
|
||||
@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/*
 * Populate the memory maps of the present sections in [@pnum_begin, @pnum_end)
 * for node @nodeid and store each result in @map_map[].
 *
 * One buffer of @map_count section maps, each rounded up to PMD_SIZE, is
 * requested up front through __earlyonly_bootmem_alloc().  When that succeeds
 * the buffer is installed in vmemmap_buf / vmemmap_buf_end for the duration
 * of the loop; when it fails the sections are populated all the same, just
 * without a buffer installed.
 *
 * A section whose sparse_mem_map_populate() call returns NULL is reported
 * with a KERN_ERR message and has its section_mem_map cleared.
 *
 * On exit the buffer state is torn down and vmemmap_buf / vmemmap_buf_end are
 * reset to NULL.
 */
void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	unsigned long section_bytes = sizeof(struct page) * PAGES_PER_SECTION;
	unsigned long buf_bytes;
	unsigned long nr;
	void *buf_start;

	section_bytes = ALIGN(section_bytes, PMD_SIZE);
	buf_bytes = section_bytes * map_count;

	buf_start = __earlyonly_bootmem_alloc(nodeid, buf_bytes,
					      PMD_SIZE, __pa(MAX_DMA_ADDRESS));
	if (buf_start) {
		vmemmap_buf = buf_start;
		vmemmap_buf_end = buf_start + buf_bytes;
	}

	for (nr = pnum_begin; nr < pnum_end; nr++) {
		struct mem_section *ms;

		if (!present_section_nr(nr))
			continue;

		map_map[nr] = sparse_mem_map_populate(nr, nodeid);
		if (!map_map[nr]) {
			ms = __nr_to_section(nr);
			printk(KERN_ERR "%s: sparsemem memory map backing failed "
					"some memory will not be available.\n", __func__);
			ms->section_mem_map = 0;
		}
	}

	/* no buffer was installed, so there is nothing to give back */
	if (!buf_start)
		return;

	/* need to free the part of the buffer that is left over */
#ifdef CONFIG_NO_BOOTMEM
	/*
	 * free_early() is called on the whole buffer range; if the cursor has
	 * moved, [buf_start, vmemmap_buf) is then reserved again under the
	 * name "MEMMAP <nodeid>".
	 */
	free_early(__pa(buf_start), __pa(vmemmap_buf_end));
	if (buf_start < vmemmap_buf) {
		char name[15];

		snprintf(name, sizeof(name), "MEMMAP %d", nodeid);
		reserve_early_without_check(__pa(buf_start),
					    __pa(vmemmap_buf), name);
	}
#else
	/* only the range from the cursor to the buffer end is freed */
	free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf);
#endif
	vmemmap_buf = NULL;
	vmemmap_buf_end = NULL;
}
|
||||
|
196
mm/sparse.c
196
mm/sparse.c
@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void)
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
static unsigned long * __init
|
||||
sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
|
||||
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
|
||||
unsigned long count)
|
||||
{
|
||||
unsigned long section_nr;
|
||||
|
||||
@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
|
||||
* this problem.
|
||||
*/
|
||||
section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
|
||||
return alloc_bootmem_section(usemap_size(), section_nr);
|
||||
return alloc_bootmem_section(usemap_size() * count, section_nr);
|
||||
}
|
||||
|
||||
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
|
||||
@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
|
||||
}
|
||||
#else
|
||||
static unsigned long * __init
|
||||
sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat)
|
||||
sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
|
||||
unsigned long count)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
|
||||
}
|
||||
#endif /* CONFIG_MEMORY_HOTREMOVE */
|
||||
|
||||
static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum)
|
||||
static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
|
||||
unsigned long pnum_begin,
|
||||
unsigned long pnum_end,
|
||||
unsigned long usemap_count, int nodeid)
|
||||
{
|
||||
unsigned long *usemap;
|
||||
struct mem_section *ms = __nr_to_section(pnum);
|
||||
int nid = sparse_early_nid(ms);
|
||||
void *usemap;
|
||||
unsigned long pnum;
|
||||
int size = usemap_size();
|
||||
|
||||
usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid));
|
||||
if (usemap)
|
||||
return usemap;
|
||||
|
||||
usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size());
|
||||
usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
|
||||
usemap_count);
|
||||
if (usemap) {
|
||||
check_usemap_section_nr(nid, usemap);
|
||||
return usemap;
|
||||
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
usemap_map[pnum] = usemap;
|
||||
usemap += size;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
/* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */
|
||||
nid = 0;
|
||||
usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count);
|
||||
if (usemap) {
|
||||
for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
usemap_map[pnum] = usemap;
|
||||
usemap += size;
|
||||
check_usemap_section_nr(nodeid, usemap_map[pnum]);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
printk(KERN_WARNING "%s: allocation failed\n", __func__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifndef CONFIG_SPARSEMEM_VMEMMAP
|
||||
@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
|
||||
PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION));
|
||||
return map;
|
||||
}
|
||||
/*
 * Allocate the memory maps of the present sections in [@pnum_begin, @pnum_end)
 * for node @nodeid and store each section's map in @map_map[].
 *
 * Three strategies are tried in order:
 *   1. alloc_remap() for @map_count maps of the unpadded per-section size;
 *   2. alloc_bootmem_pages_node() for @map_count maps, each rounded up with
 *      PAGE_ALIGN();
 *   3. one sparse_mem_map_populate() call per present section.
 *
 * With strategy 1 or 2 the single allocation is sliced up, one slice per
 * present section, using the per-section size that strategy allocated with.
 * With strategy 3 a section whose allocation fails is reported with a
 * KERN_ERR message and has its section_mem_map cleared.
 */
void __init sparse_mem_maps_populate_node(struct page **map_map,
					  unsigned long pnum_begin,
					  unsigned long pnum_end,
					  unsigned long map_count, int nodeid)
{
	unsigned long stride = sizeof(struct page) * PAGES_PER_SECTION;
	unsigned long nr;
	void *chunk;

	chunk = alloc_remap(nodeid, stride * map_count);
	if (!chunk) {
		/* bootmem pages: every section map starts on a page boundary */
		stride = PAGE_ALIGN(stride);
		chunk = alloc_bootmem_pages_node(NODE_DATA(nodeid),
						 stride * map_count);
	}

	if (chunk) {
		/* slice the single allocation up among the present sections */
		for (nr = pnum_begin; nr < pnum_end; nr++) {
			if (present_section_nr(nr)) {
				map_map[nr] = chunk;
				chunk += stride;
			}
		}
		return;
	}

	/* fallback */
	for (nr = pnum_begin; nr < pnum_end; nr++) {
		struct mem_section *ms;

		if (!present_section_nr(nr))
			continue;

		map_map[nr] = sparse_mem_map_populate(nr, nodeid);
		if (!map_map[nr]) {
			ms = __nr_to_section(nr);
			printk(KERN_ERR "%s: sparsemem memory map backing failed "
					"some memory will not be available.\n", __func__);
			ms->section_mem_map = 0;
		}
	}
}
|
||||
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
/*
 * Early-boot entry point for allocating the memory maps of one node's
 * sections: forwards all of its arguments, unchanged and in the same order,
 * to sparse_mem_maps_populate_node().
 */
static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
				 unsigned long pnum_begin,
				 unsigned long pnum_end,
				 unsigned long map_count, int nodeid)
{
	sparse_mem_maps_populate_node(map_map,
				      pnum_begin, pnum_end,
				      map_count,
				      nodeid);
}
|
||||
#else
|
||||
static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
|
||||
{
|
||||
struct page *map;
|
||||
@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
|
||||
ms->section_mem_map = 0;
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Weak, empty default for vmemmap_populate_print_last().  Because the symbol
 * is weak, a non-weak definition linked in from elsewhere takes precedence.
 */
void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
{
	/* intentionally empty */
}
|
||||
|
||||
/*
|
||||
* Allocate the accumulated non-linear sections, allocate a mem_map
|
||||
* for each and record the physical to section mapping.
|
||||
@ -407,6 +481,14 @@ void __init sparse_init(void)
|
||||
unsigned long *usemap;
|
||||
unsigned long **usemap_map;
|
||||
int size;
|
||||
int nodeid_begin = 0;
|
||||
unsigned long pnum_begin = 0;
|
||||
unsigned long usemap_count;
|
||||
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
unsigned long map_count;
|
||||
int size2;
|
||||
struct page **map_map;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* map is using big page (aka 2M in x86 64 bit)
|
||||
@ -425,10 +507,81 @@ void __init sparse_init(void)
|
||||
panic("can not allocate usemap_map\n");
|
||||
|
||||
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
|
||||
struct mem_section *ms;
|
||||
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
|
||||
ms = __nr_to_section(pnum);
|
||||
nodeid_begin = sparse_early_nid(ms);
|
||||
pnum_begin = pnum;
|
||||
break;
|
||||
}
|
||||
usemap_count = 1;
|
||||
for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
|
||||
struct mem_section *ms;
|
||||
int nodeid;
|
||||
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
ms = __nr_to_section(pnum);
|
||||
nodeid = sparse_early_nid(ms);
|
||||
if (nodeid == nodeid_begin) {
|
||||
usemap_count++;
|
||||
continue;
|
||||
}
|
||||
/* ok, we need to take care of from pnum_begin to pnum - 1 */
|
||||
sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
|
||||
usemap_count, nodeid_begin);
|
||||
/* new start, update count etc*/
|
||||
nodeid_begin = nodeid;
|
||||
pnum_begin = pnum;
|
||||
usemap_count = 1;
|
||||
}
|
||||
/* ok, last chunk */
|
||||
sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
|
||||
usemap_count, nodeid_begin);
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
|
||||
map_map = alloc_bootmem(size2);
|
||||
if (!map_map)
|
||||
panic("can not allocate map_map\n");
|
||||
|
||||
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
|
||||
struct mem_section *ms;
|
||||
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
ms = __nr_to_section(pnum);
|
||||
nodeid_begin = sparse_early_nid(ms);
|
||||
pnum_begin = pnum;
|
||||
break;
|
||||
}
|
||||
map_count = 1;
|
||||
for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
|
||||
struct mem_section *ms;
|
||||
int nodeid;
|
||||
|
||||
if (!present_section_nr(pnum))
|
||||
continue;
|
||||
ms = __nr_to_section(pnum);
|
||||
nodeid = sparse_early_nid(ms);
|
||||
if (nodeid == nodeid_begin) {
|
||||
map_count++;
|
||||
continue;
|
||||
}
|
||||
/* ok, we need to take care of from pnum_begin to pnum - 1 */
|
||||
sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
|
||||
map_count, nodeid_begin);
|
||||
/* new start, update count etc*/
|
||||
nodeid_begin = nodeid;
|
||||
pnum_begin = pnum;
|
||||
map_count = 1;
|
||||
}
|
||||
/* ok, last chunk */
|
||||
sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
|
||||
map_count, nodeid_begin);
|
||||
#endif
|
||||
|
||||
for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
|
||||
if (!present_section_nr(pnum))
|
||||
@ -438,7 +591,11 @@ void __init sparse_init(void)
|
||||
if (!usemap)
|
||||
continue;
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
map = map_map[pnum];
|
||||
#else
|
||||
map = sparse_early_mem_map_alloc(pnum);
|
||||
#endif
|
||||
if (!map)
|
||||
continue;
|
||||
|
||||
@ -448,6 +605,9 @@ void __init sparse_init(void)
|
||||
|
||||
vmemmap_populate_print_last();
|
||||
|
||||
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
|
||||
free_bootmem(__pa(map_map), size2);
|
||||
#endif
|
||||
free_bootmem(__pa(usemap_map), size);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user