Merge branch 'akpm' (patches from Andrew)

Merge misc updates from Andrew Morton:
 "181 patches.

  Subsystems affected by this patch series: kbuild, scripts, ntfs,
  ocfs2, vfs, mm (slab, slub, kmemleak, dax, debug, pagecache, fadvise,
  gup, swap, memremap, memcg, selftests, pagemap, mincore, hmm, dma,
  memory-failure, vmalloc and migration)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (181 commits)
  mm/migrate: remove obsolete comment about device public
  mm/migrate: remove cpages-- in migrate_vma_finalize()
  mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary
  memblock: use separate iterators for memory and reserved regions
  memblock: implement for_each_reserved_mem_region() using __next_mem_region()
  memblock: remove unused memblock_mem_size()
  x86/setup: simplify reserve_crashkernel()
  x86/setup: simplify initrd relocation and reservation
  arch, drivers: replace for_each_membock() with for_each_mem_range()
  arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
  memblock: reduce number of parameters in for_each_mem_range()
  memblock: make memblock_debug and related functionality private
  memblock: make for_each_memblock_type() iterator private
  mircoblaze: drop unneeded NUMA and sparsemem initializations
  riscv: drop unneeded node initialization
  h8300, nds32, openrisc: simplify detection of memory extents
  arm64: numa: simplify dummy_numa_init()
  arm, xtensa: simplify initialization of high memory pages
  dma-contiguous: simplify cma_early_percent_memory()
  KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
  ...
This commit is contained in:
Linus Torvalds 2020-10-14 09:57:24 -07:00
commit d5660df4a5
192 changed files with 3926 additions and 2430 deletions

View File

@ -203,11 +203,13 @@ ForEachMacros:
- 'for_each_matching_node' - 'for_each_matching_node'
- 'for_each_matching_node_and_match' - 'for_each_matching_node_and_match'
- 'for_each_member' - 'for_each_member'
- 'for_each_memblock' - 'for_each_mem_region'
- 'for_each_memblock_type' - 'for_each_memblock_type'
- 'for_each_memcg_cache_index' - 'for_each_memcg_cache_index'
- 'for_each_mem_pfn_range' - 'for_each_mem_pfn_range'
- '__for_each_mem_range'
- 'for_each_mem_range' - 'for_each_mem_range'
- '__for_each_mem_range_rev'
- 'for_each_mem_range_rev' - 'for_each_mem_range_rev'
- 'for_each_migratetype_order' - 'for_each_migratetype_order'
- 'for_each_msi_entry' - 'for_each_msi_entry'
@ -271,6 +273,7 @@ ForEachMacros:
- 'for_each_registered_fb' - 'for_each_registered_fb'
- 'for_each_requested_gpio' - 'for_each_requested_gpio'
- 'for_each_requested_gpio_in_range' - 'for_each_requested_gpio_in_range'
- 'for_each_reserved_mem_range'
- 'for_each_reserved_mem_region' - 'for_each_reserved_mem_region'
- 'for_each_rtd_codec_dais' - 'for_each_rtd_codec_dais'
- 'for_each_rtd_codec_dais_rollback' - 'for_each_rtd_codec_dais_rollback'

View File

@ -1259,6 +1259,10 @@ PAGE_SIZE multiple when read back.
can show up in the middle. Don't rely on items remaining in a can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values! fixed position; use the keys to look up specific values!
If an entry has no per-node counter (i.e. it does not show up in
memory.numa_stat), we use 'npn' (non-per-node) as the tag
to indicate that it will not show in memory.numa_stat.
anon anon
Amount of memory used in anonymous mappings such as Amount of memory used in anonymous mappings such as
brk(), sbrk(), and mmap(MAP_ANONYMOUS) brk(), sbrk(), and mmap(MAP_ANONYMOUS)
@ -1270,15 +1274,11 @@ PAGE_SIZE multiple when read back.
kernel_stack kernel_stack
Amount of memory allocated to kernel stacks. Amount of memory allocated to kernel stacks.
slab percpu(npn)
Amount of memory used for storing in-kernel data
structures.
percpu
Amount of memory used for storing per-cpu kernel Amount of memory used for storing per-cpu kernel
data structures. data structures.
sock sock(npn)
Amount of memory used in network transmission buffers Amount of memory used in network transmission buffers
shmem shmem
@ -1318,11 +1318,9 @@ PAGE_SIZE multiple when read back.
Part of "slab" that cannot be reclaimed on memory Part of "slab" that cannot be reclaimed on memory
pressure. pressure.
pgfault slab(npn)
Total number of page faults incurred Amount of memory used for storing in-kernel data
structures.
pgmajfault
Number of major page faults incurred
workingset_refault_anon workingset_refault_anon
Number of refaults of previously evicted anonymous pages. Number of refaults of previously evicted anonymous pages.
@ -1348,37 +1346,68 @@ PAGE_SIZE multiple when read back.
workingset_nodereclaim workingset_nodereclaim
Number of times a shadow node has been reclaimed Number of times a shadow node has been reclaimed
pgrefill pgfault(npn)
Total number of page faults incurred
pgmajfault(npn)
Number of major page faults incurred
pgrefill(npn)
Amount of scanned pages (in an active LRU list) Amount of scanned pages (in an active LRU list)
pgscan pgscan(npn)
Amount of scanned pages (in an inactive LRU list) Amount of scanned pages (in an inactive LRU list)
pgsteal pgsteal(npn)
Amount of reclaimed pages Amount of reclaimed pages
pgactivate pgactivate(npn)
Amount of pages moved to the active LRU list Amount of pages moved to the active LRU list
pgdeactivate pgdeactivate(npn)
Amount of pages moved to the inactive LRU list Amount of pages moved to the inactive LRU list
pglazyfree pglazyfree(npn)
Amount of pages postponed to be freed under memory pressure Amount of pages postponed to be freed under memory pressure
pglazyfreed pglazyfreed(npn)
Amount of reclaimed lazyfree pages Amount of reclaimed lazyfree pages
thp_fault_alloc thp_fault_alloc(npn)
Number of transparent hugepages which were allocated to satisfy Number of transparent hugepages which were allocated to satisfy
a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE a page fault. This counter is not present when CONFIG_TRANSPARENT_HUGEPAGE
is not set. is not set.
thp_collapse_alloc thp_collapse_alloc(npn)
Number of transparent hugepages which were allocated to allow Number of transparent hugepages which were allocated to allow
collapsing an existing range of pages. This counter is not collapsing an existing range of pages. This counter is not
present when CONFIG_TRANSPARENT_HUGEPAGE is not set. present when CONFIG_TRANSPARENT_HUGEPAGE is not set.
memory.numa_stat
A read-only nested-keyed file which exists on non-root cgroups.
This breaks down the cgroup's memory footprint into different
types of memory, type-specific details, and other information
per node on the state of the memory management system.
This is useful for providing visibility into the NUMA locality
information within a memcg since the pages are allowed to be
allocated from any physical node. One of the use cases is evaluating
application performance by combining this information with the
application's CPU allocation.
All memory amounts are in bytes.
The output format of memory.numa_stat is::
type N0=<bytes in node 0> N1=<bytes in node 1> ...
The entries are ordered to be human readable, and new entries
can show up in the middle. Don't rely on items remaining in a
fixed position; use the keys to look up specific values!
For the meaning of each entry, refer to memory.stat.
memory.swap.current memory.swap.current
A read-only single value file which exists on non-root A read-only single value file which exists on non-root
cgroups. cgroups.

View File

@ -131,7 +131,7 @@ hugepages
parameter is preceded by an invalid hugepagesz parameter, it will parameter is preceded by an invalid hugepagesz parameter, it will
be ignored. be ignored.
default_hugepagesz default_hugepagesz
pecify the default huge page size. This parameter can Specify the default huge page size. This parameter can
only be specified once on the command line. default_hugepagesz can only be specified once on the command line. default_hugepagesz can
optionally be followed by the hugepages parameter to preallocate a optionally be followed by the hugepages parameter to preallocate a
specific number of huge pages of default size. The number of default specific number of huge pages of default size. The number of default

View File

@ -13,10 +13,10 @@ KASAN uses compile-time instrumentation to insert validity checks before every
memory access, and therefore requires a compiler version that supports that. memory access, and therefore requires a compiler version that supports that.
Generic KASAN is supported in both GCC and Clang. With GCC it requires version Generic KASAN is supported in both GCC and Clang. With GCC it requires version
8.3.0 or later. With Clang it requires version 7.0.0 or later, but detection of 8.3.0 or later. Any supported Clang version is compatible, but detection of
out-of-bounds accesses for global variables is only supported since Clang 11. out-of-bounds accesses for global variables is only supported since Clang 11.
Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later. Tag-based KASAN is only supported in Clang.
Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and
riscv architectures, and tag-based KASAN is supported only for arm64. riscv architectures, and tag-based KASAN is supported only for arm64.
@ -281,3 +281,73 @@ unmapped. This will require changes in arch-specific code.
This allows ``VMAP_STACK`` support on x86, and can simplify support of This allows ``VMAP_STACK`` support on x86, and can simplify support of
architectures that do not have a fixed module region. architectures that do not have a fixed module region.
CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE
--------------------------------------------------
``CONFIG_KASAN_KUNIT_TEST`` utilizes the KUnit Test Framework for testing.
This means each test focuses on a small unit of functionality and
there are a few ways these tests can be run.
Each test will print the KASAN report if an error is detected and then
print the number of the test and the status of the test:
pass::
ok 28 - kmalloc_double_kzfree
or, if kmalloc failed::
# kmalloc_large_oob_right: ASSERTION FAILED at lib/test_kasan.c:163
Expected ptr is not null, but is
not ok 4 - kmalloc_large_oob_right
or, if a KASAN report was expected, but not found::
# kmalloc_double_kzfree: EXPECTATION FAILED at lib/test_kasan.c:629
Expected kasan_data->report_expected == kasan_data->report_found, but
kasan_data->report_expected == 1
kasan_data->report_found == 0
not ok 28 - kmalloc_double_kzfree
All test statuses are tracked as they run and an overall status will
be printed at the end::
ok 1 - kasan
or::
not ok 1 - kasan
(1) Loadable Module
~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` enabled, ``CONFIG_KASAN_KUNIT_TEST`` can be built as
a loadable module and run on any architecture that supports KASAN
using something like insmod or modprobe. The module is called ``test_kasan``.
(2) Built-In
~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` built-in, ``CONFIG_KASAN_KUNIT_TEST`` can be built-in
on any architecture that supports KASAN. These and any other KUnit
tests enabled will run and print the results at boot as a late-init
call.
(3) Using kunit_tool
~~~~~~~~~~~~~~~~~~~~~
With ``CONFIG_KUNIT`` and ``CONFIG_KASAN_KUNIT_TEST`` built-in, we can also
use kunit_tool to see the results of these along with other KUnit
tests in a more readable way. This will not print the KASAN reports
of tests that passed. Use `KUnit documentation <https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html>`_ for more up-to-date
information on kunit_tool.
.. _KUnit: https://www.kernel.org/doc/html/latest/dev-tools/kunit/index.html
``CONFIG_TEST_KASAN_MODULE`` is a set of KASAN tests that could not be
converted to KUnit. These tests can be run only as a module with
``CONFIG_TEST_KASAN_MODULE`` built as a loadable module and
``CONFIG_KASAN`` built-in. The type of error expected and the
function being run is printed before the expression expected to give
an error. Then the error is printed, if found, and that test
should be interpreted to pass only if the error was the one expected
by the test.

View File

@ -229,7 +229,7 @@ Testing with kmemleak-test
To check if you have all set up to use kmemleak, you can use the kmemleak-test To check if you have all set up to use kmemleak, you can use the kmemleak-test
module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST module, a module that deliberately leaks memory. Set CONFIG_DEBUG_KMEMLEAK_TEST
as module (it can't be used as bult-in) and boot the kernel with kmemleak as module (it can't be used as built-in) and boot the kernel with kmemleak
enabled. Load the module and perform a scan with:: enabled. Load the module and perform a scan with::
# modprobe kmemleak-test # modprobe kmemleak-test

View File

@ -21,6 +21,7 @@ This document describes the Linux kernel Makefiles.
--- 3.10 Special Rules --- 3.10 Special Rules
--- 3.11 $(CC) support functions --- 3.11 $(CC) support functions
--- 3.12 $(LD) support functions --- 3.12 $(LD) support functions
--- 3.13 Script Invocation
=== 4 Host Program support === 4 Host Program support
--- 4.1 Simple Host Program --- 4.1 Simple Host Program
@ -605,6 +606,25 @@ more details, with real examples.
#Makefile #Makefile
LDFLAGS_vmlinux += $(call ld-option, -X) LDFLAGS_vmlinux += $(call ld-option, -X)
3.13 Script invocation
----------------------
Make rules may invoke scripts to build the kernel. The rules shall
always provide the appropriate interpreter to execute the script. They
shall not rely on the execute bits being set, and shall not invoke the
script directly. For the convenience of manual script invocation, such
as invoking ./scripts/checkpatch.pl, it is recommended to set execute
bits on the scripts nonetheless.
Kbuild provides variables $(CONFIG_SHELL), $(AWK), $(PERL),
$(PYTHON) and $(PYTHON3) to refer to interpreters for the respective
scripts.
Example::
#Makefile
cmd_depmod = $(CONFIG_SHELL) $(srctree)/scripts/depmod.sh $(DEPMOD) \
$(KERNELRELEASE)
4 Host Program support 4 Host Program support
====================== ======================

View File

@ -64,7 +64,7 @@ Active MM
actually get cases where you have a address space that is _only_ used by actually get cases where you have a address space that is _only_ used by
lazy users. That is often a short-lived state, because once that thread lazy users. That is often a short-lived state, because once that thread
gets scheduled away in favour of a real thread, the "zombie" mm gets gets scheduled away in favour of a real thread, the "zombie" mm gets
released because "mm_users" becomes zero. released because "mm_count" becomes zero.
Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any Also, a new rule is that _nobody_ ever has "init_mm" as a real MM any
more. "init_mm" should be considered just a "lazy context when no other more. "init_mm" should be considered just a "lazy context when no other

View File

@ -173,6 +173,10 @@ NUMA
numa=noacpi numa=noacpi
Don't parse the SRAT table for NUMA setup Don't parse the SRAT table for NUMA setup
numa=nohmat
Don't parse the HMAT table for NUMA setup, or soft-reserved memory
partitioning.
numa=fake=<size>[MG] numa=fake=<size>[MG]
If given as a memory unit, fills all system RAM with nodes of If given as a memory unit, fills all system RAM with nodes of
size interleaved over physical nodes. size interleaved over physical nodes.

View File

@ -9727,8 +9727,8 @@ M: Catalin Marinas <catalin.marinas@arm.com>
S: Maintained S: Maintained
F: Documentation/dev-tools/kmemleak.rst F: Documentation/dev-tools/kmemleak.rst
F: include/linux/kmemleak.h F: include/linux/kmemleak.h
F: mm/kmemleak-test.c
F: mm/kmemleak.c F: mm/kmemleak.c
F: samples/kmemleak/kmemleak-test.c
KMOD KERNEL MODULE LOADER - USERMODE HELPER KMOD KERNEL MODULE LOADER - USERMODE HELPER
M: Luis Chamberlain <mcgrof@kernel.org> M: Luis Chamberlain <mcgrof@kernel.org>

View File

@ -921,15 +921,6 @@ KBUILD_CFLAGS += $(call cc-disable-warning, maybe-uninitialized)
# disable invalid "can't wrap" optimizations for signed / pointers # disable invalid "can't wrap" optimizations for signed / pointers
KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow) KBUILD_CFLAGS += $(call cc-option,-fno-strict-overflow)
# clang sets -fmerge-all-constants by default as optimization, but this
# is non-conforming behavior for C and in fact breaks the kernel, so we
# need to disable it here generally.
KBUILD_CFLAGS += $(call cc-option,-fno-merge-all-constants)
# for gcc -fno-merge-all-constants disables everything, but it is fine
# to have actual conforming behavior enabled.
KBUILD_CFLAGS += $(call cc-option,-fmerge-constants)
# Make sure -fstack-check isn't enabled (like gentoo apparently did) # Make sure -fstack-check isn't enabled (like gentoo apparently did)
KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,) KBUILD_CFLAGS += $(call cc-option,-fno-stack-check,)

View File

@ -85,7 +85,7 @@ config ARM
select HAVE_FAST_GUP if ARM_LPAE select HAVE_FAST_GUP if ARM_LPAE
select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG
select HAVE_FUNCTION_TRACER if !XIP_KERNEL && (CC_IS_GCC || CLANG_VERSION >= 100000) select HAVE_FUNCTION_TRACER if !XIP_KERNEL
select HAVE_GCC_PLUGINS select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7)
select HAVE_IDE if PCI || ISA || PCMCIA select HAVE_IDE if PCI || ISA || PCMCIA

View File

@ -59,6 +59,7 @@ __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
#ifdef CONFIG_ARM_LPAE #ifdef CONFIG_ARM_LPAE
struct page *page = virt_to_page(pmdp); struct page *page = virt_to_page(pmdp);
pgtable_pmd_page_dtor(page);
tlb_remove_table(tlb, page); tlb_remove_table(tlb, page);
#endif #endif
} }

View File

@ -843,19 +843,25 @@ early_param("mem", early_mem);
static void __init request_standard_resources(const struct machine_desc *mdesc) static void __init request_standard_resources(const struct machine_desc *mdesc)
{ {
struct memblock_region *region; phys_addr_t start, end, res_end;
struct resource *res; struct resource *res;
u64 i;
kernel_code.start = virt_to_phys(_text); kernel_code.start = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1); kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start = virt_to_phys(_sdata); kernel_data.start = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1); kernel_data.end = virt_to_phys(_end - 1);
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = __pfn_to_phys(memblock_region_memory_base_pfn(region));
phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
unsigned long boot_alias_start; unsigned long boot_alias_start;
/*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res_end = end - 1;
/* /*
* Some systems have a special memory alias which is only * Some systems have a special memory alias which is only
* used for booting. We need to advertise this region to * used for booting. We need to advertise this region to
@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
__func__, sizeof(*res)); __func__, sizeof(*res));
res->name = "System RAM (boot alias)"; res->name = "System RAM (boot alias)";
res->start = boot_alias_start; res->start = boot_alias_start;
res->end = phys_to_idmap(end); res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);
} }
@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc)
sizeof(*res)); sizeof(*res));
res->name = "System RAM"; res->name = "System RAM";
res->start = start; res->start = start;
res->end = end; res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);

View File

@ -299,16 +299,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/ */
static void __init free_unused_memmap(void) static void __init free_unused_memmap(void)
{ {
unsigned long start, prev_end = 0; unsigned long start, end, prev_end = 0;
struct memblock_region *reg; int i;
/* /*
* This relies on each bank being in address order. * This relies on each bank being in address order.
* The banks are sorted previously in bootmem_init(). * The banks are sorted previously in bootmem_init().
*/ */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
start = memblock_region_memory_base_pfn(reg);
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
/* /*
* Take care not to free memmap entries that don't exist * Take care not to free memmap entries that don't exist
@ -336,8 +334,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to * memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES. * MAX_ORDER_NR_PAGES.
*/ */
prev_end = ALIGN(memblock_region_memory_end_pfn(reg), prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
MAX_ORDER_NR_PAGES);
} }
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
@ -347,61 +344,29 @@ static void __init free_unused_memmap(void)
#endif #endif
} }
#ifdef CONFIG_HIGHMEM
static inline void free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
#endif
static void __init free_highpages(void) static void __init free_highpages(void)
{ {
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn; unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res; phys_addr_t range_start, range_end;
u64 i;
/* set highmem page free */ /* set highmem page free */
for_each_memblock(memory, mem) { for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
unsigned long start = memblock_region_memory_base_pfn(mem); &range_start, &range_end, NULL) {
unsigned long end = memblock_region_memory_end_pfn(mem); unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */ /* Ignore complete lowmem entries */
if (end <= max_low) if (end <= max_low)
continue; continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */ /* Truncate partial highmem entries */
if (start < max_low) if (start < max_low)
start = max_low; start = max_low;
/* Find and exclude any reserved regions */ for (; start < end; start++)
for_each_memblock(reserved, res) { free_highmem_page(pfn_to_page(start));
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
} }
#endif #endif
} }

View File

@ -1154,9 +1154,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
void __init adjust_lowmem_bounds(void) void __init adjust_lowmem_bounds(void)
{ {
phys_addr_t memblock_limit = 0; phys_addr_t block_start, block_end, memblock_limit = 0;
u64 vmalloc_limit; u64 vmalloc_limit, i;
struct memblock_region *reg;
phys_addr_t lowmem_limit = 0; phys_addr_t lowmem_limit = 0;
/* /*
@ -1172,26 +1171,18 @@ void __init adjust_lowmem_bounds(void)
* The first usable region must be PMD aligned. Mark its start * The first usable region must be PMD aligned. Mark its start
* as MEMBLOCK_NOMAP if it isn't * as MEMBLOCK_NOMAP if it isn't
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &block_start, &block_end) {
if (!memblock_is_nomap(reg)) { if (!IS_ALIGNED(block_start, PMD_SIZE)) {
if (!IS_ALIGNED(reg->base, PMD_SIZE)) { phys_addr_t len;
phys_addr_t len;
len = round_up(reg->base, PMD_SIZE) - reg->base; len = round_up(block_start, PMD_SIZE) - block_start;
memblock_mark_nomap(reg->base, len); memblock_mark_nomap(block_start, len);
}
break;
} }
break;
} }
for_each_memblock(memory, reg) { for_each_mem_range(i, &block_start, &block_end) {
phys_addr_t block_start = reg->base; if (block_start < vmalloc_limit) {
phys_addr_t block_end = reg->base + reg->size;
if (memblock_is_nomap(reg))
continue;
if (reg->base < vmalloc_limit) {
if (block_end > lowmem_limit) if (block_end > lowmem_limit)
/* /*
* Compare as u64 to ensure vmalloc_limit does * Compare as u64 to ensure vmalloc_limit does
@ -1440,19 +1431,15 @@ static void __init kmap_init(void)
static void __init map_lowmem(void) static void __init map_lowmem(void)
{ {
struct memblock_region *reg;
phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE); phys_addr_t kernel_x_start = round_down(__pa(KERNEL_START), SECTION_SIZE);
phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE); phys_addr_t kernel_x_end = round_up(__pa(__init_end), SECTION_SIZE);
phys_addr_t start, end;
u64 i;
/* Map all the lowmem memory banks. */ /* Map all the lowmem memory banks. */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
struct map_desc map; struct map_desc map;
if (memblock_is_nomap(reg))
continue;
if (end > arm_lowmem_limit) if (end > arm_lowmem_limit)
end = arm_lowmem_limit; end = arm_lowmem_limit;
if (start >= end) if (start >= end)

View File

@ -231,12 +231,12 @@ static int __init allocate_region(phys_addr_t base, phys_addr_t size,
void __init pmsav7_adjust_lowmem_bounds(void) void __init pmsav7_adjust_lowmem_bounds(void)
{ {
phys_addr_t specified_mem_size = 0, total_mem_size = 0; phys_addr_t specified_mem_size = 0, total_mem_size = 0;
struct memblock_region *reg;
bool first = true;
phys_addr_t mem_start; phys_addr_t mem_start;
phys_addr_t mem_end; phys_addr_t mem_end;
phys_addr_t reg_start, reg_end;
unsigned int mem_max_regions; unsigned int mem_max_regions;
int num, i; int num;
u64 i;
/* Free-up PMSAv7_PROBE_REGION */ /* Free-up PMSAv7_PROBE_REGION */
mpu_min_region_order = __mpu_min_region_order(); mpu_min_region_order = __mpu_min_region_order();
@ -262,20 +262,19 @@ void __init pmsav7_adjust_lowmem_bounds(void)
mem_max_regions -= num; mem_max_regions -= num;
#endif #endif
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
if (first) { if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t phys_offset = PHYS_OFFSET;
/* /*
* Initially only use memory continuous from * Initially only use memory continuous from
* PHYS_OFFSET */ * PHYS_OFFSET */
if (reg->base != phys_offset) if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET"); panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_start = reg->base; mem_start = reg_start;
mem_end = reg->base + reg->size; mem_end = reg_end;
specified_mem_size = reg->size; specified_mem_size = mem_end - mem_start;
first = false;
} else { } else {
/* /*
* memblock auto merges contiguous blocks, remove * memblock auto merges contiguous blocks, remove
@ -283,8 +282,8 @@ void __init pmsav7_adjust_lowmem_bounds(void)
* blocks separately while iterating) * blocks separately while iterating)
*/ */
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base); &mem_end, &reg_start);
memblock_remove(reg->base, 0 - reg->base); memblock_remove(reg_start, 0 - reg_start);
break; break;
} }
} }

View File

@ -94,20 +94,19 @@ static __init bool is_region_fixed(int number)
void __init pmsav8_adjust_lowmem_bounds(void) void __init pmsav8_adjust_lowmem_bounds(void)
{ {
phys_addr_t mem_end; phys_addr_t mem_end;
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
bool first = true; u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
if (first) { if (i == 0) {
phys_addr_t phys_offset = PHYS_OFFSET; phys_addr_t phys_offset = PHYS_OFFSET;
/* /*
* Initially only use memory continuous from * Initially only use memory continuous from
* PHYS_OFFSET */ * PHYS_OFFSET */
if (reg->base != phys_offset) if (reg_start != phys_offset)
panic("First memory bank must be contiguous from PHYS_OFFSET"); panic("First memory bank must be contiguous from PHYS_OFFSET");
mem_end = reg->base + reg->size; mem_end = reg_end;
first = false;
} else { } else {
/* /*
* memblock auto merges contiguous blocks, remove * memblock auto merges contiguous blocks, remove
@ -115,8 +114,8 @@ void __init pmsav8_adjust_lowmem_bounds(void)
* blocks separately while iterating) * blocks separately while iterating)
*/ */
pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n", pr_notice("Ignoring RAM after %pa, memory at %pa ignored\n",
&mem_end, &reg->base); &mem_end, &reg_start);
memblock_remove(reg->base, 0 - reg->base); memblock_remove(reg_start, 0 - reg_start);
break; break;
} }
} }

View File

@ -25,11 +25,12 @@
unsigned long xen_get_swiotlb_free_pages(unsigned int order) unsigned long xen_get_swiotlb_free_pages(unsigned int order)
{ {
struct memblock_region *reg; phys_addr_t base;
gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM; gfp_t flags = __GFP_NOWARN|__GFP_KSWAPD_RECLAIM;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, NULL) {
if (reg->base < (phys_addr_t)0xffffffff) { if (base < (phys_addr_t)0xffffffff) {
if (IS_ENABLED(CONFIG_ZONE_DMA32)) if (IS_ENABLED(CONFIG_ZONE_DMA32))
flags |= __GFP_DMA32; flags |= __GFP_DMA32;
else else

View File

@ -1599,8 +1599,6 @@ config ARM64_BTI_KERNEL
depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
depends on !CC_IS_GCC || GCC_VERSION >= 100100 depends on !CC_IS_GCC || GCC_VERSION >= 100100
# https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55
depends on !CC_IS_CLANG || CLANG_VERSION >= 100001
depends on !(CC_IS_CLANG && GCOV_KERNEL) depends on !(CC_IS_CLANG && GCOV_KERNEL)
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
help help

View File

@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
phys_addr_t start, end; phys_addr_t start, end;
nr_ranges = 1; /* for exclusion of crashkernel region */ nr_ranges = 1; /* for exclusion of crashkernel region */
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end)
MEMBLOCK_NONE, &start, &end, NULL)
nr_ranges++; nr_ranges++;
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
cmem->max_nr_ranges = nr_ranges; cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0; cmem->nr_ranges = 0;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++; cmem->nr_ranges++;

View File

@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources) if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
for_each_memblock(memory, region) { for_each_mem_region(region) {
res = &standard_resources[i++]; res = &standard_resources[i++];
if (memblock_is_nomap(region)) { if (memblock_is_nomap(region)) {
res->name = "reserved"; res->name = "reserved";
@ -257,7 +257,7 @@ static int __init reserve_memblock_reserved_regions(void)
if (!memblock_is_region_reserved(mem->start, mem_size)) if (!memblock_is_region_reserved(mem->start, mem_size))
continue; continue;
for_each_reserved_mem_region(j, &r_start, &r_end) { for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end; resource_size_t start, end;
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);

View File

@ -43,13 +43,6 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y) CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif endif
# Clang versions less than 8 do not support -mcmodel=tiny
ifeq ($(CONFIG_CC_IS_CLANG), y)
ifeq ($(shell test $(CONFIG_CLANG_VERSION) -lt 80000; echo $$?),0)
CFLAGS_REMOVE_vgettimeofday.o += -mcmodel=tiny
endif
endif
# Disable gcov profiling for VDSO code # Disable gcov profiling for VDSO code
GCOV_PROFILE := n GCOV_PROFILE := n

View File

@ -471,12 +471,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn)
*/ */
static void __init free_unused_memmap(void) static void __init free_unused_memmap(void)
{ {
unsigned long start, prev_end = 0; unsigned long start, end, prev_end = 0;
struct memblock_region *reg; int i;
for_each_memblock(memory, reg) {
start = __phys_to_pfn(reg->base);
for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM
/* /*
* Take care not to free memmap entries that don't exist due * Take care not to free memmap entries that don't exist due
@ -496,8 +494,7 @@ static void __init free_unused_memmap(void)
* memmap entries are valid from the bank end aligned to * memmap entries are valid from the bank end aligned to
* MAX_ORDER_NR_PAGES. * MAX_ORDER_NR_PAGES.
*/ */
prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
MAX_ORDER_NR_PAGES);
} }
#ifdef CONFIG_SPARSEMEM #ifdef CONFIG_SPARSEMEM

View File

@ -212,8 +212,8 @@ void __init kasan_init(void)
{ {
u64 kimg_shadow_start, kimg_shadow_end; u64 kimg_shadow_start, kimg_shadow_end;
u64 mod_shadow_start, mod_shadow_end; u64 mod_shadow_start, mod_shadow_end;
struct memblock_region *reg; phys_addr_t pa_start, pa_end;
int i; u64 i;
kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK; kimg_shadow_start = (u64)kasan_mem_to_shadow(_text) & PAGE_MASK;
kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end)); kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(_end));
@ -246,9 +246,9 @@ void __init kasan_init(void)
kasan_populate_early_shadow((void *)mod_shadow_end, kasan_populate_early_shadow((void *)mod_shadow_end,
(void *)kimg_shadow_start); (void *)kimg_shadow_start);
for_each_memblock(memory, reg) { for_each_mem_range(i, &pa_start, &pa_end) {
void *start = (void *)__phys_to_virt(reg->base); void *start = (void *)__phys_to_virt(pa_start);
void *end = (void *)__phys_to_virt(reg->base + reg->size); void *end = (void *)__phys_to_virt(pa_end);
if (start >= end) if (start >= end)
break; break;

View File

@ -473,8 +473,9 @@ static void __init map_mem(pgd_t *pgdp)
{ {
phys_addr_t kernel_start = __pa_symbol(_text); phys_addr_t kernel_start = __pa_symbol(_text);
phys_addr_t kernel_end = __pa_symbol(__init_begin); phys_addr_t kernel_end = __pa_symbol(__init_begin);
struct memblock_region *reg; phys_addr_t start, end;
int flags = 0; int flags = 0;
u64 i;
if (rodata_full || debug_pagealloc_enabled()) if (rodata_full || debug_pagealloc_enabled())
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
@ -493,15 +494,9 @@ static void __init map_mem(pgd_t *pgdp)
#endif #endif
/* map all the memory banks */ /* map all the memory banks */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = reg->base;
phys_addr_t end = start + reg->size;
if (start >= end) if (start >= end)
break; break;
if (memblock_is_nomap(reg))
continue;
/* /*
* The linear map must allow allocation tags reading/writing * The linear map must allow allocation tags reading/writing
* if MTE is present. Otherwise, it has the same attributes as * if MTE is present. Otherwise, it has the same attributes as

View File

@ -354,7 +354,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk; struct memblock_region *mblk;
/* Check that valid nid is set to memblks */ /* Check that valid nid is set to memblks */
for_each_memblock(memory, mblk) { for_each_mem_region(mblk) {
int mblk_nid = memblock_get_region_node(mblk); int mblk_nid = memblock_get_region_node(mblk);
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
@ -427,19 +427,16 @@ out_free_distance:
*/ */
static int __init dummy_numa_init(void) static int __init dummy_numa_init(void)
{ {
phys_addr_t start = memblock_start_of_DRAM();
phys_addr_t end = memblock_end_of_DRAM();
int ret; int ret;
struct memblock_region *mblk;
if (numa_off) if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */ pr_info("NUMA disabled\n"); /* Forced off on command line. */
pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
for_each_memblock(memory, mblk) {
ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
if (!ret)
continue;
ret = numa_add_memblk(0, start, end);
if (ret) {
pr_err("NUMA init failed\n"); pr_err("NUMA init failed\n");
return ret; return ret;
} }

View File

@ -287,7 +287,8 @@ notrace void __init machine_init(unsigned long dt_ptr)
void __init setup_arch(char **cmdline_p) void __init setup_arch(char **cmdline_p)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
printk(KERN_INFO "Initializing kernel\n"); printk(KERN_INFO "Initializing kernel\n");
@ -351,9 +352,9 @@ void __init setup_arch(char **cmdline_p)
disable_caching(ram_start, ram_end - 1); disable_caching(ram_start, ram_end - 1);
/* Set caching of external RAM used by Linux */ /* Set caching of external RAM used by Linux */
for_each_memblock(memory, reg) for_each_mem_range(i, &start, &end)
enable_caching(CACHE_REGION_START(reg->base), enable_caching(CACHE_REGION_START(start),
CACHE_REGION_START(reg->base + reg->size - 1)); CACHE_REGION_START(end - 1));
#ifdef CONFIG_BLK_DEV_INITRD #ifdef CONFIG_BLK_DEV_INITRD
/* /*

View File

@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
}
if (!memory_end) if (!memory_end)
panic("No memory!"); panic("No memory!");
/* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */
min_low_pfn = PFN_UP(memory_start); min_low_pfn = PFN_UP(memory_start);
max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn; max_pfn = max_low_pfn;
memblock_reserve(__pa(_stext), _end - _stext); memblock_reserve(__pa(_stext), _end - _stext);

View File

@ -108,15 +108,15 @@ static void __init paging_init(void)
void __init setup_memory(void) void __init setup_memory(void)
{ {
struct memblock_region *reg;
#ifndef CONFIG_MMU #ifndef CONFIG_MMU
u32 kernel_align_start, kernel_align_size; u32 kernel_align_start, kernel_align_size;
phys_addr_t start, end;
u64 i;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
memory_start = (u32)reg->base; memory_start = start;
lowmem_size = reg->size; lowmem_size = end - start;
if ((memory_start <= (u32)_text) && if ((memory_start <= (u32)_text) &&
((u32)_text <= (memory_start + lowmem_size - 1))) { ((u32)_text <= (memory_start + lowmem_size - 1))) {
memory_size = lowmem_size; memory_size = lowmem_size;
@ -164,17 +164,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
/* Add active regions with valid PFNs */
for_each_memblock(memory, reg) {
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(start_pfn << PAGE_SHIFT,
(end_pfn - start_pfn) << PAGE_SHIFT,
&memblock.memory, 0);
}
paging_init(); paging_init();
} }

View File

@ -190,25 +190,25 @@ char *octeon_swiotlb;
void __init plat_swiotlb_setup(void) void __init plat_swiotlb_setup(void)
{ {
struct memblock_region *mem; phys_addr_t start, end;
phys_addr_t max_addr; phys_addr_t max_addr;
phys_addr_t addr_size; phys_addr_t addr_size;
size_t swiotlbsize; size_t swiotlbsize;
unsigned long swiotlb_nslabs; unsigned long swiotlb_nslabs;
u64 i;
max_addr = 0; max_addr = 0;
addr_size = 0; addr_size = 0;
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
/* These addresses map low for PCI. */ /* These addresses map low for PCI. */
if (mem->base > 0x410000000ull && !OCTEON_IS_OCTEON2()) if (start > 0x410000000ull && !OCTEON_IS_OCTEON2())
continue; continue;
addr_size += mem->size; addr_size += (end - start);
if (max_addr < mem->base + mem->size)
max_addr = mem->base + mem->size;
if (max_addr < end)
max_addr = end;
} }
swiotlbsize = PAGE_SIZE; swiotlbsize = PAGE_SIZE;

View File

@ -300,8 +300,9 @@ static void __init bootmem_init(void)
static void __init bootmem_init(void) static void __init bootmem_init(void)
{ {
struct memblock_region *mem;
phys_addr_t ramstart, ramend; phys_addr_t ramstart, ramend;
phys_addr_t start, end;
u64 i;
ramstart = memblock_start_of_DRAM(); ramstart = memblock_start_of_DRAM();
ramend = memblock_end_of_DRAM(); ramend = memblock_end_of_DRAM();
@ -338,18 +339,13 @@ static void __init bootmem_init(void)
min_low_pfn = ARCH_PFN_OFFSET; min_low_pfn = ARCH_PFN_OFFSET;
max_pfn = PFN_DOWN(ramend); max_pfn = PFN_DOWN(ramend);
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
unsigned long start = memblock_region_memory_base_pfn(mem);
unsigned long end = memblock_region_memory_end_pfn(mem);
/* /*
* Skip highmem here so we get an accurate max_low_pfn if low * Skip highmem here so we get an accurate max_low_pfn if low
* memory stops short of high memory. * memory stops short of high memory.
* If the region overlaps HIGHMEM_START, end is clipped so * If the region overlaps HIGHMEM_START, end is clipped so
* max_pfn excludes the highmem portion. * max_pfn excludes the highmem portion.
*/ */
if (memblock_is_nomap(mem))
continue;
if (start >= PFN_DOWN(HIGHMEM_START)) if (start >= PFN_DOWN(HIGHMEM_START))
continue; continue;
if (end > PFN_DOWN(HIGHMEM_START)) if (end > PFN_DOWN(HIGHMEM_START))
@ -450,13 +446,12 @@ early_param("memmap", early_parse_memmap);
unsigned long setup_elfcorehdr, setup_elfcorehdr_size; unsigned long setup_elfcorehdr, setup_elfcorehdr_size;
static int __init early_parse_elfcorehdr(char *p) static int __init early_parse_elfcorehdr(char *p)
{ {
struct memblock_region *mem; phys_addr_t start, end;
u64 i;
setup_elfcorehdr = memparse(p, &p); setup_elfcorehdr = memparse(p, &p);
for_each_memblock(memory, mem) { for_each_mem_range(i, &start, &end) {
unsigned long start = mem->base;
unsigned long end = start + mem->size;
if (setup_elfcorehdr >= start && setup_elfcorehdr < end) { if (setup_elfcorehdr >= start && setup_elfcorehdr < end) {
/* /*
* Reserve from the elf core header to the end of * Reserve from the elf core header to the end of
@ -720,7 +715,8 @@ static void __init arch_mem_init(char **cmdline_p)
static void __init resource_init(void) static void __init resource_init(void)
{ {
struct memblock_region *region; phys_addr_t start, end;
u64 i;
if (UNCAC_BASE != IO_BASE) if (UNCAC_BASE != IO_BASE)
return; return;
@ -732,9 +728,7 @@ static void __init resource_init(void)
bss_resource.start = __pa_symbol(&__bss_start); bss_resource.start = __pa_symbol(&__bss_start);
bss_resource.end = __pa_symbol(&__bss_stop) - 1; bss_resource.end = __pa_symbol(&__bss_stop) - 1;
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
phys_addr_t start = PFN_PHYS(memblock_region_memory_base_pfn(region));
phys_addr_t end = PFN_PHYS(memblock_region_memory_end_pfn(region)) - 1;
struct resource *res; struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);
@ -743,7 +737,12 @@ static void __init resource_init(void)
sizeof(struct resource)); sizeof(struct resource));
res->start = start; res->start = start;
res->end = end; /*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
res->name = "System RAM"; res->name = "System RAM";

View File

@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512; const int pref_backup = 512;
struct memblock_region *mem; struct memblock_region *mem;
for_each_memblock(memory, mem) { for_each_mem_region(mem) {
memblock_remove(mem->base + mem->size - pref_backup, memblock_remove(mem->base + mem->size - pref_backup,
pref_backup); pref_backup);
} }

View File

@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel */ /* Find main memory where is the kernel */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
if (!memory_end) { if (!memory_end) {
panic("No memory!"); panic("No memory!");

View File

@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn; unsigned long ram_start_pfn;
unsigned long ram_end_pfn; unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end; phys_addr_t memory_start, memory_end;
struct memblock_region *region;
memory_end = memory_start = 0; memory_end = memory_start = 0;
/* Find main memory where is the kernel, we assume it's the only one */ /* Find main memory where is the kernel, we assume it's the only one */
for_each_memblock(memory, region) { memory_start = memblock_start_of_DRAM();
memory_start = region->base; memory_end = memblock_end_of_DRAM();
memory_end = region->base + region->size;
printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
memory_start, memory_end);
}
if (!memory_end) { if (!memory_end) {
panic("No memory!"); panic("No memory!");

View File

@ -64,6 +64,7 @@ extern const char _s_kernel_ro[], _e_kernel_ro[];
*/ */
static void __init map_ram(void) static void __init map_ram(void)
{ {
phys_addr_t start, end;
unsigned long v, p, e; unsigned long v, p, e;
pgprot_t prot; pgprot_t prot;
pgd_t *pge; pgd_t *pge;
@ -71,6 +72,7 @@ static void __init map_ram(void)
pud_t *pue; pud_t *pue;
pmd_t *pme; pmd_t *pme;
pte_t *pte; pte_t *pte;
u64 i;
/* These mark extents of read-only kernel pages... /* These mark extents of read-only kernel pages...
* ...from vmlinux.lds.S * ...from vmlinux.lds.S
*/ */
@ -78,9 +80,9 @@ static void __init map_ram(void)
v = PAGE_OFFSET; v = PAGE_OFFSET;
for_each_memblock(memory, region) { for_each_mem_range(i, &start, &end) {
p = (u32) region->base & PAGE_MASK; p = (u32) start & PAGE_MASK;
e = p + (u32) region->size; e = (u32) end;
v = (u32) __va(p); v = (u32) __va(p);
pge = pgd_offset_k(v); pge = pgd_offset_k(v);

View File

@ -191,13 +191,13 @@ int is_fadump_active(void)
*/ */
static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end) static bool is_fadump_mem_area_contiguous(u64 d_start, u64 d_end)
{ {
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
bool ret = false; bool ret = false;
u64 start, end; u64 i, start, end;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
start = max_t(u64, d_start, reg->base); start = max_t(u64, d_start, reg_start);
end = min_t(u64, d_end, (reg->base + reg->size)); end = min_t(u64, d_end, reg_end);
if (d_start < end) { if (d_start < end) {
/* Memory hole from d_start to start */ /* Memory hole from d_start to start */
if (start > d_start) if (start > d_start)
@ -422,34 +422,34 @@ static int __init add_boot_mem_regions(unsigned long mstart,
static int __init fadump_get_boot_mem_regions(void) static int __init fadump_get_boot_mem_regions(void)
{ {
unsigned long base, size, cur_size, hole_size, last_end; unsigned long size, cur_size, hole_size, last_end;
unsigned long mem_size = fw_dump.boot_memory_size; unsigned long mem_size = fw_dump.boot_memory_size;
struct memblock_region *reg; phys_addr_t reg_start, reg_end;
int ret = 1; int ret = 1;
u64 i;
fw_dump.boot_mem_regs_cnt = 0; fw_dump.boot_mem_regs_cnt = 0;
last_end = 0; last_end = 0;
hole_size = 0; hole_size = 0;
cur_size = 0; cur_size = 0;
for_each_memblock(memory, reg) { for_each_mem_range(i, &reg_start, &reg_end) {
base = reg->base; size = reg_end - reg_start;
size = reg->size; hole_size += (reg_start - last_end);
hole_size += (base - last_end);
if ((cur_size + size) >= mem_size) { if ((cur_size + size) >= mem_size) {
size = (mem_size - cur_size); size = (mem_size - cur_size);
ret = add_boot_mem_regions(base, size); ret = add_boot_mem_regions(reg_start, size);
break; break;
} }
mem_size -= size; mem_size -= size;
cur_size += size; cur_size += size;
ret = add_boot_mem_regions(base, size); ret = add_boot_mem_regions(reg_start, size);
if (!ret) if (!ret)
break; break;
last_end = base + size; last_end = reg_end;
} }
fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size); fw_dump.boot_mem_top = PAGE_ALIGN(fw_dump.boot_memory_size + hole_size);
@ -985,9 +985,8 @@ static int fadump_init_elfcore_header(char *bufp)
*/ */
static int fadump_setup_crash_memory_ranges(void) static int fadump_setup_crash_memory_ranges(void)
{ {
struct memblock_region *reg; u64 i, start, end;
u64 start, end; int ret;
int i, ret;
pr_debug("Setup crash memory ranges.\n"); pr_debug("Setup crash memory ranges.\n");
crash_mrange_info.mem_range_cnt = 0; crash_mrange_info.mem_range_cnt = 0;
@ -1005,10 +1004,7 @@ static int fadump_setup_crash_memory_ranges(void)
return ret; return ret;
} }
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
start = (u64)reg->base;
end = start + (u64)reg->size;
/* /*
* skip the memory chunk that is already added * skip the memory chunk that is already added
* (0 through boot_memory_top). * (0 through boot_memory_top).
@ -1242,14 +1238,17 @@ static void fadump_free_reserved_memory(unsigned long start_pfn,
*/ */
static void fadump_release_reserved_area(u64 start, u64 end) static void fadump_release_reserved_area(u64 start, u64 end)
{ {
unsigned long reg_spfn, reg_epfn;
u64 tstart, tend, spfn, epfn; u64 tstart, tend, spfn, epfn;
struct memblock_region *reg; int i;
spfn = PHYS_PFN(start); spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end); epfn = PHYS_PFN(end);
for_each_memblock(memory, reg) {
tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg)); for_each_mem_pfn_range(i, MAX_NUMNODES, &reg_spfn, &reg_epfn, NULL) {
tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg)); tstart = max_t(u64, spfn, reg_spfn);
tend = min_t(u64, epfn, reg_epfn);
if (tstart < tend) { if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend); fadump_free_reserved_memory(tstart, tend);
@ -1684,12 +1683,10 @@ int __init fadump_reserve_mem(void)
/* Preserve everything above the base address */ /* Preserve everything above the base address */
static void __init fadump_reserve_crash_area(u64 base) static void __init fadump_reserve_crash_area(u64 base)
{ {
struct memblock_region *reg; u64 i, mstart, mend, msize;
u64 mstart, msize;
for_each_memblock(memory, reg) { for_each_mem_range(i, &mstart, &mend) {
mstart = reg->base; msize = mend - mstart;
msize = reg->size;
if ((mstart + msize) < base) if ((mstart + msize) < base)
continue; continue;

View File

@ -138,15 +138,13 @@ out:
*/ */
static int get_crash_memory_ranges(struct crash_mem **mem_ranges) static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
{ {
struct memblock_region *reg; phys_addr_t base, end;
struct crash_mem *tmem; struct crash_mem *tmem;
u64 i;
int ret; int ret;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
u64 base, size; u64 size = end - base;
base = (u64)reg->base;
size = (u64)reg->size;
/* Skip backup memory region, which needs a separate entry */ /* Skip backup memory region, which needs a separate entry */
if (base == BACKUP_SRC_START) { if (base == BACKUP_SRC_START) {
@ -250,8 +248,7 @@ static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
phys_addr_t start, end; phys_addr_t start, end;
u64 i; u64 i;
for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range_rev(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
/* /*
* memblock uses [start, end) convention while it is * memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the * [start, end] here. Fix the off-by-one to have the
@ -350,8 +347,7 @@ static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
phys_addr_t start, end; phys_addr_t start, end;
u64 i; u64 i;
for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, for_each_mem_range(i, &start, &end) {
MEMBLOCK_NONE, &start, &end, NULL) {
/* /*
* memblock uses [start, end) convention while it is * memblock uses [start, end) convention while it is
* [start, end] here. Fix the off-by-one to have the * [start, end] here. Fix the off-by-one to have the

View File

@ -95,23 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
void __init kvm_cma_reserve(void) void __init kvm_cma_reserve(void)
{ {
unsigned long align_size; unsigned long align_size;
struct memblock_region *reg; phys_addr_t selected_size;
phys_addr_t selected_size = 0;
/* /*
* We need CMA reservation only when we are in HV mode * We need CMA reservation only when we are in HV mode
*/ */
if (!cpu_has_feature(CPU_FTR_HVMODE)) if (!cpu_has_feature(CPU_FTR_HVMODE))
return; return;
/*
* We cannot use memblock_phys_mem_size() here, because
* memblock_analyze() has not been called yet.
*/
for_each_memblock(memory, reg)
selected_size += memblock_region_memory_end_pfn(reg) -
memblock_region_memory_base_pfn(reg);
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100);
if (selected_size) { if (selected_size) {
pr_info("%s: reserving %ld MiB for global area\n", __func__, pr_info("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M); (unsigned long)selected_size / SZ_1M);

View File

@ -687,9 +687,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
struct kvmppc_uvmem_page_pvt *pvt; struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn_last, pfn_first; unsigned long pfn_last, pfn_first;
pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT; pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
pfn_last = pfn_first + pfn_last = pfn_first +
(resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT); (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
spin_lock(&kvmppc_uvmem_bitmap_lock); spin_lock(&kvmppc_uvmem_bitmap_lock);
bit = find_first_zero_bit(kvmppc_uvmem_bitmap, bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
@ -1007,7 +1007,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
static void kvmppc_uvmem_page_free(struct page *page) static void kvmppc_uvmem_page_free(struct page *page)
{ {
unsigned long pfn = page_to_pfn(page) - unsigned long pfn = page_to_pfn(page) -
(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT); (kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
struct kvmppc_uvmem_page_pvt *pvt; struct kvmppc_uvmem_page_pvt *pvt;
spin_lock(&kvmppc_uvmem_bitmap_lock); spin_lock(&kvmppc_uvmem_bitmap_lock);
@ -1170,7 +1170,9 @@ int kvmppc_uvmem_init(void)
} }
kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE; kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
kvmppc_uvmem_pgmap.res = *res; kvmppc_uvmem_pgmap.range.start = res->start;
kvmppc_uvmem_pgmap.range.end = res->end;
kvmppc_uvmem_pgmap.nr_range = 1;
kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops; kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
/* just one global instance: */ /* just one global instance: */
kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap; kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
@ -1205,7 +1207,7 @@ void kvmppc_uvmem_free(void)
return; return;
memunmap_pages(&kvmppc_uvmem_pgmap); memunmap_pages(&kvmppc_uvmem_pgmap);
release_mem_region(kvmppc_uvmem_pgmap.res.start, release_mem_region(kvmppc_uvmem_pgmap.range.start,
resource_size(&kvmppc_uvmem_pgmap.res)); range_len(&kvmppc_uvmem_pgmap.range));
kfree(kvmppc_uvmem_bitmap); kfree(kvmppc_uvmem_bitmap);
} }

View File

@ -7,7 +7,7 @@
* *
* SMP scalability work: * SMP scalability work:
* Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
* *
* Module name: htab.c * Module name: htab.c
* *
* Description: * Description:
@ -867,8 +867,8 @@ static void __init htab_initialize(void)
unsigned long table; unsigned long table;
unsigned long pteg_count; unsigned long pteg_count;
unsigned long prot; unsigned long prot;
unsigned long base = 0, size = 0; phys_addr_t base = 0, size = 0, end;
struct memblock_region *reg; u64 i;
DBG(" -> htab_initialize()\n"); DBG(" -> htab_initialize()\n");
@ -884,7 +884,7 @@ static void __init htab_initialize(void)
/* /*
* Calculate the required size of the htab. We want the number of * Calculate the required size of the htab. We want the number of
* PTEGs to equal one half the number of real pages. * PTEGs to equal one half the number of real pages.
*/ */
htab_size_bytes = htab_get_table_size(); htab_size_bytes = htab_get_table_size();
pteg_count = htab_size_bytes >> 7; pteg_count = htab_size_bytes >> 7;
@ -894,7 +894,7 @@ static void __init htab_initialize(void)
firmware_has_feature(FW_FEATURE_PS3_LV1)) { firmware_has_feature(FW_FEATURE_PS3_LV1)) {
/* Using a hypervisor which owns the htab */ /* Using a hypervisor which owns the htab */
htab_address = NULL; htab_address = NULL;
_SDR1 = 0; _SDR1 = 0;
#ifdef CONFIG_FA_DUMP #ifdef CONFIG_FA_DUMP
/* /*
* If firmware assisted dump is active firmware preserves * If firmware assisted dump is active firmware preserves
@ -960,9 +960,9 @@ static void __init htab_initialize(void)
#endif /* CONFIG_DEBUG_PAGEALLOC */ #endif /* CONFIG_DEBUG_PAGEALLOC */
/* create bolted the linear mapping in the hash table */ /* create bolted the linear mapping in the hash table */
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
base = (unsigned long)__va(reg->base); size = end - base;
size = reg->size; base = (unsigned long)__va(base);
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n", DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot); base, size, prot);

View File

@ -329,7 +329,8 @@ static int __meminit create_physical_mapping(unsigned long start,
static void __init radix_init_pgtable(void) static void __init radix_init_pgtable(void)
{ {
unsigned long rts_field; unsigned long rts_field;
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
/* We don't support slb for radix */ /* We don't support slb for radix */
mmu_slb_size = 0; mmu_slb_size = 0;
@ -337,20 +338,19 @@ static void __init radix_init_pgtable(void)
/* /*
* Create the linear mapping * Create the linear mapping
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
/* /*
* The memblock allocator is up at this point, so the * The memblock allocator is up at this point, so the
* page tables will be allocated within the range. No * page tables will be allocated within the range. No
* need for a node (which we don't have yet). * need for a node (which we don't have yet).
*/ */
if ((reg->base + reg->size) >= RADIX_VMALLOC_START) { if (end >= RADIX_VMALLOC_START) {
pr_warn("Outside the supported range\n"); pr_warn("Outside the supported range\n");
continue; continue;
} }
WARN_ON(create_physical_mapping(reg->base, WARN_ON(create_physical_mapping(start, end,
reg->base + reg->size,
radix_mem_block_size, radix_mem_block_size,
-1, PAGE_KERNEL)); -1, PAGE_KERNEL));
} }

View File

@ -138,11 +138,11 @@ void __init kasan_mmu_init(void)
void __init kasan_init(void) void __init kasan_init(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
phys_addr_t base = reg->base; phys_addr_t top = min(end, total_lowmem);
phys_addr_t top = min(base + reg->size, total_lowmem);
int ret; int ret;
if (base >= top) if (base >= top)

View File

@ -184,15 +184,16 @@ void __init initmem_init(void)
/* mark pages that don't exist as nosave */ /* mark pages that don't exist as nosave */
static int __init mark_nonram_nosave(void) static int __init mark_nonram_nosave(void)
{ {
struct memblock_region *reg, *prev = NULL; unsigned long spfn, epfn, prev = 0;
int i;
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &spfn, &epfn, NULL) {
if (prev && if (prev && prev < spfn)
memblock_region_memory_end_pfn(prev) < memblock_region_memory_base_pfn(reg)) register_nosave_region(prev, spfn);
register_nosave_region(memblock_region_memory_end_pfn(prev),
memblock_region_memory_base_pfn(reg)); prev = epfn;
prev = reg;
} }
return 0; return 0;
} }
#else /* CONFIG_NEED_MULTIPLE_NODES */ #else /* CONFIG_NEED_MULTIPLE_NODES */
@ -584,20 +585,24 @@ void flush_icache_user_page(struct vm_area_struct *vma, struct page *page,
*/ */
static int __init add_system_ram_resources(void) static int __init add_system_ram_resources(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
struct resource *res; struct resource *res;
unsigned long base = reg->base;
unsigned long size = reg->size;
res = kzalloc(sizeof(struct resource), GFP_KERNEL); res = kzalloc(sizeof(struct resource), GFP_KERNEL);
WARN_ON(!res); WARN_ON(!res);
if (res) { if (res) {
res->name = "System RAM"; res->name = "System RAM";
res->start = base; res->start = start;
res->end = base + size - 1; /*
* In memblock, end points to the first byte after
* the range while in resourses, end points to the
* last byte in the range.
*/
res->end = end - 1;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
WARN_ON(request_resource(&iomem_resource, res) < 0); WARN_ON(request_resource(&iomem_resource, res) < 0);
} }

View File

@ -804,17 +804,14 @@ static void __init setup_nonnuma(void)
unsigned long total_ram = memblock_phys_mem_size(); unsigned long total_ram = memblock_phys_mem_size();
unsigned long start_pfn, end_pfn; unsigned long start_pfn, end_pfn;
unsigned int nid = 0; unsigned int nid = 0;
struct memblock_region *reg; int i;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n", printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
top_of_ram, total_ram); top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n", printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20); (top_of_ram - total_ram) >> 20);
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
fake_numa_create_new_node(end_pfn, &nid); fake_numa_create_new_node(end_pfn, &nid);
memblock_set_node(PFN_PHYS(start_pfn), memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn), PFN_PHYS(end_pfn - start_pfn),

View File

@ -123,11 +123,11 @@ static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
void __init mapin_ram(void) void __init mapin_ram(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) { for_each_mem_range(i, &base, &end) {
phys_addr_t base = reg->base; phys_addr_t top = min(end, total_lowmem);
phys_addr_t top = min(base + reg->size, total_lowmem);
if (base >= top) if (base >= top)
continue; continue;

View File

@ -145,21 +145,21 @@ static phys_addr_t dtb_early_pa __initdata;
void __init setup_bootmem(void) void __init setup_bootmem(void)
{ {
struct memblock_region *reg;
phys_addr_t mem_size = 0; phys_addr_t mem_size = 0;
phys_addr_t total_mem = 0; phys_addr_t total_mem = 0;
phys_addr_t mem_start, end = 0; phys_addr_t mem_start, start, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end); phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start); phys_addr_t vmlinux_start = __pa_symbol(&_start);
u64 i;
/* Find the memory region containing the kernel */ /* Find the memory region containing the kernel */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
end = reg->base + reg->size; phys_addr_t size = end - start;
if (!total_mem) if (!total_mem)
mem_start = reg->base; mem_start = start;
if (reg->base <= vmlinux_start && vmlinux_end <= end) if (start <= vmlinux_start && vmlinux_end <= end)
BUG_ON(reg->size == 0); BUG_ON(size == 0);
total_mem = total_mem + reg->size; total_mem = total_mem + size;
} }
/* /*
@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem(); early_init_fdt_scan_reserved_mem();
memblock_allow_resize(); memblock_allow_resize();
memblock_dump_all(); memblock_dump_all();
for_each_memblock(memory, reg) {
unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
memblock_set_node(PFN_PHYS(start_pfn),
PFN_PHYS(end_pfn - start_pfn),
&memblock.memory, 0);
}
} }
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
@ -464,7 +455,7 @@ static void __init setup_vm_final(void)
{ {
uintptr_t va, map_size; uintptr_t va, map_size;
phys_addr_t pa, start, end; phys_addr_t pa, start, end;
struct memblock_region *reg; u64 i;
/* Set mmu_enabled flag */ /* Set mmu_enabled flag */
mmu_enabled = true; mmu_enabled = true;
@ -475,14 +466,9 @@ static void __init setup_vm_final(void)
PGDIR_SIZE, PAGE_TABLE); PGDIR_SIZE, PAGE_TABLE);
/* Map all memory banks */ /* Map all memory banks */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
start = reg->base;
end = start + reg->size;
if (start >= end) if (start >= end)
break; break;
if (memblock_is_nomap(reg))
continue;
if (start <= __pa(PAGE_OFFSET) && if (start <= __pa(PAGE_OFFSET) &&
__pa(PAGE_OFFSET) < end) __pa(PAGE_OFFSET) < end)
start = __pa(PAGE_OFFSET); start = __pa(PAGE_OFFSET);
@ -545,7 +531,7 @@ static void __init resource_init(void)
{ {
struct memblock_region *region; struct memblock_region *region;
for_each_memblock(memory, region) { for_each_mem_region(region) {
struct resource *res; struct resource *res;
res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES); res = memblock_alloc(sizeof(struct resource), SMP_CACHE_BYTES);

View File

@ -85,16 +85,16 @@ static void __init populate(void *start, void *end)
void __init kasan_init(void) void __init kasan_init(void)
{ {
struct memblock_region *reg; phys_addr_t _start, _end;
unsigned long i; u64 i;
kasan_populate_early_shadow((void *)KASAN_SHADOW_START, kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *) (void *)kasan_mem_to_shadow((void *)
VMALLOC_END)); VMALLOC_END));
for_each_memblock(memory, reg) { for_each_mem_range(i, &_start, &_end) {
void *start = (void *)__va(reg->base); void *start = (void *)_start;
void *end = (void *)__va(reg->base + reg->size); void *end = (void *)_end;
if (start >= end) if (start >= end)
break; break;

View File

@ -484,8 +484,9 @@ static struct resource __initdata *standard_resources[] = {
static void __init setup_resources(void) static void __init setup_resources(void)
{ {
struct resource *res, *std_res, *sub_res; struct resource *res, *std_res, *sub_res;
struct memblock_region *reg; phys_addr_t start, end;
int j; int j;
u64 i;
code_resource.start = (unsigned long) _text; code_resource.start = (unsigned long) _text;
code_resource.end = (unsigned long) _etext - 1; code_resource.end = (unsigned long) _etext - 1;
@ -494,7 +495,7 @@ static void __init setup_resources(void)
bss_resource.start = (unsigned long) __bss_start; bss_resource.start = (unsigned long) __bss_start;
bss_resource.end = (unsigned long) __bss_stop - 1; bss_resource.end = (unsigned long) __bss_stop - 1;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
res = memblock_alloc(sizeof(*res), 8); res = memblock_alloc(sizeof(*res), 8);
if (!res) if (!res)
panic("%s: Failed to allocate %zu bytes align=0x%x\n", panic("%s: Failed to allocate %zu bytes align=0x%x\n",
@ -502,8 +503,13 @@ static void __init setup_resources(void)
res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM; res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;
res->name = "System RAM"; res->name = "System RAM";
res->start = reg->base; res->start = start;
res->end = reg->base + reg->size - 1; /*
* In memblock, end points to the first byte after the
* range while in resources, end points to the last byte in
* the range.
*/
res->end = end - 1;
request_resource(&iomem_resource, res); request_resource(&iomem_resource, res);
for (j = 0; j < ARRAY_SIZE(standard_resources); j++) { for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
@ -776,8 +782,8 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end; unsigned long start, end;
int i; int i;
memblock_dbg("physmem info source: %s (%hhd)\n", pr_debug("physmem info source: %s (%hhd)\n",
get_mem_info_source(), mem_detect.info_source); get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */ /* keep memblock lists close to the kernel */
memblock_set_bottom_up(true); memblock_set_bottom_up(true);
for_each_mem_detect_block(i, &start, &end) { for_each_mem_detect_block(i, &start, &end) {
@ -819,14 +825,15 @@ static void __init reserve_kernel(void)
static void __init setup_memory(void) static void __init setup_memory(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
u64 i;
/* /*
* Init storage key for present memory * Init storage key for present memory
*/ */
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end)
storage_key_init_range(reg->base, reg->base + reg->size); storage_key_init_range(start, end);
}
psw_set_key(PAGE_DEFAULT_KEY); psw_set_key(PAGE_DEFAULT_KEY);
/* Only cosmetics */ /* Only cosmetics */

View File

@ -183,9 +183,9 @@ static void mark_kernel_pgd(void)
void __init cmma_init_nodat(void) void __init cmma_init_nodat(void)
{ {
struct memblock_region *reg;
struct page *page; struct page *page;
unsigned long start, end, ix; unsigned long start, end, ix;
int i;
if (cmma_flag < 2) if (cmma_flag < 2)
return; return;
@ -193,9 +193,7 @@ void __init cmma_init_nodat(void)
mark_kernel_pgd(); mark_kernel_pgd();
/* Set all kernel pages not used for page tables to stable/no-dat */ /* Set all kernel pages not used for page tables to stable/no-dat */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start, &end, NULL) {
start = memblock_region_memory_base_pfn(reg);
end = memblock_region_memory_end_pfn(reg);
page = pfn_to_page(start); page = pfn_to_page(start);
for (ix = start; ix < end; ix++, page++) { for (ix = start; ix < end; ix++, page++) {
if (__test_and_clear_bit(PG_arch_1, &page->flags)) if (__test_and_clear_bit(PG_arch_1, &page->flags))

View File

@ -555,10 +555,11 @@ int vmem_add_mapping(unsigned long start, unsigned long size)
*/ */
void __init vmem_map_init(void) void __init vmem_map_init(void)
{ {
struct memblock_region *reg; phys_addr_t base, end;
u64 i;
for_each_memblock(memory, reg) for_each_mem_range(i, &base, &end)
vmem_add_range(reg->base, reg->size); vmem_add_range(base, end - base);
__set_memory((unsigned long)_stext, __set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT, (unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X); SET_MEMORY_RO | SET_MEMORY_X);

View File

@ -226,15 +226,12 @@ void __init allocate_pgdat(unsigned int nid)
static void __init do_init_bootmem(void) static void __init do_init_bootmem(void)
{ {
struct memblock_region *reg; unsigned long start_pfn, end_pfn;
int i;
/* Add active regions with valid PFNs. */ /* Add active regions with valid PFNs. */
for_each_memblock(memory, reg) { for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL)
unsigned long start_pfn, end_pfn;
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);
__add_active_range(0, start_pfn, end_pfn); __add_active_range(0, start_pfn, end_pfn);
}
/* All of system RAM sits in node 0 for the non-NUMA case */ /* All of system RAM sits in node 0 for the non-NUMA case */
allocate_pgdat(0); allocate_pgdat(0);

View File

@ -1192,18 +1192,14 @@ int of_node_to_nid(struct device_node *dp)
static void __init add_node_ranges(void) static void __init add_node_ranges(void)
{ {
struct memblock_region *reg; phys_addr_t start, end;
unsigned long prev_max; unsigned long prev_max;
u64 i;
memblock_resized: memblock_resized:
prev_max = memblock.memory.max; prev_max = memblock.memory.max;
for_each_memblock(memory, reg) { for_each_mem_range(i, &start, &end) {
unsigned long size = reg->size;
unsigned long start, end;
start = reg->base;
end = start + size;
while (start < end) { while (start < end) {
unsigned long this_end; unsigned long this_end;
int nid; int nid;
@ -1211,7 +1207,7 @@ memblock_resized:
this_end = memblock_nid_range(start, end, &nid); this_end = memblock_nid_range(start, end, &nid);
numadbg("Setting memblock NUMA node nid[%d] " numadbg("Setting memblock NUMA node nid[%d] "
"start[%lx] end[%lx]\n", "start[%llx] end[%lx]\n",
nid, start, this_end); nid, start, this_end);
memblock_set_node(start, this_end - start, memblock_set_node(start, this_end - start,

View File

@ -3,6 +3,7 @@
#define _ASM_X86_NUMA_H #define _ASM_X86_NUMA_H
#include <linux/nodemask.h> #include <linux/nodemask.h>
#include <linux/errno.h>
#include <asm/topology.h> #include <asm/topology.h>
#include <asm/apicdef.h> #include <asm/apicdef.h>
@ -77,7 +78,12 @@ void debug_cpumask_set_cpu(int cpu, int node, bool enable);
#ifdef CONFIG_NUMA_EMU #ifdef CONFIG_NUMA_EMU
#define FAKE_NODE_MIN_SIZE ((u64)32 << 20) #define FAKE_NODE_MIN_SIZE ((u64)32 << 20)
#define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL)) #define FAKE_NODE_MIN_HASH_MASK (~(FAKE_NODE_MIN_SIZE - 1UL))
void numa_emu_cmdline(char *); int numa_emu_cmdline(char *str);
#else /* CONFIG_NUMA_EMU */
/*
 * Stub used when CONFIG_NUMA_EMU is not set: @str is ignored and
 * -EINVAL is returned to the caller.
 */
static inline int numa_emu_cmdline(char *str)
{
return -EINVAL;
}
#endif /* CONFIG_NUMA_EMU */ #endif /* CONFIG_NUMA_EMU */
#endif /* _ASM_X86_NUMA_H */ #endif /* _ASM_X86_NUMA_H */

View File

@ -305,6 +305,20 @@ static int __init cpcompare(const void *a, const void *b)
return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr); return (ap->addr != ap->entry->addr) - (bp->addr != bp->entry->addr);
} }
/*
 * Report whether adjacent e820 ranges of @type have to be kept as
 * separate entries.
 *
 * PRAM and soft-reserved ranges may describe distinct platform ranges
 * that are aligned to NUMA node, protection domain, performance domain
 * or other boundaries, so they are never merged.
 *
 * Returns true for those two types, false for every other type.
 */
static bool e820_nomerge(enum e820_type type)
{
	return type == E820_TYPE_PRAM || type == E820_TYPE_SOFT_RESERVED;
}
int __init e820__update_table(struct e820_table *table) int __init e820__update_table(struct e820_table *table)
{ {
struct e820_entry *entries = table->entries; struct e820_entry *entries = table->entries;
@ -380,7 +394,7 @@ int __init e820__update_table(struct e820_table *table)
} }
/* Continue building up new map based on this information: */ /* Continue building up new map based on this information: */
if (current_type != last_type || current_type == E820_TYPE_PRAM) { if (current_type != last_type || e820_nomerge(current_type)) {
if (last_type != 0) { if (last_type != 0) {
new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr; new_entries[new_nr_entries].size = change_point[chg_idx]->addr - last_addr;
/* Move forward only if the new size was non-zero: */ /* Move forward only if the new size was non-zero: */

View File

@ -264,16 +264,12 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size); u64 area_size = PAGE_ALIGN(ramdisk_size);
/* We need to move the initrd down into directly mapped mem */ /* We need to move the initrd down into directly mapped mem */
relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
area_size, PAGE_SIZE); PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk) if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n", panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size); ramdisk_size);
/* Note: this includes all the mem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
memblock_reserve(relocated_ramdisk, area_size);
initrd_start = relocated_ramdisk + PAGE_OFFSET; initrd_start = relocated_ramdisk + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size; initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
@ -300,13 +296,13 @@ static void __init early_reserve_initrd(void)
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
} }
static void __init reserve_initrd(void) static void __init reserve_initrd(void)
{ {
/* Assume only end is not page aligned */ /* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image(); u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size = get_ramdisk_size(); u64 ramdisk_size = get_ramdisk_size();
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
u64 mapped_size;
if (!boot_params.hdr.type_of_loader || if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size) !ramdisk_image || !ramdisk_size)
@ -314,12 +310,6 @@ static void __init reserve_initrd(void)
initrd_start = 0; initrd_start = 0;
mapped_size = memblock_mem_size(max_pfn_mapped);
if (ramdisk_size >= (mapped_size>>1))
panic("initrd too large to handle, "
"disabling initrd (%lld needed, %lld available)\n",
ramdisk_size, mapped_size>>1);
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1); ramdisk_end - 1);
@ -431,13 +421,13 @@ static int __init reserve_crashkernel_low(void)
{ {
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0; unsigned long long base, low_base = 0, low_size = 0;
unsigned long total_low_mem; unsigned long low_mem_limit;
int ret; int ret;
total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
/* crashkernel=Y,low */ /* crashkernel=Y,low */
ret = parse_crashkernel_low(boot_command_line, total_low_mem, &low_size, &base); ret = parse_crashkernel_low(boot_command_line, low_mem_limit, &low_size, &base);
if (ret) { if (ret) {
/* /*
* two parts from kernel/dma/swiotlb.c: * two parts from kernel/dma/swiotlb.c:
@ -455,23 +445,17 @@ static int __init reserve_crashkernel_low(void)
return 0; return 0;
} }
low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX);
if (!low_base) { if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n",
(unsigned long)(low_size >> 20)); (unsigned long)(low_size >> 20));
return -ENOMEM; return -ENOMEM;
} }
ret = memblock_reserve(low_base, low_size); pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n",
if (ret) {
pr_err("%s: Error reserving crashkernel low memblock.\n", __func__);
return ret;
}
pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n",
(unsigned long)(low_size >> 20), (unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20), (unsigned long)(low_base >> 20),
(unsigned long)(total_low_mem >> 20)); (unsigned long)(low_mem_limit >> 20));
crashk_low_res.start = low_base; crashk_low_res.start = low_base;
crashk_low_res.end = low_base + low_size - 1; crashk_low_res.end = low_base + low_size - 1;
@ -515,13 +499,13 @@ static void __init reserve_crashkernel(void)
* unless "crashkernel=size[KMG],high" is specified. * unless "crashkernel=size[KMG],high" is specified.
*/ */
if (!high) if (!high)
crash_base = memblock_find_in_range(CRASH_ALIGN, crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ADDR_LOW_MAX, CRASH_ALIGN, CRASH_ALIGN,
crash_size, CRASH_ALIGN); CRASH_ADDR_LOW_MAX);
if (!crash_base) if (!crash_base)
crash_base = memblock_find_in_range(CRASH_ALIGN, crash_base = memblock_phys_alloc_range(crash_size,
CRASH_ADDR_HIGH_MAX, CRASH_ALIGN, CRASH_ALIGN,
crash_size, CRASH_ALIGN); CRASH_ADDR_HIGH_MAX);
if (!crash_base) { if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n"); pr_info("crashkernel reservation failed - No suitable area found.\n");
return; return;
@ -529,19 +513,13 @@ static void __init reserve_crashkernel(void)
} else { } else {
unsigned long long start; unsigned long long start;
start = memblock_find_in_range(crash_base, start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
crash_base + crash_size, crash_base + crash_size);
crash_size, 1 << 20);
if (start != crash_base) { if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n"); pr_info("crashkernel reservation failed - memory is in use.\n");
return; return;
} }
} }
ret = memblock_reserve(crash_base, crash_size);
if (ret) {
pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
return;
}
if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size); memblock_free(crash_base, crash_size);

View File

@ -37,14 +37,12 @@ static __init int numa_setup(char *opt)
return -EINVAL; return -EINVAL;
if (!strncmp(opt, "off", 3)) if (!strncmp(opt, "off", 3))
numa_off = 1; numa_off = 1;
#ifdef CONFIG_NUMA_EMU
if (!strncmp(opt, "fake=", 5)) if (!strncmp(opt, "fake=", 5))
numa_emu_cmdline(opt + 5); return numa_emu_cmdline(opt + 5);
#endif
#ifdef CONFIG_ACPI_NUMA
if (!strncmp(opt, "noacpi", 6)) if (!strncmp(opt, "noacpi", 6))
acpi_numa = -1; disable_srat();
#endif if (!strncmp(opt, "nohmat", 6))
disable_hmat();
return 0; return 0;
} }
early_param("numa", numa_setup); early_param("numa", numa_setup);
@ -516,7 +514,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
* memory ranges, because quirks such as trim_snb_memory() * memory ranges, because quirks such as trim_snb_memory()
* reserve specific pages for Sandy Bridge graphics. ] * reserve specific pages for Sandy Bridge graphics. ]
*/ */
for_each_memblock(reserved, mb_region) { for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region); int nid = memblock_get_region_node(mb_region);
if (nid != MAX_NUMNODES) if (nid != MAX_NUMNODES)
@ -919,7 +917,6 @@ int phys_to_target_node(phys_addr_t start)
return meminfo_to_nid(&numa_reserved_meminfo, start); return meminfo_to_nid(&numa_reserved_meminfo, start);
} }
EXPORT_SYMBOL_GPL(phys_to_target_node);
int memory_add_physaddr_to_nid(u64 start) int memory_add_physaddr_to_nid(u64 start)
{ {

View File

@ -13,9 +13,10 @@
static int emu_nid_to_phys[MAX_NUMNODES]; static int emu_nid_to_phys[MAX_NUMNODES];
static char *emu_cmdline __initdata; static char *emu_cmdline __initdata;
void __init numa_emu_cmdline(char *str) int __init numa_emu_cmdline(char *str)
{ {
emu_cmdline = str; emu_cmdline = str;
return 0;
} }
static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi) static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)

View File

@ -1300,7 +1300,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
* any NUMA information the kernel tries to get from ACPI will * any NUMA information the kernel tries to get from ACPI will
* be meaningless. Prevent it from trying. * be meaningless. Prevent it from trying.
*/ */
acpi_numa = -1; disable_srat();
#endif #endif
WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv)); WARN_ON(xen_cpuhp_setup(xen_cpu_up_prepare_pv, xen_cpu_dead_pv));

View File

@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn); free_area_init(max_zone_pfn);
} }
#ifdef CONFIG_HIGHMEM
static void __init free_area_high(unsigned long pfn, unsigned long end)
{
for (; pfn < end; pfn++)
free_highmem_page(pfn_to_page(pfn));
}
static void __init free_highpages(void) static void __init free_highpages(void)
{ {
#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn; unsigned long max_low = max_low_pfn;
struct memblock_region *mem, *res; phys_addr_t range_start, range_end;
u64 i;
reset_all_zones_managed_pages();
/* set highmem page free */ /* set highmem page free */
for_each_memblock(memory, mem) { for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
unsigned long start = memblock_region_memory_base_pfn(mem); &range_start, &range_end, NULL) {
unsigned long end = memblock_region_memory_end_pfn(mem); unsigned long start = PHYS_PFN(range_start);
unsigned long end = PHYS_PFN(range_end);
/* Ignore complete lowmem entries */ /* Ignore complete lowmem entries */
if (end <= max_low) if (end <= max_low)
continue; continue;
if (memblock_is_nomap(mem))
continue;
/* Truncate partial highmem entries */ /* Truncate partial highmem entries */
if (start < max_low) if (start < max_low)
start = max_low; start = max_low;
/* Find and exclude any reserved regions */ for (; start < end; start++)
for_each_memblock(reserved, res) { free_highmem_page(pfn_to_page(start));
unsigned long res_start, res_end;
res_start = memblock_region_reserved_base_pfn(res);
res_end = memblock_region_reserved_end_pfn(res);
if (res_end < start)
continue;
if (res_start < start)
res_start = start;
if (res_start > end)
res_start = end;
if (res_end > end)
res_end = end;
if (res_start != start)
free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
}
/* And now free anything which remains */
if (start < end)
free_area_high(start, end);
} }
}
#else
static void __init free_highpages(void)
{
}
#endif #endif
}
/* /*
* Initialize memory pages. * Initialize memory pages.

View File

@ -24,8 +24,15 @@
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/node.h> #include <linux/node.h>
#include <linux/sysfs.h> #include <linux/sysfs.h>
#include <linux/dax.h>
static u8 hmat_revision; static u8 hmat_revision;
static int hmat_disable __initdata;
/*
 * Set the hmat_disable flag. hmat_init() checks this flag and returns
 * early when it is set; the "numa=nohmat" option handler calls this.
 */
void __init disable_hmat(void)
{
hmat_disable = 1;
}
static LIST_HEAD(targets); static LIST_HEAD(targets);
static LIST_HEAD(initiators); static LIST_HEAD(initiators);
@ -634,66 +641,6 @@ static void hmat_register_target_perf(struct memory_target *target)
node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0); node_set_perf_attrs(mem_nid, &target->hmem_attrs, 0);
} }
/*
 * Register an "hmem" platform device for one memory range of @target.
 *
 * @target: memory target whose proximity domain (memory_pxm) supplies the
 *          device's NUMA node and the memregion_info target node.
 * @r:      resource describing the range; only its start and end are used.
 *
 * Nothing is registered unless the range intersects a region described as
 * IORES_DESC_SOFT_RESERVED. Failures are logged and the function returns
 * without registering a device; there is no return value.
 */
static void hmat_register_target_device(struct memory_target *target,
		struct resource *r)
{
	/* define a clean / non-busy resource for the platform device */
	struct resource res = {
		.start = r->start,
		.end = r->end,
		.flags = IORESOURCE_MEM,
	};
	struct platform_device *pdev;
	struct memregion_info info;
	int rc, id;

	rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
			IORES_DESC_SOFT_RESERVED);
	if (rc != REGION_INTERSECTS)
		return;

	id = memregion_alloc(GFP_KERNEL);
	if (id < 0) {
		pr_err("memregion allocation failure for %pr\n", &res);
		return;
	}

	pdev = platform_device_alloc("hmem", id);
	if (!pdev) {
		pr_err("hmem device allocation failure for %pr\n", &res);
		/* No device exists yet, only the id has to be given back. */
		goto out_free_id;
	}

	pdev->dev.numa_node = acpi_map_pxm_to_online_node(target->memory_pxm);
	info = (struct memregion_info) {
		.target_node = acpi_map_pxm_to_node(target->memory_pxm),
	};
	rc = platform_device_add_data(pdev, &info, sizeof(info));
	if (rc < 0) {
		pr_err("hmem memregion_info allocation failure for %pr\n", &res);
		/*
		 * pdev is already allocated here, so it must be released
		 * too; jumping straight to the id cleanup would leak it.
		 */
		goto out_put_device;
	}

	rc = platform_device_add_resources(pdev, &res, 1);
	if (rc < 0) {
		pr_err("hmem resource allocation failure for %pr\n", &res);
		goto out_put_device;
	}

	rc = platform_device_add(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "device add failed for %pr\n", &res);
		goto out_put_device;
	}

	return;

out_put_device:
	/* Drop the reference taken by platform_device_alloc(). */
	put_device(&pdev->dev);
out_free_id:
	memregion_free(id);
}
static void hmat_register_target_devices(struct memory_target *target) static void hmat_register_target_devices(struct memory_target *target)
{ {
struct resource *res; struct resource *res;
@ -705,8 +652,11 @@ static void hmat_register_target_devices(struct memory_target *target)
if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM)) if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
return; return;
for (res = target->memregions.child; res; res = res->sibling) for (res = target->memregions.child; res; res = res->sibling) {
hmat_register_target_device(target, res); int target_nid = acpi_map_pxm_to_node(target->memory_pxm);
hmem_register_device(target_nid, res);
}
} }
static void hmat_register_target(struct memory_target *target) static void hmat_register_target(struct memory_target *target)
@ -814,7 +764,7 @@ static __init int hmat_init(void)
enum acpi_hmat_type i; enum acpi_hmat_type i;
acpi_status status; acpi_status status;
if (srat_disabled()) if (srat_disabled() || hmat_disable)
return 0; return 0;
status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl); status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);

View File

@ -27,7 +27,12 @@ static int node_to_pxm_map[MAX_NUMNODES]
= { [0 ... MAX_NUMNODES - 1] = PXM_INVAL }; = { [0 ... MAX_NUMNODES - 1] = PXM_INVAL };
unsigned char acpi_srat_revision __initdata; unsigned char acpi_srat_revision __initdata;
int acpi_numa __initdata; static int acpi_numa __initdata;
/*
 * Set acpi_numa to -1. Callers visible in this change are bad_srat()
 * (after logging "SRAT not used"), the "numa=noacpi" option handler and
 * the Xen PV start-up path.
 * NOTE(review): presumably srat_disabled() reports this state - confirm.
 */
void __init disable_srat(void)
{
acpi_numa = -1;
}
int pxm_to_node(int pxm) int pxm_to_node(int pxm)
{ {
@ -163,7 +168,7 @@ static int __init slit_valid(struct acpi_table_slit *slit)
void __init bad_srat(void) void __init bad_srat(void)
{ {
pr_err("SRAT: SRAT not used.\n"); pr_err("SRAT: SRAT not used.\n");
acpi_numa = -1; disable_srat();
} }
int __init srat_disabled(void) int __init srat_disabled(void)

View File

@ -3324,7 +3324,7 @@ struct device *device_find_child_by_name(struct device *parent,
klist_iter_init(&parent->p->klist_children, &i); klist_iter_init(&parent->p->klist_children, &i);
while ((child = next_device(&i))) while ((child = next_device(&i)))
if (!strcmp(dev_name(child), name) && get_device(child)) if (sysfs_streq(dev_name(child), name) && get_device(child))
break; break;
klist_iter_exit(&i); klist_iter_exit(&i);
return child; return child;

View File

@ -610,23 +610,23 @@ static unsigned int armada_xp_mbus_win_remap_offset(int win)
static void __init static void __init
mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end) mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
{ {
struct memblock_region *r; phys_addr_t reg_start, reg_end;
uint64_t s = 0; uint64_t i, s = 0;
for_each_memblock(memory, r) { for_each_mem_range(i, &reg_start, &reg_end) {
/* /*
* This part of the memory is above 4 GB, so we don't * This part of the memory is above 4 GB, so we don't
* care for the MBus bridge hole. * care for the MBus bridge hole.
*/ */
if (r->base >= 0x100000000ULL) if (reg_start >= 0x100000000ULL)
continue; continue;
/* /*
* The MBus bridge hole is at the end of the RAM under * The MBus bridge hole is at the end of the RAM under
* the 4 GB limit. * the 4 GB limit.
*/ */
if (r->base + r->size > s) if (reg_end > s)
s = r->base + r->size; s = reg_end;
} }
*start = s; *start = s;

View File

@ -35,6 +35,7 @@ config DEV_DAX_PMEM
config DEV_DAX_HMEM config DEV_DAX_HMEM
tristate "HMEM DAX: direct access to 'specific purpose' memory" tristate "HMEM DAX: direct access to 'specific purpose' memory"
depends on EFI_SOFT_RESERVE depends on EFI_SOFT_RESERVE
select NUMA_KEEP_MEMINFO if (NUMA && X86)
default DEV_DAX default DEV_DAX
help help
EFI 2.8 platforms, and others, may advertise 'specific purpose' EFI 2.8 platforms, and others, may advertise 'specific purpose'
@ -48,6 +49,11 @@ config DEV_DAX_HMEM
Say M if unsure. Say M if unsure.
config DEV_DAX_HMEM_DEVICES
depends on NUMA_KEEP_MEMINFO # for phys_to_target_node()
depends on DEV_DAX_HMEM && DAX=y
def_bool y
config DEV_DAX_KMEM config DEV_DAX_KMEM
tristate "KMEM DAX: volatile-use of persistent memory" tristate "KMEM DAX: volatile-use of persistent memory"
default DEV_DAX default DEV_DAX

View File

@ -2,11 +2,10 @@
obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DAX) += dax.o
obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o
obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
dax-y := super.o dax-y := super.o
dax-y += bus.o dax-y += bus.o
device_dax-y := device.o device_dax-y := device.o
dax_hmem-y := hmem.o
obj-y += pmem/ obj-y += pmem/
obj-y += hmem/

File diff suppressed because it is too large Load Diff

View File

@ -3,29 +3,33 @@
#ifndef __DAX_BUS_H__ #ifndef __DAX_BUS_H__
#define __DAX_BUS_H__ #define __DAX_BUS_H__
#include <linux/device.h> #include <linux/device.h>
#include <linux/range.h>
struct dev_dax; struct dev_dax;
struct resource; struct resource;
struct dax_device; struct dax_device;
struct dax_region; struct dax_region;
void dax_region_put(struct dax_region *dax_region); void dax_region_put(struct dax_region *dax_region);
#define IORESOURCE_DAX_STATIC (1UL << 0)
struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, int target_node, unsigned int align, struct range *range, int target_node, unsigned int align,
unsigned long long flags); unsigned long flags);
enum dev_dax_subsys { enum dev_dax_subsys {
DEV_DAX_BUS, DEV_DAX_BUS = 0, /* zeroed dev_dax_data picks this by default */
DEV_DAX_CLASS, DEV_DAX_CLASS,
}; };
struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, struct dev_dax_data {
struct dev_pagemap *pgmap, enum dev_dax_subsys subsys); struct dax_region *dax_region;
struct dev_pagemap *pgmap;
enum dev_dax_subsys subsys;
resource_size_t size;
int id;
};
static inline struct dev_dax *devm_create_dev_dax(struct dax_region *dax_region, struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data);
int id, struct dev_pagemap *pgmap)
{
return __devm_create_dev_dax(dax_region, id, pgmap, DEV_DAX_BUS);
}
/* to be deleted when DEV_DAX_CLASS is removed */ /* to be deleted when DEV_DAX_CLASS is removed */
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys); struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys);
@ -34,6 +38,8 @@ struct dax_device_driver {
struct device_driver drv; struct device_driver drv;
struct list_head ids; struct list_head ids;
int match_always; int match_always;
int (*probe)(struct dev_dax *dev);
int (*remove)(struct dev_dax *dev);
}; };
int __dax_driver_register(struct dax_device_driver *dax_drv, int __dax_driver_register(struct dax_device_driver *dax_drv,
@ -44,7 +50,7 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv);
void kill_dev_dax(struct dev_dax *dev_dax); void kill_dev_dax(struct dev_dax *dev_dax);
#if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT) #if IS_ENABLED(CONFIG_DEV_DAX_PMEM_COMPAT)
int dev_dax_probe(struct device *dev); int dev_dax_probe(struct dev_dax *dev_dax);
#endif #endif
/* /*

View File

@ -7,6 +7,7 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/idr.h>
/* private routines between core files */ /* private routines between core files */
struct dax_device; struct dax_device;
@ -22,8 +23,10 @@ void dax_bus_exit(void);
* @kref: to pin while other agents have a need to do lookups * @kref: to pin while other agents have a need to do lookups
* @dev: parent device backing this region * @dev: parent device backing this region
* @align: allocation and mapping alignment for child dax devices * @align: allocation and mapping alignment for child dax devices
* @res: physical address range of the region * @ida: instance id allocator
* @pfn_flags: identify whether the pfns are paged back or not * @res: resource tree to track instance allocations
* @seed: allow userspace to find the first unbound seed device
* @youngest: allow userspace to find the most recently created device
*/ */
struct dax_region { struct dax_region {
int id; int id;
@ -31,8 +34,16 @@ struct dax_region {
struct kref kref; struct kref kref;
struct device *dev; struct device *dev;
unsigned int align; unsigned int align;
struct ida ida;
struct resource res; struct resource res;
unsigned long long pfn_flags; struct device *seed;
struct device *youngest;
};
struct dax_mapping {
struct device dev;
int range_id;
int id;
}; };
/** /**
@ -41,22 +52,57 @@ struct dax_region {
* @region - parent region * @region - parent region
* @dax_dev - core dax functionality * @dax_dev - core dax functionality
* @target_node: effective numa node if dev_dax memory range is onlined * @target_node: effective numa node if dev_dax memory range is onlined
* @id: ida allocated id
* @ida: mapping id allocator
* @dev - device core * @dev - device core
* @pgmap - pgmap for memmap setup / lifetime (driver owned) * @pgmap - pgmap for memmap setup / lifetime (driver owned)
* @dax_mem_res: physical address range of hotadded DAX memory * @nr_range: size of @ranges
* @dax_mem_name: name for hotadded DAX memory via add_memory_driver_managed() * @ranges: resource-span + pgoff tuples for the instance
*/ */
struct dev_dax { struct dev_dax {
struct dax_region *region; struct dax_region *region;
struct dax_device *dax_dev; struct dax_device *dax_dev;
unsigned int align;
int target_node; int target_node;
int id;
struct ida ida;
struct device dev; struct device dev;
struct dev_pagemap pgmap; struct dev_pagemap *pgmap;
struct resource *dax_kmem_res; int nr_range;
struct dev_dax_range {
unsigned long pgoff;
struct range range;
struct dax_mapping *mapping;
} *ranges;
}; };
static inline struct dev_dax *to_dev_dax(struct device *dev) static inline struct dev_dax *to_dev_dax(struct device *dev)
{ {
return container_of(dev, struct dev_dax, dev); return container_of(dev, struct dev_dax, dev);
} }
/* Convert an embedded struct device pointer back to its enclosing dax_mapping. */
static inline struct dax_mapping *to_dax_mapping(struct device *dev)
{
	struct dax_mapping *mapping;

	mapping = container_of(dev, struct dax_mapping, dev);
	return mapping;
}
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size);
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * Report whether @align is an acceptable device-dax alignment:
 *  - PUD_SIZE, when CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD is enabled
 *  - PMD_SIZE, when has_transparent_hugepage() returns true
 *  - PAGE_SIZE, always
 * The checks short-circuit left to right, so has_transparent_hugepage()
 * is only consulted for a PMD_SIZE request that the PUD test did not accept.
 */
static inline bool dax_align_valid(unsigned long align)
{
	return (align == PUD_SIZE &&
		IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) ||
	       (align == PMD_SIZE && has_transparent_hugepage()) ||
	       align == PAGE_SIZE;
}
#else
/* Without CONFIG_TRANSPARENT_HUGEPAGE only PAGE_SIZE alignment is accepted. */
static inline bool dax_align_valid(unsigned long align)
{
	return align == PAGE_SIZE;
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif #endif

View File

@ -17,7 +17,6 @@
static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma, static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
const char *func) const char *func)
{ {
struct dax_region *dax_region = dev_dax->region;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
unsigned long mask; unsigned long mask;
@ -32,7 +31,7 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL; return -EINVAL;
} }
mask = dax_region->align - 1; mask = dev_dax->align - 1;
if (vma->vm_start & mask || vma->vm_end & mask) { if (vma->vm_start & mask || vma->vm_end & mask) {
dev_info_ratelimited(dev, dev_info_ratelimited(dev,
"%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n", "%s: %s: fail, unaligned vma (%#lx - %#lx, %#lx)\n",
@ -41,14 +40,6 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
return -EINVAL; return -EINVAL;
} }
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) == PFN_DEV
&& (vma->vm_flags & VM_DONTCOPY) == 0) {
dev_info_ratelimited(dev,
"%s: %s: fail, dax range requires MADV_DONTFORK\n",
current->comm, func);
return -EINVAL;
}
if (!vma_is_dax(vma)) { if (!vma_is_dax(vma)) {
dev_info_ratelimited(dev, dev_info_ratelimited(dev,
"%s: %s: fail, vma is not DAX capable\n", "%s: %s: fail, vma is not DAX capable\n",
@ -63,15 +54,22 @@ static int check_vma(struct dev_dax *dev_dax, struct vm_area_struct *vma,
__weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size) unsigned long size)
{ {
struct resource *res = &dev_dax->region->res; int i;
phys_addr_t phys;
phys = pgoff * PAGE_SIZE + res->start; for (i = 0; i < dev_dax->nr_range; i++) {
if (phys >= res->start && phys <= res->end) { struct dev_dax_range *dax_range = &dev_dax->ranges[i];
if (phys + size - 1 <= res->end) struct range *range = &dax_range->range;
unsigned long long pgoff_end;
phys_addr_t phys;
pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
continue;
phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
if (phys + size - 1 <= range->end)
return phys; return phys;
break;
} }
return -1; return -1;
} }
@ -79,21 +77,19 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
struct vm_fault *vmf, pfn_t *pfn) struct vm_fault *vmf, pfn_t *pfn)
{ {
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
unsigned int fault_size = PAGE_SIZE; unsigned int fault_size = PAGE_SIZE;
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PAGE_SIZE) {
if (dax_region->align > PAGE_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
if (fault_size != dax_region->align) if (fault_size != dev_dax->align)
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE); phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
@ -102,7 +98,7 @@ static vm_fault_t __dev_dax_pte_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_mixed(vmf->vma, vmf->address, *pfn); return vmf_insert_mixed(vmf->vma, vmf->address, *pfn);
} }
@ -112,7 +108,6 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
{ {
unsigned long pmd_addr = vmf->address & PMD_MASK; unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
pgoff_t pgoff; pgoff_t pgoff;
unsigned int fault_size = PMD_SIZE; unsigned int fault_size = PMD_SIZE;
@ -120,22 +115,15 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PMD_SIZE) {
if (dax_region->align > PMD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
/* dax pmd mappings require pfn_t_devmap() */ if (fault_size < dev_dax->align)
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} else if (fault_size > dev_dax->align)
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */ /* if we are outside of the VMA */
@ -150,7 +138,7 @@ static vm_fault_t __dev_dax_pmd_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); return vmf_insert_pfn_pmd(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
} }
@ -161,7 +149,6 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
{ {
unsigned long pud_addr = vmf->address & PUD_MASK; unsigned long pud_addr = vmf->address & PUD_MASK;
struct device *dev = &dev_dax->dev; struct device *dev = &dev_dax->dev;
struct dax_region *dax_region;
phys_addr_t phys; phys_addr_t phys;
pgoff_t pgoff; pgoff_t pgoff;
unsigned int fault_size = PUD_SIZE; unsigned int fault_size = PUD_SIZE;
@ -170,22 +157,15 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
if (check_vma(dev_dax, vmf->vma, __func__)) if (check_vma(dev_dax, vmf->vma, __func__))
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
dax_region = dev_dax->region; if (dev_dax->align > PUD_SIZE) {
if (dax_region->align > PUD_SIZE) {
dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n", dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
dax_region->align, fault_size); dev_dax->align, fault_size);
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
/* dax pud mappings require pfn_t_devmap() */ if (fault_size < dev_dax->align)
if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
dev_dbg(dev, "region lacks devmap flags\n");
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} else if (fault_size > dev_dax->align)
if (fault_size < dax_region->align)
return VM_FAULT_SIGBUS;
else if (fault_size > dax_region->align)
return VM_FAULT_FALLBACK; return VM_FAULT_FALLBACK;
/* if we are outside of the VMA */ /* if we are outside of the VMA */
@ -200,7 +180,7 @@ static vm_fault_t __dev_dax_pud_fault(struct dev_dax *dev_dax,
return VM_FAULT_SIGBUS; return VM_FAULT_SIGBUS;
} }
*pfn = phys_to_pfn_t(phys, dax_region->pfn_flags); *pfn = phys_to_pfn_t(phys, PFN_DEV|PFN_MAP);
return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE); return vmf_insert_pfn_pud(vmf, *pfn, vmf->flags & FAULT_FLAG_WRITE);
} }
@ -280,9 +260,8 @@ static int dev_dax_split(struct vm_area_struct *vma, unsigned long addr)
{ {
struct file *filp = vma->vm_file; struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data; struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
if (!IS_ALIGNED(addr, dax_region->align)) if (!IS_ALIGNED(addr, dev_dax->align))
return -EINVAL; return -EINVAL;
return 0; return 0;
} }
@ -291,9 +270,8 @@ static unsigned long dev_dax_pagesize(struct vm_area_struct *vma)
{ {
struct file *filp = vma->vm_file; struct file *filp = vma->vm_file;
struct dev_dax *dev_dax = filp->private_data; struct dev_dax *dev_dax = filp->private_data;
struct dax_region *dax_region = dev_dax->region;
return dax_region->align; return dev_dax->align;
} }
static const struct vm_operations_struct dax_vm_ops = { static const struct vm_operations_struct dax_vm_ops = {
@ -332,13 +310,11 @@ static unsigned long dax_get_unmapped_area(struct file *filp,
{ {
unsigned long off, off_end, off_align, len_align, addr_align, align; unsigned long off, off_end, off_align, len_align, addr_align, align;
struct dev_dax *dev_dax = filp ? filp->private_data : NULL; struct dev_dax *dev_dax = filp ? filp->private_data : NULL;
struct dax_region *dax_region;
if (!dev_dax || addr) if (!dev_dax || addr)
goto out; goto out;
dax_region = dev_dax->region; align = dev_dax->align;
align = dax_region->align;
off = pgoff << PAGE_SHIFT; off = pgoff << PAGE_SHIFT;
off_end = off + len; off_end = off + len;
off_align = round_up(off, align); off_align = round_up(off, align);
@ -412,25 +388,45 @@ static void dev_dax_kill(void *dev_dax)
kill_dev_dax(dev_dax); kill_dev_dax(dev_dax);
} }
int dev_dax_probe(struct device *dev) int dev_dax_probe(struct dev_dax *dev_dax)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev);
struct dax_device *dax_dev = dev_dax->dax_dev; struct dax_device *dax_dev = dev_dax->dax_dev;
struct resource *res = &dev_dax->region->res; struct device *dev = &dev_dax->dev;
struct dev_pagemap *pgmap;
struct inode *inode; struct inode *inode;
struct cdev *cdev; struct cdev *cdev;
void *addr; void *addr;
int rc; int rc, i;
/* 1:1 map region resource range to device-dax instance range */ pgmap = dev_dax->pgmap;
if (!devm_request_mem_region(dev, res->start, resource_size(res), if (dev_WARN_ONCE(dev, pgmap && dev_dax->nr_range > 1,
dev_name(dev))) { "static pgmap / multi-range device conflict\n"))
dev_warn(dev, "could not reserve region %pR\n", res); return -EINVAL;
return -EBUSY;
if (!pgmap) {
pgmap = devm_kzalloc(dev, sizeof(*pgmap) + sizeof(struct range)
* (dev_dax->nr_range - 1), GFP_KERNEL);
if (!pgmap)
return -ENOMEM;
pgmap->nr_range = dev_dax->nr_range;
} }
dev_dax->pgmap.type = MEMORY_DEVICE_GENERIC; for (i = 0; i < dev_dax->nr_range; i++) {
addr = devm_memremap_pages(dev, &dev_dax->pgmap); struct range *range = &dev_dax->ranges[i].range;
if (!devm_request_mem_region(dev, range->start,
range_len(range), dev_name(dev))) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve range\n",
i, range->start, range->end);
return -EBUSY;
}
/* don't update the range for static pgmap */
if (!dev_dax->pgmap)
pgmap->ranges[i] = *range;
}
pgmap->type = MEMORY_DEVICE_GENERIC;
addr = devm_memremap_pages(dev, pgmap);
if (IS_ERR(addr)) if (IS_ERR(addr))
return PTR_ERR(addr); return PTR_ERR(addr);
@ -456,17 +452,15 @@ int dev_dax_probe(struct device *dev)
} }
EXPORT_SYMBOL_GPL(dev_dax_probe); EXPORT_SYMBOL_GPL(dev_dax_probe);
static int dev_dax_remove(struct device *dev) static int dev_dax_remove(struct dev_dax *dev_dax)
{ {
/* all probe actions are unwound by devm */ /* all probe actions are unwound by devm */
return 0; return 0;
} }
static struct dax_device_driver device_dax_driver = { static struct dax_device_driver device_dax_driver = {
.drv = { .probe = dev_dax_probe,
.probe = dev_dax_probe, .remove = dev_dax_remove,
.remove = dev_dax_remove,
},
.match_always = 1, .match_always = 1,
}; };

View File

@ -0,0 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o
obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o
device_hmem-y := device.o
dax_hmem-y := hmem.o

100
drivers/dax/hmem/device.c Normal file
View File

@ -0,0 +1,100 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/platform_device.h>
#include <linux/memregion.h>
#include <linux/module.h>
#include <linux/dax.h>
#include <linux/mm.h>
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
/*
 * hmem_register_device - register an "hmem" platform device for a range
 * @target_nid: node id stored in the device's memregion_info.target_node and
 *              mapped through numa_map_to_online_node() for dev.numa_node
 * @r: resource describing the range; only ->start and ->end are copied
 *
 * Nothing is registered when the "disable" module parameter is set, or when
 * the range does not intersect an IORESOURCE_MEM resource described as
 * IORES_DESC_SOFT_RESERVED.  Failures are logged and unwound; the function
 * has no return value.
 */
void hmem_register_device(int target_nid, struct resource *r)
{
	/* define a clean / non-busy resource for the platform device */
	struct resource res = {
		.start = r->start,
		.end = r->end,
		.flags = IORESOURCE_MEM,
	};
	struct platform_device *pdev;
	struct memregion_info info;
	int rc, id;

	if (nohmem)
		return;

	rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM,
			IORES_DESC_SOFT_RESERVED);
	if (rc != REGION_INTERSECTS)
		return;

	id = memregion_alloc(GFP_KERNEL);
	if (id < 0) {
		pr_err("memregion allocation failure for %pr\n", &res);
		return;
	}

	pdev = platform_device_alloc("hmem", id);
	if (!pdev) {
		pr_err("hmem device allocation failure for %pr\n", &res);
		/* no device exists yet, only the id needs to be returned */
		goto out_free_id;
	}

	pdev->dev.numa_node = numa_map_to_online_node(target_nid);
	info = (struct memregion_info) {
		.target_node = target_nid,
	};
	rc = platform_device_add_data(pdev, &info, sizeof(info));
	if (rc < 0) {
		pr_err("hmem memregion_info allocation failure for %pr\n", &res);
		/*
		 * pdev was successfully allocated above, so its reference
		 * must be dropped here as well; freeing only the id would
		 * leak the platform device.
		 */
		goto out_put_pdev;
	}

	rc = platform_device_add_resources(pdev, &res, 1);
	if (rc < 0) {
		pr_err("hmem resource allocation failure for %pr\n", &res);
		goto out_put_pdev;
	}

	rc = platform_device_add(pdev);
	if (rc < 0) {
		dev_err(&pdev->dev, "device add failed for %pr\n", &res);
		goto out_put_pdev;
	}

	return;

out_put_pdev:
	put_device(&pdev->dev);
out_free_id:
	memregion_free(id);
}
/*
 * Per-resource callback handed to walk_iomem_res_desc() by hmem_init().
 * Always returns 0; @data is unused.
 */
static __init int hmem_register_one(struct resource *res, void *data)
{
	/*
	 * Only top-level resources are registered here.  A resource whose
	 * parent is not iomem_resource was already assigned to a device by
	 * the HMAT parsing.
	 */
	if (res->parent == &iomem_resource)
		hmem_register_device(phys_to_target_node(res->start), res);
	else
		pr_info("HMEM: skip %pr, already claimed\n", res);

	return 0;
}
}
/*
 * Walk the IORESOURCE_MEM resources described as IORES_DESC_SOFT_RESERVED
 * over the range 0 .. -1 and pass each one to hmem_register_one().  The
 * result of the walk is not checked; the initcall always reports success.
 */
static __init int hmem_init(void)
{
	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM,
			    0, -1,
			    NULL, hmem_register_one);

	return 0;
}
/*
* As this is a fallback for address ranges unclaimed by the ACPI HMAT
* parsing it must be at an initcall level greater than hmat_init().
*/
late_initcall(hmem_init);

View File

@ -3,30 +3,39 @@
#include <linux/memregion.h> #include <linux/memregion.h>
#include <linux/module.h> #include <linux/module.h>
#include <linux/pfn_t.h> #include <linux/pfn_t.h>
#include "bus.h" #include "../bus.h"
static bool region_idle;
module_param_named(region_idle, region_idle, bool, 0644);
static int dax_hmem_probe(struct platform_device *pdev) static int dax_hmem_probe(struct platform_device *pdev)
{ {
struct device *dev = &pdev->dev; struct device *dev = &pdev->dev;
struct dev_pagemap pgmap = { };
struct dax_region *dax_region; struct dax_region *dax_region;
struct memregion_info *mri; struct memregion_info *mri;
struct dev_dax_data data;
struct dev_dax *dev_dax; struct dev_dax *dev_dax;
struct resource *res; struct resource *res;
struct range range;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0); res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) if (!res)
return -ENOMEM; return -ENOMEM;
mri = dev->platform_data; mri = dev->platform_data;
memcpy(&pgmap.res, res, sizeof(*res)); range.start = res->start;
range.end = res->end;
dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node, dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node,
PMD_SIZE, PFN_DEV|PFN_MAP); PMD_SIZE, 0);
if (!dax_region) if (!dax_region)
return -ENOMEM; return -ENOMEM;
dev_dax = devm_create_dev_dax(dax_region, 0, &pgmap); data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = -1,
.size = region_idle ? 0 : resource_size(res),
};
dev_dax = devm_create_dev_dax(&data);
if (IS_ERR(dev_dax)) if (IS_ERR(dev_dax))
return PTR_ERR(dev_dax); return PTR_ERR(dev_dax);

View File

@ -19,17 +19,28 @@ static const char *kmem_name;
/* Set if any memory will remain added when the driver will be unloaded. */ /* Set if any memory will remain added when the driver will be unloaded. */
static bool any_hotremove_failed; static bool any_hotremove_failed;
int dev_dax_kmem_probe(struct device *dev) static int dax_kmem_range(struct dev_dax *dev_dax, int i, struct range *r)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev); struct dev_dax_range *dax_range = &dev_dax->ranges[i];
struct resource *res = &dev_dax->region->res; struct range *range = &dax_range->range;
resource_size_t kmem_start;
resource_size_t kmem_size; /* memory-block align the hotplug range */
resource_size_t kmem_end; r->start = ALIGN(range->start, memory_block_size_bytes());
struct resource *new_res; r->end = ALIGN_DOWN(range->end + 1, memory_block_size_bytes()) - 1;
const char *new_res_name; if (r->start >= r->end) {
r->start = range->start;
r->end = range->end;
return -ENOSPC;
}
return 0;
}
static int dev_dax_kmem_probe(struct dev_dax *dev_dax)
{
struct device *dev = &dev_dax->dev;
int i, mapped = 0;
char *res_name;
int numa_node; int numa_node;
int rc;
/* /*
* Ensure good NUMA information for the persistent memory. * Ensure good NUMA information for the persistent memory.
@ -39,68 +50,80 @@ int dev_dax_kmem_probe(struct device *dev)
*/ */
numa_node = dev_dax->target_node; numa_node = dev_dax->target_node;
if (numa_node < 0) { if (numa_node < 0) {
dev_warn(dev, "rejecting DAX region %pR with invalid node: %d\n", dev_warn(dev, "rejecting DAX region with invalid node: %d\n",
res, numa_node); numa_node);
return -EINVAL; return -EINVAL;
} }
/* Hotplug starting at the beginning of the next block: */ res_name = kstrdup(dev_name(dev), GFP_KERNEL);
kmem_start = ALIGN(res->start, memory_block_size_bytes()); if (!res_name)
kmem_size = resource_size(res);
/* Adjust the size down to compensate for moving up kmem_start: */
kmem_size -= kmem_start - res->start;
/* Align the size down to cover only complete blocks: */
kmem_size &= ~(memory_block_size_bytes() - 1);
kmem_end = kmem_start + kmem_size;
new_res_name = kstrdup(dev_name(dev), GFP_KERNEL);
if (!new_res_name)
return -ENOMEM; return -ENOMEM;
/* Region is permanently reserved if hotremove fails. */ for (i = 0; i < dev_dax->nr_range; i++) {
new_res = request_mem_region(kmem_start, kmem_size, new_res_name); struct resource *res;
if (!new_res) { struct range range;
dev_warn(dev, "could not reserve region [%pa-%pa]\n", int rc;
&kmem_start, &kmem_end);
kfree(new_res_name); rc = dax_kmem_range(dev_dax, i, &range);
return -EBUSY; if (rc) {
dev_info(dev, "mapping%d: %#llx-%#llx too small after alignment\n",
i, range.start, range.end);
continue;
}
/* Region is permanently reserved if hotremove fails. */
res = request_mem_region(range.start, range_len(&range), res_name);
if (!res) {
dev_warn(dev, "mapping%d: %#llx-%#llx could not reserve region\n",
i, range.start, range.end);
/*
* Once some memory has been onlined we can't
* assume that it can be un-onlined safely.
*/
if (mapped)
continue;
kfree(res_name);
return -EBUSY;
}
/*
* Set flags appropriate for System RAM. Leave ..._BUSY clear
* so that add_memory() can add a child resource. Do not
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
res->flags = IORESOURCE_SYSTEM_RAM;
/*
* Ensure that future kexec'd kernels will not treat
* this as RAM automatically.
*/
rc = add_memory_driver_managed(numa_node, range.start,
range_len(&range), kmem_name);
if (rc) {
dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n",
i, range.start, range.end);
release_mem_region(range.start, range_len(&range));
if (mapped)
continue;
kfree(res_name);
return rc;
}
mapped++;
} }
/* dev_set_drvdata(dev, res_name);
* Set flags appropriate for System RAM. Leave ..._BUSY clear
* so that add_memory() can add a child resource. Do not
* inherit flags from the parent since it may set new flags
* unknown to us that will break add_memory() below.
*/
new_res->flags = IORESOURCE_SYSTEM_RAM;
/*
* Ensure that future kexec'd kernels will not treat this as RAM
* automatically.
*/
rc = add_memory_driver_managed(numa_node, new_res->start,
resource_size(new_res), kmem_name);
if (rc) {
release_resource(new_res);
kfree(new_res);
kfree(new_res_name);
return rc;
}
dev_dax->dax_kmem_res = new_res;
return 0; return 0;
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
static int dev_dax_kmem_remove(struct device *dev) static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{ {
struct dev_dax *dev_dax = to_dev_dax(dev); int i, success = 0;
struct resource *res = dev_dax->dax_kmem_res; struct device *dev = &dev_dax->dev;
resource_size_t kmem_start = res->start; const char *res_name = dev_get_drvdata(dev);
resource_size_t kmem_size = resource_size(res);
const char *res_name = res->name;
int rc;
/* /*
* We have one shot for removing memory, if some memory blocks were not * We have one shot for removing memory, if some memory blocks were not
@ -108,25 +131,36 @@ static int dev_dax_kmem_remove(struct device *dev)
* there is no way to hotremove this memory until reboot because device * there is no way to hotremove this memory until reboot because device
* unbind will succeed even if we return failure. * unbind will succeed even if we return failure.
*/ */
rc = remove_memory(dev_dax->target_node, kmem_start, kmem_size); for (i = 0; i < dev_dax->nr_range; i++) {
if (rc) { struct range range;
int rc;
rc = dax_kmem_range(dev_dax, i, &range);
if (rc)
continue;
rc = remove_memory(dev_dax->target_node, range.start,
range_len(&range));
if (rc == 0) {
release_mem_region(range.start, range_len(&range));
success++;
continue;
}
any_hotremove_failed = true; any_hotremove_failed = true;
dev_err(dev, dev_err(dev,
"DAX region %pR cannot be hotremoved until the next reboot\n", "mapping%d: %#llx-%#llx cannot be hotremoved until the next reboot\n",
res); i, range.start, range.end);
return rc;
} }
/* Release and free dax resources */ if (success >= dev_dax->nr_range) {
release_resource(res); kfree(res_name);
kfree(res); dev_set_drvdata(dev, NULL);
kfree(res_name); }
dev_dax->dax_kmem_res = NULL;
return 0; return 0;
} }
#else #else
static int dev_dax_kmem_remove(struct device *dev) static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
{ {
/* /*
* Without hotremove purposely leak the request_mem_region() for the * Without hotremove purposely leak the request_mem_region() for the
@ -141,10 +175,8 @@ static int dev_dax_kmem_remove(struct device *dev)
#endif /* CONFIG_MEMORY_HOTREMOVE */ #endif /* CONFIG_MEMORY_HOTREMOVE */
static struct dax_device_driver device_dax_kmem_driver = { static struct dax_device_driver device_dax_kmem_driver = {
.drv = { .probe = dev_dax_kmem_probe,
.probe = dev_dax_kmem_probe, .remove = dev_dax_kmem_remove,
.remove = dev_dax_kmem_remove,
},
}; };
static int __init dax_kmem_init(void) static int __init dax_kmem_init(void)

View File

@ -22,7 +22,7 @@ static int dax_pmem_compat_probe(struct device *dev)
return -ENOMEM; return -ENOMEM;
device_lock(&dev_dax->dev); device_lock(&dev_dax->dev);
rc = dev_dax_probe(&dev_dax->dev); rc = dev_dax_probe(dev_dax);
device_unlock(&dev_dax->dev); device_unlock(&dev_dax->dev);
devres_close_group(&dev_dax->dev, dev_dax); devres_close_group(&dev_dax->dev, dev_dax);

View File

@ -9,11 +9,12 @@
struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys) struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
{ {
struct resource res; struct range range;
int rc, id, region_id; int rc, id, region_id;
resource_size_t offset; resource_size_t offset;
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
struct dev_dax *dev_dax; struct dev_dax *dev_dax;
struct dev_dax_data data;
struct nd_namespace_io *nsio; struct nd_namespace_io *nsio;
struct dax_region *dax_region; struct dax_region *dax_region;
struct dev_pagemap pgmap = { }; struct dev_pagemap pgmap = { };
@ -49,16 +50,23 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
if (rc != 2) if (rc != 2)
return ERR_PTR(-EINVAL); return ERR_PTR(-EINVAL);
/* adjust the dax_region resource to the start of data */ /* adjust the dax_region range to the start of data */
memcpy(&res, &pgmap.res, sizeof(res)); range = pgmap.range;
res.start += offset; range.start += offset,
dax_region = alloc_dax_region(dev, region_id, &res, dax_region = alloc_dax_region(dev, region_id, &range,
nd_region->target_node, le32_to_cpu(pfn_sb->align), nd_region->target_node, le32_to_cpu(pfn_sb->align),
PFN_DEV|PFN_MAP); IORESOURCE_DAX_STATIC);
if (!dax_region) if (!dax_region)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
dev_dax = __devm_create_dev_dax(dax_region, id, &pgmap, subsys); data = (struct dev_dax_data) {
.dax_region = dax_region,
.id = id,
.pgmap = &pgmap,
.subsys = subsys,
.size = range_len(&range),
};
dev_dax = devm_create_dev_dax(&data);
/* child dev_dax instances now own the lifetime of the dax_region */ /* child dev_dax instances now own the lifetime of the dax_region */
dax_region_put(dax_region); dax_region_put(dax_region);

View File

@ -38,7 +38,7 @@ void __init efi_fake_memmap_early(void)
m_start = mem->range.start; m_start = mem->range.start;
m_end = mem->range.end; m_end = mem->range.end;
for_each_efi_memory_desc(md) { for_each_efi_memory_desc(md) {
u64 start, end; u64 start, end, size;
if (md->type != EFI_CONVENTIONAL_MEMORY) if (md->type != EFI_CONVENTIONAL_MEMORY)
continue; continue;
@ -58,11 +58,17 @@ void __init efi_fake_memmap_early(void)
*/ */
start = max(start, m_start); start = max(start, m_start);
end = min(end, m_end); end = min(end, m_end);
size = end - start + 1;
if (end <= start) if (end <= start)
continue; continue;
e820__range_update(start, end - start + 1, E820_TYPE_RAM,
E820_TYPE_SOFT_RESERVED); /*
* Ensure each efi_fake_mem instance results in
* a unique e820 resource
*/
e820__range_remove(start, size, E820_TYPE_RAM, 1);
e820__range_add(start, size, E820_TYPE_SOFT_RESERVED);
e820__update_table(e820_table); e820__update_table(e820_table);
} }
} }

View File

@ -258,8 +258,8 @@ shmem_writeback(struct drm_i915_gem_object *obj)
for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) { for (i = 0; i < obj->base.size >> PAGE_SHIFT; i++) {
struct page *page; struct page *page;
page = find_lock_entry(mapping, i); page = find_lock_page(mapping, i);
if (!page || xa_is_value(page)) if (!page)
continue; continue;
if (!page_mapped(page) && clear_page_dirty_for_io(page)) { if (!page_mapped(page) && clear_page_dirty_for_io(page)) {

View File

@ -101,7 +101,7 @@ unsigned long nouveau_dmem_page_addr(struct page *page)
{ {
struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page); struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) - unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
chunk->pagemap.res.start; chunk->pagemap.range.start;
return chunk->bo->offset + off; return chunk->bo->offset + off;
} }
@ -249,7 +249,9 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
chunk->drm = drm; chunk->drm = drm;
chunk->pagemap.type = MEMORY_DEVICE_PRIVATE; chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
chunk->pagemap.res = *res; chunk->pagemap.range.start = res->start;
chunk->pagemap.range.end = res->end;
chunk->pagemap.nr_range = 1;
chunk->pagemap.ops = &nouveau_dmem_pagemap_ops; chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
chunk->pagemap.owner = drm->dev; chunk->pagemap.owner = drm->dev;
@ -273,7 +275,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
list_add(&chunk->list, &drm->dmem->chunks); list_add(&chunk->list, &drm->dmem->chunks);
mutex_unlock(&drm->dmem->mutex); mutex_unlock(&drm->dmem->mutex);
pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT; pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
page = pfn_to_page(pfn_first); page = pfn_to_page(pfn_first);
spin_lock(&drm->dmem->lock); spin_lock(&drm->dmem->lock);
for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) { for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
@ -294,8 +296,7 @@ out_bo_unpin:
out_bo_free: out_bo_free:
nouveau_bo_ref(NULL, &chunk->bo); nouveau_bo_ref(NULL, &chunk->bo);
out_release: out_release:
release_mem_region(chunk->pagemap.res.start, release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
resource_size(&chunk->pagemap.res));
out_free: out_free:
kfree(chunk); kfree(chunk);
out: out:
@ -382,8 +383,8 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
nouveau_bo_ref(NULL, &chunk->bo); nouveau_bo_ref(NULL, &chunk->bo);
list_del(&chunk->list); list_del(&chunk->list);
memunmap_pages(&chunk->pagemap); memunmap_pages(&chunk->pagemap);
release_mem_region(chunk->pagemap.res.start, release_mem_region(chunk->pagemap.range.start,
resource_size(&chunk->pagemap.res)); range_len(&chunk->pagemap.range));
kfree(chunk); kfree(chunk);
} }

View File

@ -2198,7 +2198,7 @@ static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size)
addr_end = addr + size - 1; addr_end = addr + size - 1;
for_each_reserved_mem_region(i, &start, &end) { for_each_reserved_mem_range(i, &start, &end) {
if (addr >= start && addr_end <= end) if (addr >= start && addr_end <= end)
return true; return true;
} }

View File

@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
} }
static void badblocks_populate(struct badrange *badrange, static void badblocks_populate(struct badrange *badrange,
struct badblocks *bb, const struct resource *res) struct badblocks *bb, const struct range *range)
{ {
struct badrange_entry *bre; struct badrange_entry *bre;
@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange,
u64 bre_end = bre->start + bre->length - 1; u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */ /* Discard intervals with no intersection */
if (bre_end < res->start) if (bre_end < range->start)
continue; continue;
if (bre->start > res->end) if (bre->start > range->end)
continue; continue;
/* Deal with any overlap after start of the namespace */ /* Deal with any overlap after start of the namespace */
if (bre->start >= res->start) { if (bre->start >= range->start) {
u64 start = bre->start; u64 start = bre->start;
u64 len; u64 len;
if (bre_end <= res->end) if (bre_end <= range->end)
len = bre->length; len = bre->length;
else else
len = res->start + resource_size(res) len = range->start + range_len(range)
- bre->start; - bre->start;
__add_badblock_range(bb, start - res->start, len); __add_badblock_range(bb, start - range->start, len);
continue; continue;
} }
/* /*
* Deal with overlap for badrange starting before * Deal with overlap for badrange starting before
* the namespace. * the namespace.
*/ */
if (bre->start < res->start) { if (bre->start < range->start) {
u64 len; u64 len;
if (bre_end < res->end) if (bre_end < range->end)
len = bre->start + bre->length - res->start; len = bre->start + bre->length - range->start;
else else
len = resource_size(res); len = range_len(range);
__add_badblock_range(bb, 0, len); __add_badblock_range(bb, 0, len);
} }
} }
@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange,
* and add badblocks entries for all matching sub-ranges * and add badblocks entries for all matching sub-ranges
*/ */
void nvdimm_badblocks_populate(struct nd_region *nd_region, void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res) struct badblocks *bb, const struct range *range)
{ {
struct nvdimm_bus *nvdimm_bus; struct nvdimm_bus *nvdimm_bus;
@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev); nvdimm_bus_lock(&nvdimm_bus->dev);
badblocks_populate(&nvdimm_bus->badrange, bb, res); badblocks_populate(&nvdimm_bus->badrange, bb, range);
nvdimm_bus_unlock(&nvdimm_bus->dev); nvdimm_bus_unlock(&nvdimm_bus->dev);
} }
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate); EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);

View File

@ -303,13 +303,16 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio, int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
resource_size_t size) resource_size_t size)
{ {
struct resource *res = &nsio->res;
struct nd_namespace_common *ndns = &nsio->common; struct nd_namespace_common *ndns = &nsio->common;
struct range range = {
.start = nsio->res.start,
.end = nsio->res.end,
};
nsio->size = size; nsio->size = size;
if (!devm_request_mem_region(dev, res->start, size, if (!devm_request_mem_region(dev, range.start, size,
dev_name(&ndns->dev))) { dev_name(&ndns->dev))) {
dev_warn(dev, "could not reserve region %pR\n", res); dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
return -EBUSY; return -EBUSY;
} }
@ -317,9 +320,9 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
if (devm_init_badblocks(dev, &nsio->bb)) if (devm_init_badblocks(dev, &nsio->bb))
return -ENOMEM; return -ENOMEM;
nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
&nsio->res); &range);
nsio->addr = devm_memremap(dev, res->start, size, ARCH_MEMREMAP_PMEM); nsio->addr = devm_memremap(dev, range.start, size, ARCH_MEMREMAP_PMEM);
return PTR_ERR_OR_ZERO(nsio->addr); return PTR_ERR_OR_ZERO(nsio->addr);
} }

View File

@ -377,8 +377,9 @@ int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name); char *name);
unsigned int pmem_sector_size(struct nd_namespace_common *ndns); unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
struct range;
void nvdimm_badblocks_populate(struct nd_region *nd_region, void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res); struct badblocks *bb, const struct range *range);
int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns, int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
resource_size_t size); resource_size_t size);
void devm_namespace_disable(struct device *dev, void devm_namespace_disable(struct device *dev,

View File

@ -672,7 +672,7 @@ static unsigned long init_altmap_reserve(resource_size_t base)
static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{ {
struct resource *res = &pgmap->res; struct range *range = &pgmap->range;
struct vmem_altmap *altmap = &pgmap->altmap; struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff); u64 offset = le64_to_cpu(pfn_sb->dataoff);
@ -689,16 +689,17 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
.end_pfn = PHYS_PFN(end), .end_pfn = PHYS_PFN(end),
}; };
memcpy(res, &nsio->res, sizeof(*res)); *range = (struct range) {
res->start += start_pad; .start = nsio->res.start + start_pad,
res->end -= end_trunc; .end = nsio->res.end - end_trunc,
};
pgmap->nr_range = 1;
if (nd_pfn->mode == PFN_MODE_RAM) { if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < reserve) if (offset < reserve)
return -EINVAL; return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
} else if (nd_pfn->mode == PFN_MODE_PMEM) { } else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); nd_pfn->npfns = PHYS_PFN((range_len(range) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev, dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n", "number of pfns truncated from %lld to %ld\n",

View File

@ -375,7 +375,7 @@ static int pmem_attach_disk(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_region *nd_region = to_nd_region(dev->parent);
int nid = dev_to_node(dev), fua; int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res; struct resource *res = &nsio->res;
struct resource bb_res; struct range bb_range;
struct nd_pfn *nd_pfn = NULL; struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev; struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
@ -434,24 +434,27 @@ static int pmem_attach_disk(struct device *dev,
pfn_sb = nd_pfn->pfn_sb; pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
pmem->pfn_pad = resource_size(res) - pmem->pfn_pad = resource_size(res) -
resource_size(&pmem->pgmap.res); range_len(&pmem->pgmap.range);
pmem->pfn_flags |= PFN_MAP; pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); bb_range = pmem->pgmap.range;
bb_res.start += pmem->data_offset; bb_range.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) { } else if (pmem_should_map_pages(dev)) {
memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res)); pmem->pgmap.range.start = res->start;
pmem->pgmap.range.end = res->end;
pmem->pgmap.nr_range = 1;
pmem->pgmap.type = MEMORY_DEVICE_FS_DAX; pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
pmem->pgmap.ops = &fsdax_pagemap_ops; pmem->pgmap.ops = &fsdax_pagemap_ops;
addr = devm_memremap_pages(dev, &pmem->pgmap); addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP; pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res)); bb_range = pmem->pgmap.range;
} else { } else {
if (devm_add_action_or_reset(dev, pmem_release_queue, if (devm_add_action_or_reset(dev, pmem_release_queue,
&pmem->pgmap)) &pmem->pgmap))
return -ENOMEM; return -ENOMEM;
addr = devm_memremap(dev, pmem->phys_addr, addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM); pmem->size, ARCH_MEMREMAP_PMEM);
memcpy(&bb_res, &nsio->res, sizeof(bb_res)); bb_range.start = res->start;
bb_range.end = res->end;
} }
if (IS_ERR(addr)) if (IS_ERR(addr))
@ -480,7 +483,7 @@ static int pmem_attach_disk(struct device *dev,
/ 512); / 512);
if (devm_init_badblocks(dev, &pmem->bb)) if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM; return -ENOMEM;
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res); nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
disk->bb = &pmem->bb; disk->bb = &pmem->bb;
if (is_nvdimm_sync(nd_region)) if (is_nvdimm_sync(nd_region))
@ -591,8 +594,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
resource_size_t offset = 0, end_trunc = 0; resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns; struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio; struct nd_namespace_io *nsio;
struct resource res;
struct badblocks *bb; struct badblocks *bb;
struct range range;
struct kernfs_node *bb_state; struct kernfs_node *bb_state;
if (event != NVDIMM_REVALIDATE_POISON) if (event != NVDIMM_REVALIDATE_POISON)
@ -628,9 +631,9 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
nsio = to_nd_namespace_io(&ndns->dev); nsio = to_nd_namespace_io(&ndns->dev);
} }
res.start = nsio->res.start + offset; range.start = nsio->res.start + offset;
res.end = nsio->res.end - end_trunc; range.end = nsio->res.end - end_trunc;
nvdimm_badblocks_populate(nd_region, bb, &res); nvdimm_badblocks_populate(nd_region, bb, &range);
if (bb_state) if (bb_state)
sysfs_notify_dirent(bb_state); sysfs_notify_dirent(bb_state);
} }

View File

@ -35,7 +35,10 @@ static int nd_region_probe(struct device *dev)
return rc; return rc;
if (is_memory(&nd_region->dev)) { if (is_memory(&nd_region->dev)) {
struct resource ndr_res; struct range range = {
.start = nd_region->ndr_start,
.end = nd_region->ndr_start + nd_region->ndr_size - 1,
};
if (devm_init_badblocks(dev, &nd_region->bb)) if (devm_init_badblocks(dev, &nd_region->bb))
return -ENODEV; return -ENODEV;
@ -44,9 +47,7 @@ static int nd_region_probe(struct device *dev)
if (!nd_region->bb_state) if (!nd_region->bb_state)
dev_warn(&nd_region->dev, dev_warn(&nd_region->dev,
"'badblocks' notification disabled\n"); "'badblocks' notification disabled\n");
ndr_res.start = nd_region->ndr_start; nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
} }
rc = nd_region_register_namespaces(nd_region, &err); rc = nd_region_register_namespaces(nd_region, &err);
@ -121,14 +122,16 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
{ {
if (event == NVDIMM_REVALIDATE_POISON) { if (event == NVDIMM_REVALIDATE_POISON) {
struct nd_region *nd_region = to_nd_region(dev); struct nd_region *nd_region = to_nd_region(dev);
struct resource res;
if (is_memory(&nd_region->dev)) { if (is_memory(&nd_region->dev)) {
res.start = nd_region->ndr_start; struct range range = {
res.end = nd_region->ndr_start + .start = nd_region->ndr_start,
nd_region->ndr_size - 1; .end = nd_region->ndr_start +
nd_region->ndr_size - 1,
};
nvdimm_badblocks_populate(nd_region, nvdimm_badblocks_populate(nd_region,
&nd_region->bb, &res); &nd_region->bb, &range);
if (nd_region->bb_state) if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state); sysfs_notify_dirent(nd_region->bb_state);
} }

View File

@ -185,9 +185,9 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
return -ENOMEM; return -ENOMEM;
pgmap = &p2p_pgmap->pgmap; pgmap = &p2p_pgmap->pgmap;
pgmap->res.start = pci_resource_start(pdev, bar) + offset; pgmap->range.start = pci_resource_start(pdev, bar) + offset;
pgmap->res.end = pgmap->res.start + size - 1; pgmap->range.end = pgmap->range.start + size - 1;
pgmap->res.flags = pci_resource_flags(pdev, bar); pgmap->nr_range = 1;
pgmap->type = MEMORY_DEVICE_PCI_P2PDMA; pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
p2p_pgmap->provider = pdev; p2p_pgmap->provider = pdev;
@ -202,13 +202,13 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr, error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
pci_bus_address(pdev, bar) + offset, pci_bus_address(pdev, bar) + offset,
resource_size(&pgmap->res), dev_to_node(&pdev->dev), range_len(&pgmap->range), dev_to_node(&pdev->dev),
pgmap->ref); pgmap->ref);
if (error) if (error)
goto pages_free; goto pages_free;
pci_info(pdev, "added peer-to-peer DMA memory %pR\n", pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
&pgmap->res); pgmap->range.start, pgmap->range.end);
return 0; return 0;

View File

@ -36,18 +36,10 @@ enum virtio_mem_mb_state {
VIRTIO_MEM_MB_STATE_OFFLINE, VIRTIO_MEM_MB_STATE_OFFLINE,
/* Partially plugged, fully added to Linux, offline. */ /* Partially plugged, fully added to Linux, offline. */
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL, VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL,
/* Fully plugged, fully added to Linux, online (!ZONE_MOVABLE). */ /* Fully plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE, VIRTIO_MEM_MB_STATE_ONLINE,
/* Partially plugged, fully added to Linux, online (!ZONE_MOVABLE). */ /* Partially plugged, fully added to Linux, online. */
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL, VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL,
/*
* Fully plugged, fully added to Linux, online (ZONE_MOVABLE).
* We are not allowed to allocate (unplug) parts of this block that
* are not movable (similar to gigantic pages). We will never allow
* to online OFFLINE_PARTIAL to ZONE_MOVABLE (as they would contain
* unmovable parts).
*/
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE,
VIRTIO_MEM_MB_STATE_COUNT VIRTIO_MEM_MB_STATE_COUNT
}; };
@ -526,21 +518,10 @@ static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id)
} }
static int virtio_mem_notify_going_online(struct virtio_mem *vm, static int virtio_mem_notify_going_online(struct virtio_mem *vm,
unsigned long mb_id, unsigned long mb_id)
enum zone_type zone)
{ {
switch (virtio_mem_mb_get_state(vm, mb_id)) { switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
/*
* We won't allow to online a partially plugged memory block
* to the MOVABLE zone - it would contain unmovable parts.
*/
if (zone == ZONE_MOVABLE) {
dev_warn_ratelimited(&vm->vdev->dev,
"memory block has holes, MOVABLE not supported\n");
return NOTIFY_BAD;
}
return NOTIFY_OK;
case VIRTIO_MEM_MB_STATE_OFFLINE: case VIRTIO_MEM_MB_STATE_OFFLINE:
return NOTIFY_OK; return NOTIFY_OK;
default: default:
@ -560,7 +541,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL);
break; break;
case VIRTIO_MEM_MB_STATE_ONLINE: case VIRTIO_MEM_MB_STATE_ONLINE:
case VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE:
virtio_mem_mb_set_state(vm, mb_id, virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_OFFLINE); VIRTIO_MEM_MB_STATE_OFFLINE);
break; break;
@ -579,24 +559,17 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm,
virtio_mem_retry(vm); virtio_mem_retry(vm);
} }
static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id)
enum zone_type zone)
{ {
unsigned long nb_offline; unsigned long nb_offline;
switch (virtio_mem_mb_get_state(vm, mb_id)) { switch (virtio_mem_mb_get_state(vm, mb_id)) {
case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL:
BUG_ON(zone == ZONE_MOVABLE);
virtio_mem_mb_set_state(vm, mb_id, virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL);
break; break;
case VIRTIO_MEM_MB_STATE_OFFLINE: case VIRTIO_MEM_MB_STATE_OFFLINE:
if (zone == ZONE_MOVABLE) virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE);
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE);
else
virtio_mem_mb_set_state(vm, mb_id,
VIRTIO_MEM_MB_STATE_ONLINE);
break; break;
default: default:
BUG(); BUG();
@ -675,7 +648,6 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long start = PFN_PHYS(mhp->start_pfn);
const unsigned long size = PFN_PHYS(mhp->nr_pages); const unsigned long size = PFN_PHYS(mhp->nr_pages);
const unsigned long mb_id = virtio_mem_phys_to_mb_id(start); const unsigned long mb_id = virtio_mem_phys_to_mb_id(start);
enum zone_type zone;
int rc = NOTIFY_OK; int rc = NOTIFY_OK;
if (!virtio_mem_overlaps_range(vm, start, size)) if (!virtio_mem_overlaps_range(vm, start, size))
@ -717,8 +689,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
break; break;
} }
vm->hotplug_active = true; vm->hotplug_active = true;
zone = page_zonenum(pfn_to_page(mhp->start_pfn)); rc = virtio_mem_notify_going_online(vm, mb_id);
rc = virtio_mem_notify_going_online(vm, mb_id, zone);
break; break;
case MEM_OFFLINE: case MEM_OFFLINE:
virtio_mem_notify_offline(vm, mb_id); virtio_mem_notify_offline(vm, mb_id);
@ -726,8 +697,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
mutex_unlock(&vm->hotplug_mutex); mutex_unlock(&vm->hotplug_mutex);
break; break;
case MEM_ONLINE: case MEM_ONLINE:
zone = page_zonenum(pfn_to_page(mhp->start_pfn)); virtio_mem_notify_online(vm, mb_id);
virtio_mem_notify_online(vm, mb_id, zone);
vm->hotplug_active = false; vm->hotplug_active = false;
mutex_unlock(&vm->hotplug_mutex); mutex_unlock(&vm->hotplug_mutex);
break; break;
@ -1906,8 +1876,7 @@ static void virtio_mem_remove(struct virtio_device *vdev)
if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] || if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] ||
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL] || vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) {
vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_MOVABLE]) {
dev_warn(&vdev->dev, "device still has system memory added\n"); dev_warn(&vdev->dev, "device still has system memory added\n");
} else { } else {
virtio_mem_delete_resource(vm); virtio_mem_delete_resource(vm);

View File

@ -18,27 +18,38 @@ static unsigned int list_count;
static int fill_list(unsigned int nr_pages) static int fill_list(unsigned int nr_pages)
{ {
struct dev_pagemap *pgmap; struct dev_pagemap *pgmap;
struct resource *res;
void *vaddr; void *vaddr;
unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION); unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
int ret; int ret = -ENOMEM;
res = kzalloc(sizeof(*res), GFP_KERNEL);
if (!res)
return -ENOMEM;
pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL); pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
if (!pgmap) if (!pgmap)
return -ENOMEM; goto err_pgmap;
pgmap->type = MEMORY_DEVICE_GENERIC; pgmap->type = MEMORY_DEVICE_GENERIC;
pgmap->res.name = "Xen scratch"; res->name = "Xen scratch";
pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
ret = allocate_resource(&iomem_resource, &pgmap->res, ret = allocate_resource(&iomem_resource, res,
alloc_pages * PAGE_SIZE, 0, -1, alloc_pages * PAGE_SIZE, 0, -1,
PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL); PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
if (ret < 0) { if (ret < 0) {
pr_err("Cannot allocate new IOMEM resource\n"); pr_err("Cannot allocate new IOMEM resource\n");
kfree(pgmap); goto err_resource;
return ret;
} }
pgmap->range = (struct range) {
.start = res->start,
.end = res->end,
};
pgmap->nr_range = 1;
pgmap->owner = res;
#ifdef CONFIG_XEN_HAVE_PVMMU #ifdef CONFIG_XEN_HAVE_PVMMU
/* /*
* memremap will build page tables for the new memory so * memremap will build page tables for the new memory so
@ -50,14 +61,13 @@ static int fill_list(unsigned int nr_pages)
* conflict with any devices. * conflict with any devices.
*/ */
if (!xen_feature(XENFEAT_auto_translated_physmap)) { if (!xen_feature(XENFEAT_auto_translated_physmap)) {
xen_pfn_t pfn = PFN_DOWN(pgmap->res.start); xen_pfn_t pfn = PFN_DOWN(res->start);
for (i = 0; i < alloc_pages; i++) { for (i = 0; i < alloc_pages; i++) {
if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) { if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
pr_warn("set_phys_to_machine() failed, no memory added\n"); pr_warn("set_phys_to_machine() failed, no memory added\n");
release_resource(&pgmap->res); ret = -ENOMEM;
kfree(pgmap); goto err_memremap;
return -ENOMEM;
} }
} }
} }
@ -66,9 +76,8 @@ static int fill_list(unsigned int nr_pages)
vaddr = memremap_pages(pgmap, NUMA_NO_NODE); vaddr = memremap_pages(pgmap, NUMA_NO_NODE);
if (IS_ERR(vaddr)) { if (IS_ERR(vaddr)) {
pr_err("Cannot remap memory range\n"); pr_err("Cannot remap memory range\n");
release_resource(&pgmap->res); ret = PTR_ERR(vaddr);
kfree(pgmap); goto err_memremap;
return PTR_ERR(vaddr);
} }
for (i = 0; i < alloc_pages; i++) { for (i = 0; i < alloc_pages; i++) {
@ -80,6 +89,14 @@ static int fill_list(unsigned int nr_pages)
} }
return 0; return 0;
err_memremap:
release_resource(res);
err_resource:
kfree(pgmap);
err_pgmap:
kfree(res);
return ret;
} }
/** /**

View File

@ -189,7 +189,7 @@ out:
} }
EXPORT_SYMBOL(fs_lookup_param); EXPORT_SYMBOL(fs_lookup_param);
int fs_param_bad_value(struct p_log *log, struct fs_parameter *param) static int fs_param_bad_value(struct p_log *log, struct fs_parameter *param)
{ {
return inval_plog(log, "Bad value for '%s'", param->key); return inval_plog(log, "Bad value for '%s'", param->key);
} }

View File

@ -1810,6 +1810,12 @@ int ntfs_read_inode_mount(struct inode *vi)
brelse(bh); brelse(bh);
} }
if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
goto err_out;
}
/* Apply the mst fixups. */ /* Apply the mst fixups. */
if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) { if (post_read_mst_fixup((NTFS_RECORD*)m, vol->mft_record_size)) {
/* FIXME: Try to use the $MFTMirr now. */ /* FIXME: Try to use the $MFTMirr now. */

View File

@ -6013,7 +6013,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out; goto out;
} }
/* Appending truncate log(TA) and and flushing truncate log(TF) are /* Appending truncate log(TA) and flushing truncate log(TF) are
* two separated transactions. They can be both committed but not * two separated transactions. They can be both committed but not
* checkpointed. If crash occurs then, both two transaction will be * checkpointed. If crash occurs then, both two transaction will be
* replayed with several already released to global bitmap clusters. * replayed with several already released to global bitmap clusters.
@ -7654,8 +7654,10 @@ out_mutex:
* main_bm related locks for avoiding the current IO starve, then go to * main_bm related locks for avoiding the current IO starve, then go to
* trim the next group * trim the next group
*/ */
if (ret >= 0 && group <= last_group) if (ret >= 0 && group <= last_group) {
cond_resched();
goto next_group; goto next_group;
}
out: out:
range->len = trimmed * sb->s_blocksize; range->len = trimmed * sb->s_blocksize;
return ret; return ret;

View File

@ -677,7 +677,7 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
/* /*
* Under certain conditions, the window slide code * Under certain conditions, the window slide code
* might have reduced the number of bits available or * might have reduced the number of bits available or
* disabled the the local alloc entirely. Re-check * disabled the local alloc entirely. Re-check
* here and return -ENOSPC if necessary. * here and return -ENOSPC if necessary.
*/ */
status = -ENOSPC; status = -ENOSPC;

View File

@ -1055,7 +1055,6 @@ static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count,
static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
{ {
static DEFINE_MUTEX(oom_adj_mutex);
struct mm_struct *mm = NULL; struct mm_struct *mm = NULL;
struct task_struct *task; struct task_struct *task;
int err = 0; int err = 0;
@ -1095,7 +1094,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy)
struct task_struct *p = find_lock_task_mm(task); struct task_struct *p = find_lock_task_mm(task);
if (p) { if (p) {
if (atomic_read(&p->mm->mm_users) > 1) { if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) {
mm = p->mm; mm = p->mm;
mmgrab(mm); mmgrab(mm);
} }

View File

@ -520,16 +520,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
page = device_private_entry_to_page(swpent); page = device_private_entry_to_page(swpent);
} else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap } else if (unlikely(IS_ENABLED(CONFIG_SHMEM) && mss->check_shmem_swap
&& pte_none(*pte))) { && pte_none(*pte))) {
page = find_get_entry(vma->vm_file->f_mapping, page = xa_load(&vma->vm_file->f_mapping->i_pages,
linear_page_index(vma, addr)); linear_page_index(vma, addr));
if (!page)
return;
if (xa_is_value(page)) if (xa_is_value(page))
mss->swap += PAGE_SIZE; mss->swap += PAGE_SIZE;
else
put_page(page);
return; return;
} }
@ -727,9 +721,21 @@ static const struct mm_walk_ops smaps_shmem_walk_ops = {
.pte_hole = smaps_pte_hole, .pte_hole = smaps_pte_hole,
}; };
/*
* Gather mem stats from @vma with the indicated beginning
* address @start, and keep them in @mss.
*
* Use vm_start of @vma as the beginning address if @start is 0.
*/
static void smap_gather_stats(struct vm_area_struct *vma, static void smap_gather_stats(struct vm_area_struct *vma,
struct mem_size_stats *mss) struct mem_size_stats *mss, unsigned long start)
{ {
const struct mm_walk_ops *ops = &smaps_walk_ops;
/* Invalid start */
if (start >= vma->vm_end)
return;
#ifdef CONFIG_SHMEM #ifdef CONFIG_SHMEM
/* In case of smaps_rollup, reset the value from previous vma */ /* In case of smaps_rollup, reset the value from previous vma */
mss->check_shmem_swap = false; mss->check_shmem_swap = false;
@ -746,18 +752,20 @@ static void smap_gather_stats(struct vm_area_struct *vma,
*/ */
unsigned long shmem_swapped = shmem_swap_usage(vma); unsigned long shmem_swapped = shmem_swap_usage(vma);
if (!shmem_swapped || (vma->vm_flags & VM_SHARED) || if (!start && (!shmem_swapped || (vma->vm_flags & VM_SHARED) ||
!(vma->vm_flags & VM_WRITE)) { !(vma->vm_flags & VM_WRITE))) {
mss->swap += shmem_swapped; mss->swap += shmem_swapped;
} else { } else {
mss->check_shmem_swap = true; mss->check_shmem_swap = true;
walk_page_vma(vma, &smaps_shmem_walk_ops, mss); ops = &smaps_shmem_walk_ops;
return;
} }
} }
#endif #endif
/* mmap_lock is held in m_start */ /* mmap_lock is held in m_start */
walk_page_vma(vma, &smaps_walk_ops, mss); if (!start)
walk_page_vma(vma, ops, mss);
else
walk_page_range(vma->vm_mm, start, vma->vm_end, ops, mss);
} }
#define SEQ_PUT_DEC(str, val) \ #define SEQ_PUT_DEC(str, val) \
@ -809,7 +817,7 @@ static int show_smap(struct seq_file *m, void *v)
memset(&mss, 0, sizeof(mss)); memset(&mss, 0, sizeof(mss));
smap_gather_stats(vma, &mss); smap_gather_stats(vma, &mss, 0);
show_map_vma(m, vma); show_map_vma(m, vma);
@ -857,9 +865,73 @@ static int show_smaps_rollup(struct seq_file *m, void *v)
hold_task_mempolicy(priv); hold_task_mempolicy(priv);
for (vma = priv->mm->mmap; vma; vma = vma->vm_next) { for (vma = priv->mm->mmap; vma;) {
smap_gather_stats(vma, &mss); smap_gather_stats(vma, &mss, 0);
last_vma_end = vma->vm_end; last_vma_end = vma->vm_end;
/*
* Release mmap_lock temporarily if someone wants to
* access it for write request.
*/
if (mmap_lock_is_contended(mm)) {
mmap_read_unlock(mm);
ret = mmap_read_lock_killable(mm);
if (ret) {
release_task_mempolicy(priv);
goto out_put_mm;
}
/*
* After dropping the lock, there are four cases to
* consider. See the following example for explanation.
*
* +------+------+-----------+
* | VMA1 | VMA2 | VMA3 |
* +------+------+-----------+
* | | | |
* 4k 8k 16k 400k
*
* Suppose we drop the lock after reading VMA2 due to
* contention, then we get:
*
* last_vma_end = 16k
*
* 1) VMA2 is freed, but VMA3 exists:
*
* find_vma(mm, 16k - 1) will return VMA3.
* In this case, just continue from VMA3.
*
* 2) VMA2 still exists:
*
* find_vma(mm, 16k - 1) will return VMA2.
* Iterate the loop like the original one.
*
* 3) No more VMAs can be found:
*
* find_vma(mm, 16k - 1) will return NULL.
* No more things to do, just break.
*
* 4) (last_vma_end - 1) is the middle of a vma (VMA'):
*
* find_vma(mm, 16k - 1) will return VMA' whose range
* contains last_vma_end.
* Iterate VMA' from last_vma_end.
*/
vma = find_vma(mm, last_vma_end - 1);
/* Case 3 above */
if (!vma)
break;
/* Case 1 above */
if (vma->vm_start >= last_vma_end)
continue;
/* Case 4 above */
if (vma->vm_end > last_vma_end)
smap_gather_stats(vma, &mss, last_vma_end);
}
/* Case 2 above */
vma = vma->vm_next;
} }
show_vma_header_prefix(m, priv->mm->mmap->vm_start, show_vma_header_prefix(m, priv->mm->mmap->vm_start,

View File

@ -232,15 +232,15 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
} }
/** /**
* __vfs_setxattr_locked: set an extended attribute while holding the inode * __vfs_setxattr_locked - set an extended attribute while holding the inode
* lock * lock
* *
* @dentry - object to perform setxattr on * @dentry: object to perform setxattr on
* @name - xattr name to set * @name: xattr name to set
* @value - value to set @name to * @value: value to set @name to
* @size - size of @value * @size: size of @value
* @flags - flags to pass into filesystem operations * @flags: flags to pass into filesystem operations
* @delegated_inode - on return, will contain an inode pointer that * @delegated_inode: on return, will contain an inode pointer that
* a delegation was broken on, NULL if none. * a delegation was broken on, NULL if none.
*/ */
int int
@ -443,12 +443,12 @@ __vfs_removexattr(struct dentry *dentry, const char *name)
EXPORT_SYMBOL(__vfs_removexattr); EXPORT_SYMBOL(__vfs_removexattr);
/** /**
* __vfs_removexattr_locked: set an extended attribute while holding the inode * __vfs_removexattr_locked - set an extended attribute while holding the inode
* lock * lock
* *
* @dentry - object to perform setxattr on * @dentry: object to perform setxattr on
* @name - name of xattr to remove * @name: name of xattr to remove
* @delegated_inode - on return, will contain an inode pointer that * @delegated_inode: on return, will contain an inode pointer that
* a delegation was broken on, NULL if none. * a delegation was broken on, NULL if none.
*/ */
int int

View File

@ -17,10 +17,22 @@ extern int pxm_to_node(int);
extern int node_to_pxm(int); extern int node_to_pxm(int);
extern int acpi_map_pxm_to_node(int); extern int acpi_map_pxm_to_node(int);
extern unsigned char acpi_srat_revision; extern unsigned char acpi_srat_revision;
extern int acpi_numa __initdata; extern void disable_srat(void);
extern void bad_srat(void); extern void bad_srat(void);
extern int srat_disabled(void); extern int srat_disabled(void);
#else /* CONFIG_ACPI_NUMA */
static inline void disable_srat(void)
{
}
#endif /* CONFIG_ACPI_NUMA */ #endif /* CONFIG_ACPI_NUMA */
#ifdef CONFIG_ACPI_HMAT
extern void disable_hmat(void);
#else /* CONFIG_ACPI_HMAT */
static inline void disable_hmat(void)
{
}
#endif /* CONFIG_ACPI_HMAT */
#endif /* __ACP_NUMA_H */ #endif /* __ACP_NUMA_H */

View File

@ -224,6 +224,11 @@ struct kunit {
struct list_head resources; /* Protected by lock. */ struct list_head resources; /* Protected by lock. */
}; };
static inline void kunit_set_failure(struct kunit *test)
{
WRITE_ONCE(test->success, false);
}
void kunit_init_test(struct kunit *test, const char *name, char *log); void kunit_init_test(struct kunit *test, const char *name, char *log);
int kunit_run_tests(struct kunit_suite *suite); int kunit_run_tests(struct kunit_suite *suite);

View File

@ -709,6 +709,8 @@ static inline u64 acpi_arch_get_root_pointer(void)
#define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_HANDLE_FWNODE(fwnode) (NULL)
#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0),
#include <acpi/acpi_numa.h>
struct fwnode_handle; struct fwnode_handle;
static inline bool acpi_dev_found(const char *hid) static inline bool acpi_dev_found(const char *hid)

View File

@ -29,9 +29,6 @@ enum compact_result {
/* compaction didn't start as it was deferred due to past failures */ /* compaction didn't start as it was deferred due to past failures */
COMPACT_DEFERRED, COMPACT_DEFERRED,
/* compaction not active last round */
COMPACT_INACTIVE = COMPACT_DEFERRED,
/* For more detailed tracepoint output - internal to compaction */ /* For more detailed tracepoint output - internal to compaction */
COMPACT_NO_SUITABLE_PAGE, COMPACT_NO_SUITABLE_PAGE,
/* compaction should continue to another pageblock */ /* compaction should continue to another pageblock */

View File

@ -3,6 +3,14 @@
#error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
#endif #endif
#define CLANG_VERSION (__clang_major__ * 10000 \
+ __clang_minor__ * 100 \
+ __clang_patchlevel__)
#if CLANG_VERSION < 100001
# error Sorry, your version of Clang is too old - please use 10.0.1 or newer.
#endif
/* Compiler specific definitions for Clang compiler */ /* Compiler specific definitions for Clang compiler */
/* same as gcc, this was present in clang-2.6 so we can assume it works /* same as gcc, this was present in clang-2.6 so we can assume it works

Some files were not shown because too many files have changed in this diff Show More