libnvdimm for 4.16

* Require struct page by default for filesystem DAX to remove a number of
   surprising failure cases.  This includes failures with direct I/O, gdb and
   fork(2).
 
 * Add support for the new Platform Capabilities Structure added to the NFIT in
   ACPI 6.2a.  This new table tells us whether the platform supports flushing
   of CPU and memory controller caches on unexpected power loss events.
 
 * Revamp vmem_altmap and dev_pagemap handling to clean up code and better
   support future future PCI P2P uses.
 
 * Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has become
   out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL spec, and
   instead rely on the generic ND_CMD_CALL approach used by the two other IOCTL
   families, NVDIMM_FAMILY_{HPE,MSFT}.
 
 * Enhance nfit_test so we can test some of the new things added in version 1.6
   of the DSM specification.  This includes testing firmware download and
   simulating the Last Shutdown State (LSS) status.
 -----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJaeOg0AAoJEJ/BjXdf9fLBAFoQAI/IgcgJ2h9lfEpgjBRTC44t
 2p8dxwT1Ofw3Y1aR/tI8nYRXjRtAGuP4UIeRVnb1CL/N7PagJyoMGU+6hmzg+ptY
 c7cEDvw6nZOhrFwXx/xn7R53sYG8zH+UE6+jTR/PP/G4mQJfFCg4iF9R72Y7z0n7
 aurf82Kz137NPUy6dNr4V9bmPMJWAaOci9WOj5SKddR5ZSNbjoxylTwQRvre5y4r
 7HQTScEkirABOdSf1JoXTSUXCH/RC9UFFXR03ScHstGb1HjCj3KdcicVc50Q++Ub
 qsEudhE6i44PEW1Hh4Qkg6hjHMEa8qHP+ShBuRuVaUmlghYTQn66niJAYLZilwdz
 EVjE7vR+toHA5g3YCalEmYVutUEhIDkh/xfpd7vM6ZorUGJy95a2elEJs2fHBffC
 gEhnCip7FROPcK5RDNUM8hBgnG/q5wwWPQMKY+6rKDZQx3mXssCrKp2Vlx7kBwMG
 rpblkEpYjPonbLEHxsSU8yTg9Uq55ciIWgnOToffcjZvjbihi8WUVlHcwHUMPf/o
 DWElg+4qmG0Sdd4S2NeAGwTl1Ewrf2RrtUGMjHtH4OUFs1wo6ZmfrxFzzMfoZ1Od
 ko/s65v4uwtTzECh2o+XQaNsReR5YETXxmA40N/Jpo7/7twABIoZ/ASvj/3ZBYj+
 sie+u2rTod8/gQWSfHpJ
 =MIMX
 -----END PGP SIGNATURE-----

Merge tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm

Pull libnvdimm updates from Ross Zwisler:

 - Require struct page by default for filesystem DAX to remove a number
   of surprising failure cases. This includes failures with direct I/O,
   gdb and fork(2).

 - Add support for the new Platform Capabilities Structure added to the
   NFIT in ACPI 6.2a. This new table tells us whether the platform
   supports flushing of CPU and memory controller caches on unexpected
   power loss events.

 - Revamp vmem_altmap and dev_pagemap handling to clean up code and
   better support future future PCI P2P uses.

 - Deprecate the ND_IOCTL_SMART_THRESHOLD command whose payload has
   become out-of-sync with recent versions of the NVDIMM_FAMILY_INTEL
   spec, and instead rely on the generic ND_CMD_CALL approach used by
   the two other IOCTL families, NVDIMM_FAMILY_{HPE,MSFT}.

 - Enhance nfit_test so we can test some of the new things added in
   version 1.6 of the DSM specification. This includes testing firmware
   download and simulating the Last Shutdown State (LSS) status.

* tag 'libnvdimm-for-4.16' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (37 commits)
  libnvdimm, namespace: remove redundant initialization of 'nd_mapping'
  acpi, nfit: fix register dimm error handling
  libnvdimm, namespace: make min namespace size 4K
  tools/testing/nvdimm: force nfit_test to depend on instrumented modules
  libnvdimm/nfit_test: adding support for unit testing enable LSS status
  libnvdimm/nfit_test: add firmware download emulation
  nfit-test: Add platform cap support from ACPI 6.2a to test
  libnvdimm: expose platform persistence attribute for nd_region
  acpi: nfit: add persistent memory control flag for nd_region
  acpi: nfit: Add support for detect platform CPU cache flush on power loss
  device-dax: Fix trailing semicolon
  libnvdimm, btt: fix uninitialized err_lock
  dax: require 'struct page' by default for filesystem dax
  ext2: auto disable dax instead of failing mount
  ext4: auto disable dax instead of failing mount
  mm, dax: introduce pfn_t_special()
  mm: Fix devm_memremap_pages() collision handling
  mm: Fix memory size alignment in devm_memremap_pages_release()
  memremap: merge find_dev_pagemap into get_dev_pagemap
  memremap: change devm_memremap_pages interface to use struct dev_pagemap
  ...
This commit is contained in:
Linus Torvalds 2018-02-06 10:41:33 -08:00
commit 3ff1b28caa
52 changed files with 1124 additions and 529 deletions

View File

@ -685,12 +685,14 @@ int kern_addr_valid(unsigned long addr)
} }
#ifdef CONFIG_SPARSEMEM_VMEMMAP #ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS #if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
return vmemmap_populate_basepages(start, end, node); return vmemmap_populate_basepages(start, end, node);
} }
#else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
unsigned long addr = start; unsigned long addr = start;
unsigned long next; unsigned long next;
@ -725,7 +727,8 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
return 0; return 0;
} }
#endif /* CONFIG_ARM64_64K_PAGES */ #endif /* CONFIG_ARM64_64K_PAGES */
void vmemmap_free(unsigned long start, unsigned long end) void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
} }
#endif /* CONFIG_SPARSEMEM_VMEMMAP */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */

View File

@ -754,12 +754,14 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat)
#endif #endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP #ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
return vmemmap_populate_basepages(start, end, node); return vmemmap_populate_basepages(start, end, node);
} }
void vmemmap_free(unsigned long start, unsigned long end) void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
} }
#endif #endif

View File

@ -501,7 +501,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
if (map_start < map_end) if (map_start < map_end)
memmap_init_zone((unsigned long)(map_end - map_start), memmap_init_zone((unsigned long)(map_end - map_start),
args->nid, args->zone, page_to_pfn(map_start), args->nid, args->zone, page_to_pfn(map_start),
MEMMAP_EARLY); MEMMAP_EARLY, NULL);
return 0; return 0;
} }
@ -509,9 +509,10 @@ void __meminit
memmap_init (unsigned long size, int nid, unsigned long zone, memmap_init (unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn) unsigned long start_pfn)
{ {
if (!vmem_map) if (!vmem_map) {
memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY); memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY,
else { NULL);
} else {
struct page *start; struct page *start;
struct memmap_init_callback_data args; struct memmap_init_callback_data args;
@ -647,13 +648,14 @@ mem_init (void)
} }
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
int ret; int ret;
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (ret) if (ret)
printk("%s: Problem encountered in __add_pages() as ret=%d\n", printk("%s: Problem encountered in __add_pages() as ret=%d\n",
__func__, ret); __func__, ret);
@ -662,7 +664,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size) int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
@ -670,7 +672,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret; int ret;
zone = page_zone(pfn_to_page(start_pfn)); zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages); ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (ret) if (ret)
pr_warn("%s: Problem encountered in __remove_pages() as" pr_warn("%s: Problem encountered in __remove_pages() as"
" ret=%d\n", __func__, ret); " ret=%d\n", __func__, ret);

View File

@ -183,7 +183,8 @@ static __meminit void vmemmap_list_populate(unsigned long phys,
vmemmap_list = vmem_back; vmemmap_list = vmem_back;
} }
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
@ -193,17 +194,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) { for (; start < end; start += page_size) {
struct vmem_altmap *altmap;
void *p; void *p;
int rc; int rc;
if (vmemmap_populated(start, page_size)) if (vmemmap_populated(start, page_size))
continue; continue;
/* altmap lookups only work at section boundaries */ if (altmap)
altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start)); p = altmap_alloc_block_buf(page_size, altmap);
else
p = __vmemmap_alloc_block_buf(page_size, node, altmap); p = vmemmap_alloc_block_buf(page_size, node);
if (!p) if (!p)
return -ENOMEM; return -ENOMEM;
@ -256,7 +256,8 @@ static unsigned long vmemmap_list_free(unsigned long start)
return vmem_back->phys; return vmem_back->phys;
} }
void __ref vmemmap_free(unsigned long start, unsigned long end) void __ref vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift;
unsigned long page_order = get_order(page_size); unsigned long page_order = get_order(page_size);
@ -267,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
for (; start < end; start += page_size) { for (; start < end; start += page_size) {
unsigned long nr_pages, addr; unsigned long nr_pages, addr;
struct vmem_altmap *altmap;
struct page *section_base; struct page *section_base;
struct page *page; struct page *page;
@ -287,7 +287,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
section_base = pfn_to_page(vmemmap_section_start(start)); section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order; nr_pages = 1 << page_order;
altmap = to_vmem_altmap((unsigned long) section_base);
if (altmap) { if (altmap) {
vmem_altmap_free(altmap, nr_pages); vmem_altmap_free(altmap, nr_pages);
} else if (PageReserved(page)) { } else if (PageReserved(page)) {

View File

@ -127,7 +127,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV; return -ENODEV;
} }
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
@ -143,15 +144,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
return -EFAULT; return -EFAULT;
} }
return __add_pages(nid, start_pfn, nr_pages, want_memblock); return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size) int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
struct vmem_altmap *altmap;
struct page *page; struct page *page;
int ret; int ret;
@ -160,11 +160,10 @@ int arch_remove_memory(u64 start, u64 size)
* when querying the zone. * when querying the zone.
*/ */
page = pfn_to_page(start_pfn); page = pfn_to_page(start_pfn);
altmap = to_vmem_altmap((unsigned long) page);
if (altmap) if (altmap)
page += vmem_altmap_offset(altmap); page += vmem_altmap_offset(altmap);
ret = __remove_pages(page_zone(page), start_pfn, nr_pages); ret = __remove_pages(page_zone(page), start_pfn, nr_pages, altmap);
if (ret) if (ret)
return ret; return ret;

View File

@ -222,7 +222,8 @@ device_initcall(s390_cma_mem_init);
#endif /* CONFIG_CMA */ #endif /* CONFIG_CMA */
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = PFN_DOWN(start); unsigned long start_pfn = PFN_DOWN(start);
unsigned long size_pages = PFN_DOWN(size); unsigned long size_pages = PFN_DOWN(size);
@ -232,14 +233,14 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock)
if (rc) if (rc)
return rc; return rc;
rc = __add_pages(nid, start_pfn, size_pages, want_memblock); rc = __add_pages(nid, start_pfn, size_pages, altmap, want_memblock);
if (rc) if (rc)
vmem_remove_mapping(start, size); vmem_remove_mapping(start, size);
return rc; return rc;
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size) int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
/* /*
* There is no hardware or firmware interface which could trigger a * There is no hardware or firmware interface which could trigger a

View File

@ -211,7 +211,8 @@ static void vmem_remove_range(unsigned long start, unsigned long size)
/* /*
* Add a backed mem_map array to the virtual mem_map array. * Add a backed mem_map array to the virtual mem_map array.
*/ */
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
unsigned long pgt_prot, sgt_prot; unsigned long pgt_prot, sgt_prot;
unsigned long address = start; unsigned long address = start;
@ -296,7 +297,8 @@ out:
return ret; return ret;
} }
void vmemmap_free(unsigned long start, unsigned long end) void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
} }

View File

@ -485,20 +485,20 @@ void free_initrd_mem(unsigned long start, unsigned long end)
#endif #endif
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = PFN_DOWN(start); unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
int ret; int ret;
/* We only have ZONE_NORMAL, so this is easy.. */ /* We only have ZONE_NORMAL, so this is easy.. */
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
if (unlikely(ret)) if (unlikely(ret))
printk("%s: Failed, __add_pages() == %d\n", __func__, ret); printk("%s: Failed, __add_pages() == %d\n", __func__, ret);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(arch_add_memory);
#ifdef CONFIG_NUMA #ifdef CONFIG_NUMA
int memory_add_physaddr_to_nid(u64 addr) int memory_add_physaddr_to_nid(u64 addr)
@ -510,7 +510,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif #endif
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size) int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
unsigned long start_pfn = PFN_DOWN(start); unsigned long start_pfn = PFN_DOWN(start);
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
@ -518,7 +518,7 @@ int arch_remove_memory(u64 start, u64 size)
int ret; int ret;
zone = page_zone(pfn_to_page(start_pfn)); zone = page_zone(pfn_to_page(start_pfn));
ret = __remove_pages(zone, start_pfn, nr_pages); ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
if (unlikely(ret)) if (unlikely(ret))
pr_warn("%s: Failed, __remove_pages() == %d\n", __func__, pr_warn("%s: Failed, __remove_pages() == %d\n", __func__,
ret); ret);

View File

@ -2628,7 +2628,7 @@ EXPORT_SYMBOL(_PAGE_CACHE);
#ifdef CONFIG_SPARSEMEM_VMEMMAP #ifdef CONFIG_SPARSEMEM_VMEMMAP
int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend, int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
int node) int node, struct vmem_altmap *altmap)
{ {
unsigned long pte_base; unsigned long pte_base;
@ -2671,7 +2671,8 @@ int __meminit vmemmap_populate(unsigned long vstart, unsigned long vend,
return 0; return 0;
} }
void vmemmap_free(unsigned long start, unsigned long end) void vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
} }
#endif /* CONFIG_SPARSEMEM_VMEMMAP */ #endif /* CONFIG_SPARSEMEM_VMEMMAP */

View File

@ -829,23 +829,24 @@ void __init mem_init(void)
} }
#ifdef CONFIG_MEMORY_HOTPLUG #ifdef CONFIG_MEMORY_HOTPLUG
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
return __add_pages(nid, start_pfn, nr_pages, want_memblock); return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
int arch_remove_memory(u64 start, u64 size) int arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
struct zone *zone; struct zone *zone;
zone = page_zone(pfn_to_page(start_pfn)); zone = page_zone(pfn_to_page(start_pfn));
return __remove_pages(zone, start_pfn, nr_pages); return __remove_pages(zone, start_pfn, nr_pages, altmap);
} }
#endif #endif
#endif #endif

View File

@ -772,12 +772,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
} }
} }
int add_pages(int nid, unsigned long start_pfn, int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
unsigned long nr_pages, bool want_memblock) struct vmem_altmap *altmap, bool want_memblock)
{ {
int ret; int ret;
ret = __add_pages(nid, start_pfn, nr_pages, want_memblock); ret = __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
WARN_ON_ONCE(ret); WARN_ON_ONCE(ret);
/* update max_pfn, max_low_pfn and high_memory */ /* update max_pfn, max_low_pfn and high_memory */
@ -787,24 +787,24 @@ int add_pages(int nid, unsigned long start_pfn,
return ret; return ret;
} }
int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
bool want_memblock)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
init_memory_mapping(start, start + size); init_memory_mapping(start, start + size);
return add_pages(nid, start_pfn, nr_pages, want_memblock); return add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
} }
EXPORT_SYMBOL_GPL(arch_add_memory);
#define PAGE_INUSE 0xFD #define PAGE_INUSE 0xFD
static void __meminit free_pagetable(struct page *page, int order) static void __meminit free_pagetable(struct page *page, int order,
struct vmem_altmap *altmap)
{ {
unsigned long magic; unsigned long magic;
unsigned int nr_pages = 1 << order; unsigned int nr_pages = 1 << order;
struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
if (altmap) { if (altmap) {
vmem_altmap_free(altmap, nr_pages); vmem_altmap_free(altmap, nr_pages);
@ -826,7 +826,8 @@ static void __meminit free_pagetable(struct page *page, int order)
free_pages((unsigned long)page_address(page), order); free_pages((unsigned long)page_address(page), order);
} }
static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd,
struct vmem_altmap *altmap)
{ {
pte_t *pte; pte_t *pte;
int i; int i;
@ -838,13 +839,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd)
} }
/* free a pte talbe */ /* free a pte talbe */
free_pagetable(pmd_page(*pmd), 0); free_pagetable(pmd_page(*pmd), 0, altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd); pmd_clear(pmd);
spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock);
} }
static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud) static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud,
struct vmem_altmap *altmap)
{ {
pmd_t *pmd; pmd_t *pmd;
int i; int i;
@ -856,13 +858,14 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud)
} }
/* free a pmd talbe */ /* free a pmd talbe */
free_pagetable(pud_page(*pud), 0); free_pagetable(pud_page(*pud), 0, altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pud_clear(pud); pud_clear(pud);
spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock);
} }
static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d,
struct vmem_altmap *altmap)
{ {
pud_t *pud; pud_t *pud;
int i; int i;
@ -874,7 +877,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
} }
/* free a pud talbe */ /* free a pud talbe */
free_pagetable(p4d_page(*p4d), 0); free_pagetable(p4d_page(*p4d), 0, altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
p4d_clear(p4d); p4d_clear(p4d);
spin_unlock(&init_mm.page_table_lock); spin_unlock(&init_mm.page_table_lock);
@ -882,7 +885,7 @@ static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d)
static void __meminit static void __meminit
remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end, remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
bool direct) struct vmem_altmap *altmap, bool direct)
{ {
unsigned long next, pages = 0; unsigned long next, pages = 0;
pte_t *pte; pte_t *pte;
@ -913,7 +916,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
* freed when offlining, or simplely not in use. * freed when offlining, or simplely not in use.
*/ */
if (!direct) if (!direct)
free_pagetable(pte_page(*pte), 0); free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte); pte_clear(&init_mm, addr, pte);
@ -936,7 +939,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
page_addr = page_address(pte_page(*pte)); page_addr = page_address(pte_page(*pte));
if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) { if (!memchr_inv(page_addr, PAGE_INUSE, PAGE_SIZE)) {
free_pagetable(pte_page(*pte), 0); free_pagetable(pte_page(*pte), 0, altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pte_clear(&init_mm, addr, pte); pte_clear(&init_mm, addr, pte);
@ -953,7 +956,7 @@ remove_pte_table(pte_t *pte_start, unsigned long addr, unsigned long end,
static void __meminit static void __meminit
remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end, remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
bool direct) bool direct, struct vmem_altmap *altmap)
{ {
unsigned long next, pages = 0; unsigned long next, pages = 0;
pte_t *pte_base; pte_t *pte_base;
@ -972,7 +975,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PMD_SIZE)) { IS_ALIGNED(next, PMD_SIZE)) {
if (!direct) if (!direct)
free_pagetable(pmd_page(*pmd), free_pagetable(pmd_page(*pmd),
get_order(PMD_SIZE)); get_order(PMD_SIZE),
altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd); pmd_clear(pmd);
@ -986,7 +990,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE, if (!memchr_inv(page_addr, PAGE_INUSE,
PMD_SIZE)) { PMD_SIZE)) {
free_pagetable(pmd_page(*pmd), free_pagetable(pmd_page(*pmd),
get_order(PMD_SIZE)); get_order(PMD_SIZE),
altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pmd_clear(pmd); pmd_clear(pmd);
@ -998,8 +1003,8 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
} }
pte_base = (pte_t *)pmd_page_vaddr(*pmd); pte_base = (pte_t *)pmd_page_vaddr(*pmd);
remove_pte_table(pte_base, addr, next, direct); remove_pte_table(pte_base, addr, next, altmap, direct);
free_pte_table(pte_base, pmd); free_pte_table(pte_base, pmd, altmap);
} }
/* Call free_pmd_table() in remove_pud_table(). */ /* Call free_pmd_table() in remove_pud_table(). */
@ -1009,7 +1014,7 @@ remove_pmd_table(pmd_t *pmd_start, unsigned long addr, unsigned long end,
static void __meminit static void __meminit
remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end, remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
bool direct) struct vmem_altmap *altmap, bool direct)
{ {
unsigned long next, pages = 0; unsigned long next, pages = 0;
pmd_t *pmd_base; pmd_t *pmd_base;
@ -1028,7 +1033,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE)) { IS_ALIGNED(next, PUD_SIZE)) {
if (!direct) if (!direct)
free_pagetable(pud_page(*pud), free_pagetable(pud_page(*pud),
get_order(PUD_SIZE)); get_order(PUD_SIZE),
altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pud_clear(pud); pud_clear(pud);
@ -1042,7 +1048,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
if (!memchr_inv(page_addr, PAGE_INUSE, if (!memchr_inv(page_addr, PAGE_INUSE,
PUD_SIZE)) { PUD_SIZE)) {
free_pagetable(pud_page(*pud), free_pagetable(pud_page(*pud),
get_order(PUD_SIZE)); get_order(PUD_SIZE),
altmap);
spin_lock(&init_mm.page_table_lock); spin_lock(&init_mm.page_table_lock);
pud_clear(pud); pud_clear(pud);
@ -1054,8 +1061,8 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
} }
pmd_base = pmd_offset(pud, 0); pmd_base = pmd_offset(pud, 0);
remove_pmd_table(pmd_base, addr, next, direct); remove_pmd_table(pmd_base, addr, next, direct, altmap);
free_pmd_table(pmd_base, pud); free_pmd_table(pmd_base, pud, altmap);
} }
if (direct) if (direct)
@ -1064,7 +1071,7 @@ remove_pud_table(pud_t *pud_start, unsigned long addr, unsigned long end,
static void __meminit static void __meminit
remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
bool direct) struct vmem_altmap *altmap, bool direct)
{ {
unsigned long next, pages = 0; unsigned long next, pages = 0;
pud_t *pud_base; pud_t *pud_base;
@ -1080,14 +1087,14 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
BUILD_BUG_ON(p4d_large(*p4d)); BUILD_BUG_ON(p4d_large(*p4d));
pud_base = pud_offset(p4d, 0); pud_base = pud_offset(p4d, 0);
remove_pud_table(pud_base, addr, next, direct); remove_pud_table(pud_base, addr, next, altmap, direct);
/* /*
* For 4-level page tables we do not want to free PUDs, but in the * For 4-level page tables we do not want to free PUDs, but in the
* 5-level case we should free them. This code will have to change * 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables. * to adapt for boot-time switching between 4 and 5 level page tables.
*/ */
if (CONFIG_PGTABLE_LEVELS == 5) if (CONFIG_PGTABLE_LEVELS == 5)
free_pud_table(pud_base, p4d); free_pud_table(pud_base, p4d, altmap);
} }
if (direct) if (direct)
@ -1096,7 +1103,8 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
/* start and end are both virtual address. */ /* start and end are both virtual address. */
static void __meminit static void __meminit
remove_pagetable(unsigned long start, unsigned long end, bool direct) remove_pagetable(unsigned long start, unsigned long end, bool direct,
struct vmem_altmap *altmap)
{ {
unsigned long next; unsigned long next;
unsigned long addr; unsigned long addr;
@ -1111,15 +1119,16 @@ remove_pagetable(unsigned long start, unsigned long end, bool direct)
continue; continue;
p4d = p4d_offset(pgd, 0); p4d = p4d_offset(pgd, 0);
remove_p4d_table(p4d, addr, next, direct); remove_p4d_table(p4d, addr, next, altmap, direct);
} }
flush_tlb_all(); flush_tlb_all();
} }
void __ref vmemmap_free(unsigned long start, unsigned long end) void __ref vmemmap_free(unsigned long start, unsigned long end,
struct vmem_altmap *altmap)
{ {
remove_pagetable(start, end, false); remove_pagetable(start, end, false, altmap);
} }
#ifdef CONFIG_MEMORY_HOTREMOVE #ifdef CONFIG_MEMORY_HOTREMOVE
@ -1129,24 +1138,22 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
start = (unsigned long)__va(start); start = (unsigned long)__va(start);
end = (unsigned long)__va(end); end = (unsigned long)__va(end);
remove_pagetable(start, end, true); remove_pagetable(start, end, true, NULL);
} }
int __ref arch_remove_memory(u64 start, u64 size) int __ref arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
{ {
unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn); struct page *page = pfn_to_page(start_pfn);
struct vmem_altmap *altmap;
struct zone *zone; struct zone *zone;
int ret; int ret;
/* With altmap the first mapped page is offset from @start */ /* With altmap the first mapped page is offset from @start */
altmap = to_vmem_altmap((unsigned long) page);
if (altmap) if (altmap)
page += vmem_altmap_offset(altmap); page += vmem_altmap_offset(altmap);
zone = page_zone(page); zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages); ret = __remove_pages(zone, start_pfn, nr_pages, altmap);
WARN_ON_ONCE(ret); WARN_ON_ONCE(ret);
kernel_physical_mapping_remove(start, start + size); kernel_physical_mapping_remove(start, start + size);
@ -1378,7 +1385,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) { if (pmd_none(*pmd)) {
void *p; void *p;
p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (altmap)
p = altmap_alloc_block_buf(PMD_SIZE, altmap);
else
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) { if (p) {
pte_t entry; pte_t entry;
@ -1411,9 +1421,9 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
return 0; return 0;
} }
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
{ {
struct vmem_altmap *altmap = to_vmem_altmap(start);
int err; int err;
if (boot_cpu_has(X86_FEATURE_PSE)) if (boot_cpu_has(X86_FEATURE_PSE))

View File

@ -838,6 +838,18 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
return true; return true;
} }
static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
struct acpi_nfit_capabilities *pcap)
{
struct device *dev = acpi_desc->dev;
u32 mask;
mask = (1 << (pcap->highest_capability + 1)) - 1;
acpi_desc->platform_cap = pcap->capabilities & mask;
dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
return true;
}
static void *add_table(struct acpi_nfit_desc *acpi_desc, static void *add_table(struct acpi_nfit_desc *acpi_desc,
struct nfit_table_prev *prev, void *table, const void *end) struct nfit_table_prev *prev, void *table, const void *end)
{ {
@ -883,6 +895,10 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
case ACPI_NFIT_TYPE_SMBIOS: case ACPI_NFIT_TYPE_SMBIOS:
dev_dbg(dev, "%s: smbios\n", __func__); dev_dbg(dev, "%s: smbios\n", __func__);
break; break;
case ACPI_NFIT_TYPE_CAPABILITIES:
if (!add_platform_cap(acpi_desc, table))
return err;
break;
default: default:
dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type); dev_err(dev, "unknown table '%d' parsing nfit\n", hdr->type);
break; break;
@ -1867,6 +1883,9 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
struct kernfs_node *nfit_kernfs; struct kernfs_node *nfit_kernfs;
nvdimm = nfit_mem->nvdimm; nvdimm = nfit_mem->nvdimm;
if (!nvdimm)
continue;
nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit"); nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
if (nfit_kernfs) if (nfit_kernfs)
nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs, nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
@ -2656,6 +2675,12 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
else else
ndr_desc->numa_node = NUMA_NO_NODE; ndr_desc->numa_node = NUMA_NO_NODE;
if(acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_CACHE_FLUSH)
set_bit(ND_REGION_PERSIST_CACHE, &ndr_desc->flags);
if (acpi_desc->platform_cap & ACPI_NFIT_CAPABILITY_MEM_FLUSH)
set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc->flags);
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) { list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev; struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
struct nd_mapping_desc *mapping; struct nd_mapping_desc *mapping;
@ -3464,6 +3489,7 @@ static __init int nfit_init(void)
BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9); BUILD_BUG_ON(sizeof(struct acpi_nfit_smbios) != 9);
BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80); BUILD_BUG_ON(sizeof(struct acpi_nfit_control_region) != 80);
BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40); BUILD_BUG_ON(sizeof(struct acpi_nfit_data_region) != 40);
BUILD_BUG_ON(sizeof(struct acpi_nfit_capabilities) != 16);
guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]); guid_parse(UUID_VOLATILE_MEMORY, &nfit_uuid[NFIT_SPA_VOLATILE]);
guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]); guid_parse(UUID_PERSISTENT_MEMORY, &nfit_uuid[NFIT_SPA_PM]);

View File

@ -202,6 +202,7 @@ struct acpi_nfit_desc {
unsigned long dimm_cmd_force_en; unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en; unsigned long bus_cmd_force_en;
unsigned long bus_nfit_cmd_force_en; unsigned long bus_nfit_cmd_force_en;
unsigned int platform_cap;
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
void *iobuf, u64 len, int rw); void *iobuf, u64 len, int rw);
}; };

View File

@ -133,7 +133,7 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
dax_region->base = addr; dax_region->base = addr;
if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) { if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
kfree(dax_region); kfree(dax_region);
return NULL;; return NULL;
} }
kref_get(&dax_region->kref); kref_get(&dax_region->kref);

View File

@ -21,6 +21,7 @@
struct dax_pmem { struct dax_pmem {
struct device *dev; struct device *dev;
struct percpu_ref ref; struct percpu_ref ref;
struct dev_pagemap pgmap;
struct completion cmp; struct completion cmp;
}; };
@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev)
struct nd_namespace_common *ndns; struct nd_namespace_common *ndns;
struct nd_dax *nd_dax = to_nd_dax(dev); struct nd_dax *nd_dax = to_nd_dax(dev);
struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
struct vmem_altmap __altmap, *altmap = NULL;
ndns = nvdimm_namespace_common_probe(dev); ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns)) if (IS_ERR(ndns))
return PTR_ERR(ndns); return PTR_ERR(ndns);
nsio = to_nd_namespace_io(&ndns->dev); nsio = to_nd_namespace_io(&ndns->dev);
dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
if (!dax_pmem)
return -ENOMEM;
/* parse the 'pfn' info block via ->rw_bytes */ /* parse the 'pfn' info block via ->rw_bytes */
rc = devm_nsio_enable(dev, nsio); rc = devm_nsio_enable(dev, nsio);
if (rc) if (rc)
return rc; return rc;
altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap); rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
if (IS_ERR(altmap)) if (rc)
return PTR_ERR(altmap); return rc;
devm_nsio_disable(dev, nsio); devm_nsio_disable(dev, nsio);
pfn_sb = nd_pfn->pfn_sb; pfn_sb = nd_pfn->pfn_sb;
@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev)
return -EBUSY; return -EBUSY;
} }
dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
if (!dax_pmem)
return -ENOMEM;
dax_pmem->dev = dev; dax_pmem->dev = dev;
init_completion(&dax_pmem->cmp); init_completion(&dax_pmem->cmp);
rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0, rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev)
if (rc) if (rc)
return rc; return rc;
addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap); dax_pmem->pgmap.ref = &dax_pmem->ref;
addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr)) if (IS_ERR(addr))
return PTR_ERR(addr); return PTR_ERR(addr);
@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev)
return rc; return rc;
/* adjust the dax_region resource to the start of data */ /* adjust the dax_region resource to the start of data */
memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
res.start += le64_to_cpu(pfn_sb->dataoff); res.start += le64_to_cpu(pfn_sb->dataoff);
rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id); rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);

View File

@ -15,6 +15,7 @@
#include <linux/mount.h> #include <linux/mount.h>
#include <linux/magic.h> #include <linux/magic.h>
#include <linux/genhd.h> #include <linux/genhd.h>
#include <linux/pfn_t.h>
#include <linux/cdev.h> #include <linux/cdev.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/slab.h> #include <linux/slab.h>
@ -123,6 +124,15 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
return len < 0 ? len : -EIO; return len < 0 ? len : -EIO;
} }
if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
|| pfn_t_devmap(pfn))
/* pass */;
else {
pr_debug("VFS (%s): error: dax support not enabled\n",
sb->s_id);
return -EOPNOTSUPP;
}
return 0; return 0;
} }
EXPORT_SYMBOL_GPL(__bdev_dax_supported); EXPORT_SYMBOL_GPL(__bdev_dax_supported);

View File

@ -753,6 +753,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size,
return NULL; return NULL;
arena->nd_btt = btt->nd_btt; arena->nd_btt = btt->nd_btt;
arena->sector_size = btt->sector_size; arena->sector_size = btt->sector_size;
mutex_init(&arena->err_lock);
if (!size) if (!size)
return arena; return arena;
@ -891,7 +892,6 @@ static int discover_arenas(struct btt *btt)
goto out; goto out;
} }
mutex_init(&arena->err_lock);
ret = btt_freelist_init(arena); ret = btt_freelist_init(arena);
if (ret) if (ret)
goto out; goto out;

View File

@ -1142,9 +1142,6 @@ int __init nvdimm_bus_init(void)
{ {
int rc; int rc;
BUILD_BUG_ON(sizeof(struct nd_smart_payload) != 128);
BUILD_BUG_ON(sizeof(struct nd_smart_threshold_payload) != 8);
rc = bus_register(&nvdimm_bus_type); rc = bus_register(&nvdimm_bus_type);
if (rc) if (rc)
return rc; return rc;

View File

@ -2408,7 +2408,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
static struct device **create_namespaces(struct nd_region *nd_region) static struct device **create_namespaces(struct nd_region *nd_region)
{ {
struct nd_mapping *nd_mapping = &nd_region->mapping[0]; struct nd_mapping *nd_mapping;
struct device **devs; struct device **devs;
int i; int i;

View File

@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
void nvdimm_badblocks_populate(struct nd_region *nd_region, void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res); struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM) #if IS_ENABLED(CONFIG_ND_CLAIM)
struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
struct resource *res, struct vmem_altmap *altmap);
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
#else #else
static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
struct resource *res, struct vmem_altmap *altmap) struct dev_pagemap *pgmap)
{ {
return ERR_PTR(-ENXIO); return -ENXIO;
} }
static inline int devm_nsio_enable(struct device *dev, static inline int devm_nsio_enable(struct device *dev,
struct nd_namespace_io *nsio) struct nd_namespace_io *nsio)

View File

@ -542,9 +542,10 @@ static unsigned long init_altmap_reserve(resource_size_t base)
return reserve; return reserve;
} }
static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
struct resource *res, struct vmem_altmap *altmap)
{ {
struct resource *res = &pgmap->res;
struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff); u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@ -561,11 +562,13 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
res->start += start_pad; res->start += start_pad;
res->end -= end_trunc; res->end -= end_trunc;
pgmap->type = MEMORY_DEVICE_HOST;
if (nd_pfn->mode == PFN_MODE_RAM) { if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < SZ_8K) if (offset < SZ_8K)
return ERR_PTR(-EINVAL); return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
altmap = NULL; pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) { } else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res) nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- offset) / PAGE_SIZE); - offset) / PAGE_SIZE);
@ -577,10 +580,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
memcpy(altmap, &__altmap, sizeof(*altmap)); memcpy(altmap, &__altmap, sizeof(*altmap));
altmap->free = PHYS_PFN(offset - SZ_8K); altmap->free = PHYS_PFN(offset - SZ_8K);
altmap->alloc = 0; altmap->alloc = 0;
pgmap->altmap_valid = true;
} else } else
return ERR_PTR(-ENXIO); return -ENXIO;
return altmap; return 0;
} }
static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys) static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
@ -708,19 +712,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* Determine the effective resource range and vmem_altmap from an nd_pfn * Determine the effective resource range and vmem_altmap from an nd_pfn
* instance. * instance.
*/ */
struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
struct resource *res, struct vmem_altmap *altmap)
{ {
int rc; int rc;
if (!nd_pfn->uuid || !nd_pfn->ndns) if (!nd_pfn->uuid || !nd_pfn->ndns)
return ERR_PTR(-ENODEV); return -ENODEV;
rc = nd_pfn_init(nd_pfn); rc = nd_pfn_init(nd_pfn);
if (rc) if (rc)
return ERR_PTR(rc); return rc;
/* we need a valid pfn_sb before we can init a vmem_altmap */ /* we need a valid pfn_sb before we can init a dev_pagemap */
return __nvdimm_setup_pfn(nd_pfn, res, altmap); return __nvdimm_setup_pfn(nd_pfn, pgmap);
} }
EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);

View File

@ -35,6 +35,7 @@
#include "pmem.h" #include "pmem.h"
#include "pfn.h" #include "pfn.h"
#include "nd.h" #include "nd.h"
#include "nd-core.h"
static struct device *to_dev(struct pmem_device *pmem) static struct device *to_dev(struct pmem_device *pmem)
{ {
@ -298,34 +299,34 @@ static int pmem_attach_disk(struct device *dev,
{ {
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_region *nd_region = to_nd_region(dev->parent);
struct vmem_altmap __altmap, *altmap = NULL;
int nid = dev_to_node(dev), fua, wbc; int nid = dev_to_node(dev), fua, wbc;
struct resource *res = &nsio->res; struct resource *res = &nsio->res;
struct resource bb_res;
struct nd_pfn *nd_pfn = NULL; struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev; struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb; struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem; struct pmem_device *pmem;
struct resource pfn_res;
struct request_queue *q; struct request_queue *q;
struct device *gendev; struct device *gendev;
struct gendisk *disk; struct gendisk *disk;
void *addr; void *addr;
int rc;
/* while nsio_rw_bytes is active, parse a pfn info block if present */
if (is_nd_pfn(dev)) {
nd_pfn = to_nd_pfn(dev);
altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
if (IS_ERR(altmap))
return PTR_ERR(altmap);
}
/* we're attaching a block device, disable raw namespace access */
devm_nsio_disable(dev, nsio);
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem) if (!pmem)
return -ENOMEM; return -ENOMEM;
/* while nsio_rw_bytes is active, parse a pfn info block if present */
if (is_nd_pfn(dev)) {
nd_pfn = to_nd_pfn(dev);
rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
if (rc)
return rc;
}
/* we're attaching a block device, disable raw namespace access */
devm_nsio_disable(dev, nsio);
dev_set_drvdata(dev, pmem); dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start; pmem->phys_addr = res->start;
pmem->size = resource_size(res); pmem->size = resource_size(res);
@ -334,7 +335,8 @@ static int pmem_attach_disk(struct device *dev,
dev_warn(dev, "unable to guarantee persistence of writes\n"); dev_warn(dev, "unable to guarantee persistence of writes\n");
fua = 0; fua = 0;
} }
wbc = nvdimm_has_cache(nd_region); wbc = nvdimm_has_cache(nd_region) &&
!test_bit(ND_REGION_PERSIST_CACHE, &nd_region->flags);
if (!devm_request_mem_region(dev, res->start, resource_size(res), if (!devm_request_mem_region(dev, res->start, resource_size(res),
dev_name(&ndns->dev))) { dev_name(&ndns->dev))) {
@ -350,19 +352,22 @@ static int pmem_attach_disk(struct device *dev,
return -ENOMEM; return -ENOMEM;
pmem->pfn_flags = PFN_DEV; pmem->pfn_flags = PFN_DEV;
pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) { if (is_nd_pfn(dev)) {
addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, addr = devm_memremap_pages(dev, &pmem->pgmap);
altmap);
pfn_sb = nd_pfn->pfn_sb; pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); pmem->pfn_pad = resource_size(res) -
resource_size(&pmem->pgmap.res);
pmem->pfn_flags |= PFN_MAP; pmem->pfn_flags |= PFN_MAP;
res = &pfn_res; /* for badblocks populate */ memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
res->start += pmem->data_offset; bb_res.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) { } else if (pmem_should_map_pages(dev)) {
addr = devm_memremap_pages(dev, &nsio->res, memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
&q->q_usage_counter, NULL); pmem->pgmap.altmap_valid = false;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP; pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else } else
addr = devm_memremap(dev, pmem->phys_addr, addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM); pmem->size, ARCH_MEMREMAP_PMEM);
@ -401,7 +406,7 @@ static int pmem_attach_disk(struct device *dev,
/ 512); / 512);
if (devm_init_badblocks(dev, &pmem->bb)) if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM; return -ENOMEM;
nvdimm_badblocks_populate(nd_region, &pmem->bb, res); nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb; disk->bb = &pmem->bb;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops); dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);

View File

@ -22,6 +22,7 @@ struct pmem_device {
struct badblocks bb; struct badblocks bb;
struct dax_device *dax_dev; struct dax_device *dax_dev;
struct gendisk *disk; struct gendisk *disk;
struct dev_pagemap pgmap;
}; };
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,

View File

@ -528,6 +528,18 @@ static ssize_t resource_show(struct device *dev,
} }
static DEVICE_ATTR_RO(resource); static DEVICE_ATTR_RO(resource);
static ssize_t persistence_domain_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
unsigned long flags = nd_region->flags;
return sprintf(buf, "%s%s\n",
flags & BIT(ND_REGION_PERSIST_CACHE) ? "cpu_cache " : "",
flags & BIT(ND_REGION_PERSIST_MEMCTRL) ? "memory_controller " : "");
}
static DEVICE_ATTR_RO(persistence_domain);
static struct attribute *nd_region_attributes[] = { static struct attribute *nd_region_attributes[] = {
&dev_attr_size.attr, &dev_attr_size.attr,
&dev_attr_nstype.attr, &dev_attr_nstype.attr,
@ -543,6 +555,7 @@ static struct attribute *nd_region_attributes[] = {
&dev_attr_init_namespaces.attr, &dev_attr_init_namespaces.attr,
&dev_attr_badblocks.attr, &dev_attr_badblocks.attr,
&dev_attr_resource.attr, &dev_attr_resource.attr,
&dev_attr_persistence_domain.attr,
NULL, NULL,
}; };

View File

@ -16,6 +16,7 @@ config BLK_DEV_XPRAM
config DCSSBLK config DCSSBLK
def_tristate m def_tristate m
select DAX select DAX
select FS_DAX_LIMITED
prompt "DCSSBLK support" prompt "DCSSBLK support"
depends on S390 && BLOCK depends on S390 && BLOCK
help help

View File

@ -916,7 +916,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff,
dev_sz = dev_info->end - dev_info->start + 1; dev_sz = dev_info->end - dev_info->start + 1;
*kaddr = (void *) dev_info->start + offset; *kaddr = (void *) dev_info->start + offset;
*pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset), PFN_DEV); *pfn = __pfn_to_pfn_t(PFN_DOWN(dev_info->start + offset),
PFN_DEV|PFN_SPECIAL);
return (dev_sz - offset) / PAGE_SIZE; return (dev_sz - offset) / PAGE_SIZE;
} }

View File

@ -58,6 +58,13 @@ config FS_DAX_PMD
depends on ZONE_DEVICE depends on ZONE_DEVICE
depends on TRANSPARENT_HUGEPAGE depends on TRANSPARENT_HUGEPAGE
# Selected by DAX drivers that do not expect filesystem DAX to support
# get_user_pages() of DAX mappings. I.e. "limited" indicates no support
# for fork() of processes with MAP_SHARED mappings or support for
# direct-I/O to a DAX mapping.
config FS_DAX_LIMITED
bool
endif # BLOCK endif # BLOCK
# Posix ACL utility routines # Posix ACL utility routines

View File

@ -962,8 +962,11 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
if (sbi->s_mount_opt & EXT2_MOUNT_DAX) { if (sbi->s_mount_opt & EXT2_MOUNT_DAX) {
err = bdev_dax_supported(sb, blocksize); err = bdev_dax_supported(sb, blocksize);
if (err) if (err) {
goto failed_mount; ext2_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
sbi->s_mount_opt &= ~EXT2_MOUNT_DAX;
}
} }
/* If the blocksize doesn't match, re-read the thing.. */ /* If the blocksize doesn't match, re-read the thing.. */

View File

@ -3712,11 +3712,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (ext4_has_feature_inline_data(sb)) { if (ext4_has_feature_inline_data(sb)) {
ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem" ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
" that may contain inline data"); " that may contain inline data");
goto failed_mount; sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
} }
err = bdev_dax_supported(sb, blocksize); err = bdev_dax_supported(sb, blocksize);
if (err) if (err) {
goto failed_mount; ext4_msg(sb, KERN_ERR,
"DAX unsupported by block device. Turning off DAX.");
sbi->s_mount_opt &= ~EXT4_MOUNT_DAX;
}
} }
if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) { if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {

View File

@ -47,6 +47,17 @@ enum {
/* region flag indicating to direct-map persistent memory by default */ /* region flag indicating to direct-map persistent memory by default */
ND_REGION_PAGEMAP = 0, ND_REGION_PAGEMAP = 0,
/*
* Platform ensures entire CPU store data path is flushed to pmem on
* system power loss.
*/
ND_REGION_PERSIST_CACHE = 1,
/*
* Platform provides mechanisms to automatically flush outstanding
* write data from memory controler to pmem on system power loss.
* (ADR)
*/
ND_REGION_PERSIST_MEMCTRL = 2,
/* mark newly adjusted resources as requiring a label update */ /* mark newly adjusted resources as requiring a label update */
DPA_RESOURCE_ADJUSTED = 1 << 0, DPA_RESOURCE_ADJUSTED = 1 << 0,

View File

@@ -13,6 +13,7 @@ struct pglist_data;
 struct mem_section;
 struct memory_block;
 struct resource;
+struct vmem_altmap;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
@@ -125,24 +126,26 @@ static inline bool movable_node_is_enabled(void)
 #ifdef CONFIG_MEMORY_HOTREMOVE
 extern bool is_pageblock_removable_nolock(struct page *page);
-extern int arch_remove_memory(u64 start, u64 size);
+extern int arch_remove_memory(u64 start, u64 size,
+        struct vmem_altmap *altmap);
 extern int __remove_pages(struct zone *zone, unsigned long start_pfn,
-    unsigned long nr_pages);
+    unsigned long nr_pages, struct vmem_altmap *altmap);
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
 /* reasonably generic interface to expand the physical pages */
-extern int __add_pages(int nid, unsigned long start_pfn,
-    unsigned long nr_pages, bool want_memblock);
+extern int __add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+        struct vmem_altmap *altmap, bool want_memblock);
 
 #ifndef CONFIG_ARCH_HAS_ADD_PAGES
 static inline int add_pages(int nid, unsigned long start_pfn,
-        unsigned long nr_pages, bool want_memblock)
+        unsigned long nr_pages, struct vmem_altmap *altmap,
+        bool want_memblock)
 {
-    return __add_pages(nid, start_pfn, nr_pages, want_memblock);
+    return __add_pages(nid, start_pfn, nr_pages, altmap, want_memblock);
 }
 #else /* ARCH_HAS_ADD_PAGES */
-int add_pages(int nid, unsigned long start_pfn,
-    unsigned long nr_pages, bool want_memblock);
+int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+    struct vmem_altmap *altmap, bool want_memblock);
 #endif /* ARCH_HAS_ADD_PAGES */
 
 #ifdef CONFIG_NUMA
@@ -318,15 +321,17 @@ extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn,
         void *arg, int (*func)(struct memory_block *, void *));
 extern int add_memory(int nid, u64 start, u64 size);
 extern int add_memory_resource(int nid, struct resource *resource, bool online);
-extern int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock);
+extern int arch_add_memory(int nid, u64 start, u64 size,
+        struct vmem_altmap *altmap, bool want_memblock);
 extern void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
-        unsigned long nr_pages);
+        unsigned long nr_pages, struct vmem_altmap *altmap);
 extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
 extern bool is_memblock_offlined(struct memory_block *mem);
 extern void remove_memory(int nid, u64 start, u64 size);
-extern int sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn);
+extern int sparse_add_one_section(struct pglist_data *pgdat,
+        unsigned long start_pfn, struct vmem_altmap *altmap);
 extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
-        unsigned long map_offset);
+        unsigned long map_offset, struct vmem_altmap *altmap);
 extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
         unsigned long pnum);
 extern bool allow_online_pfn_range(int nid, unsigned long pfn, unsigned long nr_pages,

==== next file ====

@@ -26,18 +26,6 @@ struct vmem_altmap {
     unsigned long alloc;
 };
 
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
-
-#ifdef CONFIG_ZONE_DEVICE
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
-#else
-static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
-{
-    return NULL;
-}
-#endif
-
 /*
  * Specialize ZONE_DEVICE memory into multiple types each having differents
  * usage.
@@ -125,8 +113,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
 struct dev_pagemap {
     dev_page_fault_t page_fault;
     dev_page_free_t page_free;
-    struct vmem_altmap *altmap;
-    const struct resource *res;
+    struct vmem_altmap altmap;
+    bool altmap_valid;
+    struct resource res;
     struct percpu_ref *ref;
     struct device *dev;
     void *data;
@@ -134,15 +123,17 @@ struct dev_pagemap {
 };
 
 #ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-        struct percpu_ref *ref, struct vmem_altmap *altmap);
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+        struct dev_pagemap *pgmap);
+
+unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
+void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
 
 static inline bool is_zone_device_page(const struct page *page);
 #else
 static inline void *devm_memremap_pages(struct device *dev,
-        struct resource *res, struct percpu_ref *ref,
-        struct vmem_altmap *altmap)
+        struct dev_pagemap *pgmap)
 {
     /*
      * Fail attempts to call devm_memremap_pages() without
@@ -153,11 +144,22 @@ static inline void *devm_memremap_pages(struct device *dev,
     return ERR_PTR(-ENXIO);
 }
 
-static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+        struct dev_pagemap *pgmap)
 {
     return NULL;
 }
-#endif
+
+static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
+{
+    return 0;
+}
+
+static inline void vmem_altmap_free(struct vmem_altmap *altmap,
+        unsigned long nr_pfns)
+{
+}
+#endif /* CONFIG_ZONE_DEVICE */
 
 #if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
 static inline bool is_device_private_page(const struct page *page)
@@ -173,39 +175,6 @@ static inline bool is_device_public_page(const struct page *page)
 }
 #endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
 
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
- */
-static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
-        struct dev_pagemap *pgmap)
-{
-    const struct resource *res = pgmap ? pgmap->res : NULL;
-    resource_size_t phys = PFN_PHYS(pfn);
-
-    /*
-     * In the cached case we're already holding a live reference so
-     * we can simply do a blind increment
-     */
-    if (res && phys >= res->start && phys <= res->end) {
-        percpu_ref_get(pgmap->ref);
-        return pgmap;
-    }
-
-    /* fall back to slow path lookup */
-    rcu_read_lock();
-    pgmap = find_dev_pagemap(phys);
-    if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
-        pgmap = NULL;
-    rcu_read_unlock();
-
-    return pgmap;
-}
-
 static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
 {
     if (pgmap)
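
With the altmap and resource now embedded in struct dev_pagemap, a caller
fills in a single descriptor instead of passing three parameters. A minimal
sketch of the new calling convention (illustrative only; the percpu_ref setup
and error handling are elided, and pmem_res, pmem_ref and pmem_altmap are
assumed to exist in the caller):

    #include <linux/memremap.h>

    static int example_map_pmem(struct device *dev, struct resource *pmem_res,
            struct percpu_ref *pmem_ref, struct vmem_altmap *pmem_altmap)
    {
        struct dev_pagemap *pgmap;
        void *addr;

        pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
        if (!pgmap)
            return -ENOMEM;

        pgmap->res = *pmem_res;            /* range treated like "System RAM" */
        pgmap->ref = pmem_ref;             /* must already be live */
        pgmap->type = MEMORY_DEVICE_HOST;
        if (pmem_altmap) {
            pgmap->altmap = *pmem_altmap;  /* carve the memmap out of the range */
            pgmap->altmap_valid = true;
        }

        addr = devm_memremap_pages(dev, pgmap);
        return IS_ERR(addr) ? PTR_ERR(addr) : 0;
    }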

==== next file ====

@@ -2075,8 +2075,8 @@ static inline void zero_resv_unavail(void) {}
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
-        unsigned long, enum memmap_context);
+extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
+        enum memmap_context, struct vmem_altmap *);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
@@ -2544,7 +2544,8 @@ void sparse_mem_maps_populate_node(struct page **map_map,
         unsigned long map_count,
         int nodeid);
 
-struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
+struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
+        struct vmem_altmap *altmap);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
@@ -2552,20 +2553,17 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
 pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
 void *vmemmap_alloc_block(unsigned long size, int node);
 struct vmem_altmap;
-void *__vmemmap_alloc_block_buf(unsigned long size, int node,
-        struct vmem_altmap *altmap);
-static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
-{
-    return __vmemmap_alloc_block_buf(size, node, NULL);
-}
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
+void *altmap_alloc_block_buf(unsigned long size, struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
         int node);
-int vmemmap_populate(unsigned long start, unsigned long end, int node);
+int vmemmap_populate(unsigned long start, unsigned long end, int node,
+        struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
 #ifdef CONFIG_MEMORY_HOTPLUG
-void vmemmap_free(unsigned long start, unsigned long end);
+void vmemmap_free(unsigned long start, unsigned long end,
+        struct vmem_altmap *altmap);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
         unsigned long nr_pages);

==== next file ====

@@ -15,8 +15,10 @@
 #define PFN_SG_LAST (1ULL << (BITS_PER_LONG_LONG - 2))
 #define PFN_DEV (1ULL << (BITS_PER_LONG_LONG - 3))
 #define PFN_MAP (1ULL << (BITS_PER_LONG_LONG - 4))
+#define PFN_SPECIAL (1ULL << (BITS_PER_LONG_LONG - 5))
 
 #define PFN_FLAGS_TRACE \
+    { PFN_SPECIAL, "SPECIAL" }, \
     { PFN_SG_CHAIN, "SG_CHAIN" }, \
     { PFN_SG_LAST, "SG_LAST" }, \
     { PFN_DEV, "DEV" }, \
@@ -120,4 +122,15 @@ pud_t pud_mkdevmap(pud_t pud);
 #endif
 #endif /* __HAVE_ARCH_PTE_DEVMAP */
 
+#ifdef __HAVE_ARCH_PTE_SPECIAL
+static inline bool pfn_t_special(pfn_t pfn)
+{
+    return (pfn.val & PFN_SPECIAL) == PFN_SPECIAL;
+}
+#else
+static inline bool pfn_t_special(pfn_t pfn)
+{
+    return false;
+}
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
 #endif /* _LINUX_PFN_T_H_ */
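
A small sketch of how the new flag is used (not from the patch): a pfn_t
tagged PFN_SPECIAL passes the vm_mixed_ok() check added later in this series
even when the VMA does not set VM_MIXEDMAP, provided the architecture defines
__HAVE_ARCH_PTE_SPECIAL. The frame number below is an arbitrary example value.

    #include <linux/pfn_t.h>

    pfn_t pfn = __pfn_to_pfn_t(0x1234UL, PFN_SPECIAL);
    bool is_special = pfn_t_special(pfn);   /* true when pte_special() exists */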

==== next file ====

@@ -15,54 +15,6 @@
 
 #include <linux/types.h>
 
-struct nd_cmd_smart {
-    __u32 status;
-    __u8 data[128];
-} __packed;
-
-#define ND_SMART_HEALTH_VALID   (1 << 0)
-#define ND_SMART_SPARES_VALID   (1 << 1)
-#define ND_SMART_USED_VALID     (1 << 2)
-#define ND_SMART_TEMP_VALID     (1 << 3)
-#define ND_SMART_CTEMP_VALID    (1 << 4)
-#define ND_SMART_ALARM_VALID    (1 << 9)
-#define ND_SMART_SHUTDOWN_VALID (1 << 10)
-#define ND_SMART_VENDOR_VALID   (1 << 11)
-#define ND_SMART_SPARE_TRIP     (1 << 0)
-#define ND_SMART_TEMP_TRIP      (1 << 1)
-#define ND_SMART_CTEMP_TRIP     (1 << 2)
-#define ND_SMART_NON_CRITICAL_HEALTH    (1 << 0)
-#define ND_SMART_CRITICAL_HEALTH        (1 << 1)
-#define ND_SMART_FATAL_HEALTH           (1 << 2)
-
-struct nd_smart_payload {
-    __u32 flags;
-    __u8 reserved0[4];
-    __u8 health;
-    __u8 spares;
-    __u8 life_used;
-    __u8 alarm_flags;
-    __u16 temperature;
-    __u16 ctrl_temperature;
-    __u8 reserved1[15];
-    __u8 shutdown_state;
-    __u32 vendor_size;
-    __u8 vendor_data[92];
-} __packed;
-
-struct nd_cmd_smart_threshold {
-    __u32 status;
-    __u8 data[8];
-} __packed;
-
-struct nd_smart_threshold_payload {
-    __u8 alarm_control;
-    __u8 reserved0;
-    __u16 temperature;
-    __u8 spares;
-    __u8 reserved[3];
-} __packed;
-
 struct nd_cmd_dimm_flags {
     __u32 status;
     __u32 flags;
@@ -211,12 +163,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 
 #define ND_IOCTL 'N'
 
-#define ND_IOCTL_SMART              _IOWR(ND_IOCTL, ND_CMD_SMART,\
-                                    struct nd_cmd_smart)
-
-#define ND_IOCTL_SMART_THRESHOLD    _IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
-                                    struct nd_cmd_smart_threshold)
-
 #define ND_IOCTL_DIMM_FLAGS         _IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
                                     struct nd_cmd_dimm_flags)
 
@@ -263,7 +209,7 @@ enum nd_driver_flags {
 };
 
 enum {
-    ND_MIN_NAMESPACE_SIZE = 0x00400000,
+    ND_MIN_NAMESPACE_SIZE = PAGE_SIZE,
 };
 
 enum ars_masks {

==== next file ====

@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
 #define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
 #define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
 
-struct page_map {
-    struct resource res;
-    struct percpu_ref *ref;
-    struct dev_pagemap pgmap;
-    struct vmem_altmap altmap;
-};
-
 static unsigned long order_at(struct resource *res, unsigned long pgoff)
 {
     unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -248,34 +241,36 @@ int device_private_entry_fault(struct vm_area_struct *vma,
 EXPORT_SYMBOL(device_private_entry_fault);
 #endif /* CONFIG_DEVICE_PRIVATE */
 
-static void pgmap_radix_release(struct resource *res)
+static void pgmap_radix_release(struct resource *res, unsigned long end_pgoff)
 {
     unsigned long pgoff, order;
 
     mutex_lock(&pgmap_lock);
-    foreach_order_pgoff(res, order, pgoff)
+    foreach_order_pgoff(res, order, pgoff) {
+        if (pgoff >= end_pgoff)
+            break;
         radix_tree_delete(&pgmap_radix, PHYS_PFN(res->start) + pgoff);
+    }
     mutex_unlock(&pgmap_lock);
 
     synchronize_rcu();
 }
 
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-    struct dev_pagemap *pgmap = &page_map->pgmap;
-    const struct resource *res = &page_map->res;
-    struct vmem_altmap *altmap = pgmap->altmap;
+    const struct resource *res = &pgmap->res;
+    struct vmem_altmap *altmap = &pgmap->altmap;
     unsigned long pfn;
 
     pfn = res->start >> PAGE_SHIFT;
-    if (altmap)
+    if (pgmap->altmap_valid)
         pfn += vmem_altmap_offset(altmap);
     return pfn;
 }
 
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
 {
-    const struct resource *res = &page_map->res;
+    const struct resource *res = &pgmap->res;
 
     return (res->start + resource_size(res)) >> PAGE_SHIFT;
 }
@@ -283,15 +278,15 @@ static unsigned long pfn_end(struct page_map *page_map)
 #define for_each_device_pfn(pfn, map) \
     for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
 
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
 {
-    struct page_map *page_map = data;
-    struct resource *res = &page_map->res;
+    struct dev_pagemap *pgmap = data;
+    struct device *dev = pgmap->dev;
+    struct resource *res = &pgmap->res;
     resource_size_t align_start, align_size;
-    struct dev_pagemap *pgmap = &page_map->pgmap;
     unsigned long pfn;
 
-    for_each_device_pfn(pfn, page_map)
+    for_each_device_pfn(pfn, pgmap)
         put_page(pfn_to_page(pfn));
 
     if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -301,56 +296,51 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
 
     /* pages are dead and unused, undo the arch mapping */
     align_start = res->start & ~(SECTION_SIZE - 1);
-    align_size = ALIGN(resource_size(res), SECTION_SIZE);
+    align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
+        - align_start;
 
     mem_hotplug_begin();
-    arch_remove_memory(align_start, align_size);
+    arch_remove_memory(align_start, align_size, pgmap->altmap_valid ?
+            &pgmap->altmap : NULL);
     mem_hotplug_done();
 
     untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
-    pgmap_radix_release(res);
-    dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
+    pgmap_radix_release(res, -1);
+    dev_WARN_ONCE(dev, pgmap->altmap.alloc,
             "%s: failed to free all reserved pages\n", __func__);
 }
 
-/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
-    struct page_map *page_map;
-
-    WARN_ON_ONCE(!rcu_read_lock_held());
-
-    page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
-    return page_map ? &page_map->pgmap : NULL;
-}
-
 /**
  * devm_memremap_pages - remap and provide memmap backing for the given resource
  * @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pgmap
  *
  * Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- *    (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ *    by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ *    must be set to true
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ *    time (or devm release event). The expected order of events is that ref has
  *    been through percpu_ref_kill() before devm_memremap_pages_release(). The
  *    wait for the completion of all references being dropped and
  *    percpu_ref_exit() must occur after devm_memremap_pages_release().
  *
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
  *    this is not enforced.
  */
-void *devm_memremap_pages(struct device *dev, struct resource *res,
-        struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
     resource_size_t align_start, align_size, align_end;
+    struct vmem_altmap *altmap = pgmap->altmap_valid ?
+            &pgmap->altmap : NULL;
     unsigned long pfn, pgoff, order;
     pgprot_t pgprot = PAGE_KERNEL;
-    struct dev_pagemap *pgmap;
-    struct page_map *page_map;
     int error, nid, is_ram, i = 0;
+    struct resource *res = &pgmap->res;
 
     align_start = res->start & ~(SECTION_SIZE - 1);
     align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -367,47 +357,18 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
     if (is_ram == REGION_INTERSECTS)
         return __va(res->start);
 
-    if (!ref)
+    if (!pgmap->ref)
         return ERR_PTR(-EINVAL);
 
-    page_map = devres_alloc_node(devm_memremap_pages_release,
-            sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
-    if (!page_map)
-        return ERR_PTR(-ENOMEM);
-    pgmap = &page_map->pgmap;
-
-    memcpy(&page_map->res, res, sizeof(*res));
-
     pgmap->dev = dev;
-    if (altmap) {
-        memcpy(&page_map->altmap, altmap, sizeof(*altmap));
-        pgmap->altmap = &page_map->altmap;
-    }
-    pgmap->ref = ref;
-    pgmap->res = &page_map->res;
-    pgmap->type = MEMORY_DEVICE_HOST;
-    pgmap->page_fault = NULL;
-    pgmap->page_free = NULL;
-    pgmap->data = NULL;
 
     mutex_lock(&pgmap_lock);
     error = 0;
     align_end = align_start + align_size - 1;
 
     foreach_order_pgoff(res, order, pgoff) {
-        struct dev_pagemap *dup;
-
-        rcu_read_lock();
-        dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
-        rcu_read_unlock();
-        if (dup) {
-            dev_err(dev, "%s: %pr collides with mapping for %s\n",
-                    __func__, res, dev_name(dup->dev));
-            error = -EBUSY;
-            break;
-        }
         error = __radix_tree_insert(&pgmap_radix,
-                PHYS_PFN(res->start) + pgoff, order, page_map);
+                PHYS_PFN(res->start) + pgoff, order, pgmap);
         if (error) {
             dev_err(dev, "%s: failed: %d\n", __func__, error);
             break;
@@ -427,16 +388,16 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
         goto err_pfn_remap;
 
     mem_hotplug_begin();
-    error = arch_add_memory(nid, align_start, align_size, false);
+    error = arch_add_memory(nid, align_start, align_size, altmap, false);
     if (!error)
         move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                     align_start >> PAGE_SHIFT,
-                    align_size >> PAGE_SHIFT);
+                    align_size >> PAGE_SHIFT, altmap);
     mem_hotplug_done();
     if (error)
         goto err_add_memory;
 
-    for_each_device_pfn(pfn, page_map) {
+    for_each_device_pfn(pfn, pgmap) {
         struct page *page = pfn_to_page(pfn);
 
         /*
@@ -447,19 +408,21 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
          */
         list_del(&page->lru);
         page->pgmap = pgmap;
-        percpu_ref_get(ref);
+        percpu_ref_get(pgmap->ref);
         if (!(++i % 1024))
             cond_resched();
     }
-    devres_add(dev, page_map);
+
+    devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
     return __va(res->start);
 
  err_add_memory:
     untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
  err_pfn_remap:
  err_radix:
-    pgmap_radix_release(res);
-    devres_free(page_map);
+    pgmap_radix_release(res, pgoff);
+    devres_free(pgmap);
     return ERR_PTR(error);
 }
 EXPORT_SYMBOL(devm_memremap_pages);
@@ -475,34 +438,39 @@ void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
     altmap->alloc -= nr_pfns;
 }
 
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is.  If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+        struct dev_pagemap *pgmap)
 {
-    /*
-     * 'memmap_start' is the virtual address for the first "struct
-     * page" in this range of the vmemmap array.  In the case of
-     * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
-     * pointer arithmetic, so we can perform this to_vmem_altmap()
-     * conversion without concern for the initialization state of
-     * the struct page fields.
-     */
-    struct page *page = (struct page *) memmap_start;
-    struct dev_pagemap *pgmap;
+    resource_size_t phys = PFN_PHYS(pfn);
 
     /*
-     * Unconditionally retrieve a dev_pagemap associated with the
-     * given physical address, this is only for use in the
-     * arch_{add|remove}_memory() for setting up and tearing down
-     * the memmap.
+     * In the cached case we're already holding a live reference.
      */
+    if (pgmap) {
+        if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+            return pgmap;
+        put_dev_pagemap(pgmap);
+    }
+
+    /* fall back to slow path lookup */
     rcu_read_lock();
-    pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+    pgmap = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
+    if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+        pgmap = NULL;
     rcu_read_unlock();
 
-    return pgmap ? pgmap->altmap : NULL;
+    return pgmap;
 }
 #endif /* CONFIG_ZONE_DEVICE */
 
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
 void put_zone_device_private_or_public_page(struct page *page)
 {
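
For reference, the lookup-caching pattern the new get_dev_pagemap() enables,
in the spirit of the gup changes below (an illustrative caller loop, not part
of the patch; start_pfn and end_pfn are assumed to be defined by the caller):

    struct dev_pagemap *pgmap = NULL;
    unsigned long pfn;

    for (pfn = start_pfn; pfn < end_pfn; pfn++) {
        pgmap = get_dev_pagemap(pfn, pgmap);  /* cheap while pfn stays in the same map */
        if (!pgmap)
            break;                            /* not ZONE_DEVICE, or the pagemap is dying */
        /* ... operate on pfn_to_page(pfn) ... */
    }
    if (pgmap)
        put_dev_pagemap(pgmap);               /* drop the single cached reference */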

==== next file ====

@@ -1394,7 +1394,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
         VM_BUG_ON_PAGE(compound_head(page) != head, page);
 
-        put_dev_pagemap(pgmap);
         SetPageReferenced(page);
         pages[*nr] = page;
         (*nr)++;
@@ -1404,6 +1403,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
     ret = 1;
 
 pte_unmap:
+    if (pgmap)
+        put_dev_pagemap(pgmap);
     pte_unmap(ptem);
     return ret;
 }
@@ -1443,10 +1444,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
         SetPageReferenced(page);
         pages[*nr] = page;
         get_page(page);
-        put_dev_pagemap(pgmap);
         (*nr)++;
         pfn++;
     } while (addr += PAGE_SIZE, addr != end);
+
+    if (pgmap)
+        put_dev_pagemap(pgmap);
     return 1;
 }

==== next file ====

@@ -836,10 +836,10 @@ static void hmm_devmem_release(struct device *dev, void *data)
     mem_hotplug_begin();
     if (resource->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY)
-        __remove_pages(zone, start_pfn, npages);
+        __remove_pages(zone, start_pfn, npages, NULL);
     else
         arch_remove_memory(start_pfn << PAGE_SHIFT,
-                npages << PAGE_SHIFT);
+                npages << PAGE_SHIFT, NULL);
     mem_hotplug_done();
 
     hmm_devmem_radix_release(resource);
@@ -880,7 +880,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
     else
         devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 
-    devmem->pagemap.res = devmem->resource;
+    devmem->pagemap.res = *devmem->resource;
     devmem->pagemap.page_fault = hmm_devmem_fault;
     devmem->pagemap.page_free = hmm_devmem_free;
     devmem->pagemap.dev = devmem->device;
@@ -929,17 +929,18 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
      * want the linear mapping and thus use arch_add_memory().
      */
     if (devmem->pagemap.type == MEMORY_DEVICE_PUBLIC)
-        ret = arch_add_memory(nid, align_start, align_size, false);
+        ret = arch_add_memory(nid, align_start, align_size, NULL,
+                false);
     else
         ret = add_pages(nid, align_start >> PAGE_SHIFT,
-                align_size >> PAGE_SHIFT, false);
+                align_size >> PAGE_SHIFT, NULL, false);
     if (ret) {
         mem_hotplug_done();
         goto error_add_memory;
     }
     move_pfn_range_to_zone(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
                 align_start >> PAGE_SHIFT,
-                align_size >> PAGE_SHIFT);
+                align_size >> PAGE_SHIFT, NULL);
     mem_hotplug_done();
 
     for (pfn = devmem->pfn_first; pfn < devmem->pfn_last; pfn++) {

==== next file ====

@@ -1904,12 +1904,26 @@ int vm_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 }
 EXPORT_SYMBOL(vm_insert_pfn_prot);
 
+static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn)
+{
+    /* these checks mirror the abort conditions in vm_normal_page */
+    if (vma->vm_flags & VM_MIXEDMAP)
+        return true;
+    if (pfn_t_devmap(pfn))
+        return true;
+    if (pfn_t_special(pfn))
+        return true;
+    if (is_zero_pfn(pfn_t_to_pfn(pfn)))
+        return true;
+    return false;
+}
+
 static int __vm_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
         pfn_t pfn, bool mkwrite)
 {
     pgprot_t pgprot = vma->vm_page_prot;
 
-    BUG_ON(!(vma->vm_flags & VM_MIXEDMAP));
+    BUG_ON(!vm_mixed_ok(vma, pfn));
 
     if (addr < vma->vm_start || addr >= vma->vm_end)
         return -EFAULT;

==== next file ====

@@ -247,7 +247,7 @@ void __init register_page_bootmem_info_node(struct pglist_data *pgdat)
 #endif /* CONFIG_HAVE_BOOTMEM_INFO_NODE */
 
 static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
-        bool want_memblock)
+        struct vmem_altmap *altmap, bool want_memblock)
 {
     int ret;
     int i;
@@ -255,7 +255,7 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
     if (pfn_valid(phys_start_pfn))
         return -EEXIST;
 
-    ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn);
+    ret = sparse_add_one_section(NODE_DATA(nid), phys_start_pfn, altmap);
     if (ret < 0)
         return ret;
@@ -289,18 +289,17 @@ static int __meminit __add_section(int nid, unsigned long phys_start_pfn,
  * add the new pages.
  */
 int __ref __add_pages(int nid, unsigned long phys_start_pfn,
-        unsigned long nr_pages, bool want_memblock)
+        unsigned long nr_pages, struct vmem_altmap *altmap,
+        bool want_memblock)
 {
     unsigned long i;
     int err = 0;
     int start_sec, end_sec;
-    struct vmem_altmap *altmap;
 
     /* during initialize mem_map, align hot-added range to section */
     start_sec = pfn_to_section_nr(phys_start_pfn);
     end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
 
-    altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
     if (altmap) {
         /*
          * Validate altmap is within bounds of the total request
@@ -315,7 +314,8 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
     }
 
     for (i = start_sec; i <= end_sec; i++) {
-        err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
+        err = __add_section(nid, section_nr_to_pfn(i), altmap,
+                want_memblock);
 
         /*
          * EEXIST is finally dealt with by ioresource collision
@@ -331,7 +331,6 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
 out:
     return err;
 }
-EXPORT_SYMBOL_GPL(__add_pages);
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /* find the smallest valid pfn in the range [start_pfn, end_pfn) */
@@ -534,7 +533,7 @@ static void __remove_zone(struct zone *zone, unsigned long start_pfn)
 }
 
 static int __remove_section(struct zone *zone, struct mem_section *ms,
-        unsigned long map_offset)
+        unsigned long map_offset, struct vmem_altmap *altmap)
 {
     unsigned long start_pfn;
     int scn_nr;
@@ -551,7 +550,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
     start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
     __remove_zone(zone, start_pfn);
 
-    sparse_remove_one_section(zone, ms, map_offset);
+    sparse_remove_one_section(zone, ms, map_offset, altmap);
     return 0;
 }
@@ -567,7 +566,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
  * calling offline_pages().
  */
 int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
-        unsigned long nr_pages)
+        unsigned long nr_pages, struct vmem_altmap *altmap)
 {
     unsigned long i;
     unsigned long map_offset = 0;
@@ -575,10 +574,6 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
 
     /* In the ZONE_DEVICE case device driver owns the memory region */
     if (is_dev_zone(zone)) {
-        struct page *page = pfn_to_page(phys_start_pfn);
-        struct vmem_altmap *altmap;
-
-        altmap = to_vmem_altmap((unsigned long) page);
         if (altmap)
             map_offset = vmem_altmap_offset(altmap);
     } else {
@@ -609,7 +604,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
     for (i = 0; i < sections_to_remove; i++) {
         unsigned long pfn = phys_start_pfn + i*PAGES_PER_SECTION;
 
-        ret = __remove_section(zone, __pfn_to_section(pfn), map_offset);
+        ret = __remove_section(zone, __pfn_to_section(pfn), map_offset,
+                altmap);
         map_offset = 0;
         if (ret)
             break;
@@ -799,8 +795,8 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
     pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
 }
 
-void __ref move_pfn_range_to_zone(struct zone *zone,
-        unsigned long start_pfn, unsigned long nr_pages)
+void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
+        unsigned long nr_pages, struct vmem_altmap *altmap)
 {
     struct pglist_data *pgdat = zone->zone_pgdat;
     int nid = pgdat->node_id;
@@ -825,7 +821,8 @@ void __ref move_pfn_range_to_zone(struct zone *zone,
      * expects the zone spans the pfn range. All the pages in the range
      * are reserved so nobody should be touching them so we should be safe
      */
-    memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, MEMMAP_HOTPLUG);
+    memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+            MEMMAP_HOTPLUG, altmap);
 
     set_zone_contiguous(zone);
 }
@@ -897,7 +894,7 @@ static struct zone * __meminit move_pfn_range(int online_type, int nid,
     struct zone *zone;
 
     zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
-    move_pfn_range_to_zone(zone, start_pfn, nr_pages);
+    move_pfn_range_to_zone(zone, start_pfn, nr_pages, NULL);
     return zone;
 }
@@ -1146,7 +1143,7 @@ int __ref add_memory_resource(int nid, struct resource *res, bool online)
     }
 
     /* call arch's memory hotadd */
-    ret = arch_add_memory(nid, start, size, true);
+    ret = arch_add_memory(nid, start, size, NULL, true);
     if (ret < 0)
         goto error;
@@ -1888,7 +1885,7 @@ void __ref remove_memory(int nid, u64 start, u64 size)
     memblock_free(start, size);
     memblock_remove(start, size);
 
-    arch_remove_memory(start, size);
+    arch_remove_memory(start, size, NULL);
 
     try_offline_node(nid);

==== next file ====

@@ -5321,9 +5321,9 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
  * done. Non-atomic initialization, single-pass.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-        unsigned long start_pfn, enum memmap_context context)
+        unsigned long start_pfn, enum memmap_context context,
+        struct vmem_altmap *altmap)
 {
-    struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
     unsigned long end_pfn = start_pfn + size;
     pg_data_t *pgdat = NODE_DATA(nid);
     unsigned long pfn;
@@ -5429,7 +5429,7 @@ static void __meminit zone_init_free_lists(struct zone *zone)
 
 #ifndef __HAVE_ARCH_MEMMAP_INIT
 #define memmap_init(size, nid, zone, start_pfn) \
-    memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY)
+    memmap_init_zone((size), (nid), (zone), (start_pfn), MEMMAP_EARLY, NULL)
 #endif
 
 static int zone_batchsize(struct zone *zone)

==== next file ====

@@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
 }
 
 /* need to make sure size is all the same during early stage */
-static void * __meminit alloc_block_buf(unsigned long size, int node)
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
 {
     void *ptr;
 
@@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
 }
 
 /**
- * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
- * @altmap - reserved page pool for the allocation
- * @nr_pfns - size (in pages) of the allocation
+ * altmap_alloc_block_buf - allocate pages from the device page map
+ * @altmap: device page map
+ * @size: size (in bytes) of the allocation
  *
- * Allocations are aligned to the size of the request
+ * Allocations are aligned to the size of the request.
  */
-static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
-        unsigned long nr_pfns)
-{
-    unsigned long pfn = vmem_altmap_next_pfn(altmap);
-    unsigned long nr_align;
-
-    nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
-    nr_align = ALIGN(pfn, nr_align) - pfn;
-
-    if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
-        return ULONG_MAX;
-    altmap->alloc += nr_pfns;
-    altmap->align += nr_align;
-    return pfn + nr_align;
-}
-
-static void * __meminit altmap_alloc_block_buf(unsigned long size,
+void * __meminit altmap_alloc_block_buf(unsigned long size,
         struct vmem_altmap *altmap)
 {
-    unsigned long pfn, nr_pfns;
-    void *ptr;
+    unsigned long pfn, nr_pfns, nr_align;
 
     if (size & ~PAGE_MASK) {
         pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
@@ -141,25 +124,20 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
         return NULL;
     }
 
+    pfn = vmem_altmap_next_pfn(altmap);
     nr_pfns = size >> PAGE_SHIFT;
-    pfn = vmem_altmap_alloc(altmap, nr_pfns);
-    if (pfn < ULONG_MAX)
-        ptr = __va(__pfn_to_phys(pfn));
-    else
-        ptr = NULL;
+    nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
+    nr_align = ALIGN(pfn, nr_align) - pfn;
+    if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
+        return NULL;
+
+    altmap->alloc += nr_pfns;
+    altmap->align += nr_align;
+    pfn += nr_align;
+
     pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
             __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
 
-    return ptr;
-}
-
-/* need to make sure size is all the same during early stage */
-void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
-        struct vmem_altmap *altmap)
-{
-    if (altmap)
-        return altmap_alloc_block_buf(size, altmap);
-    return alloc_block_buf(size, node);
+    return __va(__pfn_to_phys(pfn));
 }
 
 void __meminit vmemmap_verify(pte_t *pte, int node,
@@ -178,7 +156,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
     pte_t *pte = pte_offset_kernel(pmd, addr);
     if (pte_none(*pte)) {
         pte_t entry;
-        void *p = alloc_block_buf(PAGE_SIZE, node);
+        void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
         if (!p)
             return NULL;
         entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
@@ -278,7 +256,8 @@ int __meminit vmemmap_populate_basepages(unsigned long start,
     return 0;
 }
 
-struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid,
+        struct vmem_altmap *altmap)
 {
     unsigned long start;
     unsigned long end;
@@ -288,7 +267,7 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid)
     start = (unsigned long)map;
     end = (unsigned long)(map + PAGES_PER_SECTION);
 
-    if (vmemmap_populate(start, end, nid))
+    if (vmemmap_populate(start, end, nid, altmap))
         return NULL;
 
     return map;
@@ -318,7 +297,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
         if (!present_section_nr(pnum))
             continue;
 
-        map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+        map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
         if (map_map[pnum])
             continue;
         ms = __nr_to_section(pnum);
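
To make the alignment rule in altmap_alloc_block_buf() concrete, a worked
example with illustrative numbers only:

    /*
     * next free pfn = 0x10203, request = 2MiB => nr_pfns = 512,
     * nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG) = 512,
     * ALIGN(0x10203, 512) = 0x10400, so nr_align becomes 509 wasted pages:
     * alloc += 512, align += 509, and the block returned starts at pfn 0x10400.
     */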

==== next file ====

@@ -421,7 +421,8 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
 }
 
 #ifndef CONFIG_SPARSEMEM_VMEMMAP
-struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid)
+struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
+        struct vmem_altmap *altmap)
 {
     struct page *map;
     unsigned long size;
@@ -476,7 +477,7 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
         if (!present_section_nr(pnum))
             continue;
-        map_map[pnum] = sparse_mem_map_populate(pnum, nodeid);
+        map_map[pnum] = sparse_mem_map_populate(pnum, nodeid, NULL);
         if (map_map[pnum])
             continue;
         ms = __nr_to_section(pnum);
@@ -504,7 +505,7 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
     struct mem_section *ms = __nr_to_section(pnum);
     int nid = sparse_early_nid(ms);
 
-    map = sparse_mem_map_populate(pnum, nid);
+    map = sparse_mem_map_populate(pnum, nid, NULL);
     if (map)
         return map;
@@ -682,17 +683,19 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+        struct vmem_altmap *altmap)
 {
     /* This will make the necessary allocations eventually. */
-    return sparse_mem_map_populate(pnum, nid);
+    return sparse_mem_map_populate(pnum, nid, altmap);
 }
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+        struct vmem_altmap *altmap)
 {
     unsigned long start = (unsigned long)memmap;
     unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
-    vmemmap_free(start, end);
+    vmemmap_free(start, end, altmap);
 }
 #ifdef CONFIG_MEMORY_HOTREMOVE
 static void free_map_bootmem(struct page *memmap)
@@ -700,7 +703,7 @@ static void free_map_bootmem(struct page *memmap)
     unsigned long start = (unsigned long)memmap;
     unsigned long end = (unsigned long)(memmap + PAGES_PER_SECTION);
 
-    vmemmap_free(start, end);
+    vmemmap_free(start, end, NULL);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #else
@@ -725,12 +728,14 @@ got_map_ptr:
     return ret;
 }
 
-static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid)
+static inline struct page *kmalloc_section_memmap(unsigned long pnum, int nid,
+        struct vmem_altmap *altmap)
 {
     return __kmalloc_section_memmap();
 }
 
-static void __kfree_section_memmap(struct page *memmap)
+static void __kfree_section_memmap(struct page *memmap,
+        struct vmem_altmap *altmap)
 {
     if (is_vmalloc_addr(memmap))
         vfree(memmap);
@@ -777,7 +782,8 @@ static void free_map_bootmem(struct page *memmap)
  * set.  If this is <=0, then that means that the passed-in
  * map was not consumed and must be freed.
  */
-int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long start_pfn)
+int __meminit sparse_add_one_section(struct pglist_data *pgdat,
+        unsigned long start_pfn, struct vmem_altmap *altmap)
 {
     unsigned long section_nr = pfn_to_section_nr(start_pfn);
     struct mem_section *ms;
@@ -793,12 +799,12 @@ int __meminit sparse_add_one_section(struct pglist_data *pgdat, unsigned long st
     ret = sparse_index_init(section_nr, pgdat->node_id);
     if (ret < 0 && ret != -EEXIST)
         return ret;
-    memmap = kmalloc_section_memmap(section_nr, pgdat->node_id);
+    memmap = kmalloc_section_memmap(section_nr, pgdat->node_id, altmap);
     if (!memmap)
         return -ENOMEM;
     usemap = __kmalloc_section_usemap();
     if (!usemap) {
-        __kfree_section_memmap(memmap);
+        __kfree_section_memmap(memmap, altmap);
         return -ENOMEM;
     }
@@ -820,7 +826,7 @@ out:
     pgdat_resize_unlock(pgdat, &flags);
     if (ret <= 0) {
         kfree(usemap);
-        __kfree_section_memmap(memmap);
+        __kfree_section_memmap(memmap, altmap);
     }
     return ret;
 }
@@ -847,7 +853,8 @@ static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 }
 #endif
 
-static void free_section_usemap(struct page *memmap, unsigned long *usemap)
+static void free_section_usemap(struct page *memmap, unsigned long *usemap,
+        struct vmem_altmap *altmap)
 {
     struct page *usemap_page;
@@ -861,7 +868,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
     if (PageSlab(usemap_page) || PageCompound(usemap_page)) {
         kfree(usemap);
         if (memmap)
-            __kfree_section_memmap(memmap);
+            __kfree_section_memmap(memmap, altmap);
         return;
     }
@@ -875,7 +882,7 @@ static void free_section_usemap(struct page *memmap, unsigned long *usemap)
 }
 
 void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
-        unsigned long map_offset)
+        unsigned long map_offset, struct vmem_altmap *altmap)
 {
     struct page *memmap = NULL;
     unsigned long *usemap = NULL, flags;
@@ -893,7 +900,7 @@ void sparse_remove_one_section(struct zone *zone, struct mem_section *ms,
     clear_hwpoisoned_pages(memmap + map_offset,
             PAGES_PER_SECTION - map_offset);
 
-    free_section_usemap(memmap, usemap);
+    free_section_usemap(memmap, usemap, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 #endif /* CONFIG_MEMORY_HOTPLUG */

==== next file ====

@@ -37,10 +37,12 @@ obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o
 
 nfit-y := $(ACPI_SRC)/core.o
 nfit-$(CONFIG_X86_MCE) += $(ACPI_SRC)/mce.o
+nfit-y += acpi_nfit_test.o
 nfit-y += config_check.o
 
 nd_pmem-y := $(NVDIMM_SRC)/pmem.o
 nd_pmem-y += pmem-dax.o
+nd_pmem-y += pmem_test.o
 nd_pmem-y += config_check.o
 
 nd_btt-y := $(NVDIMM_SRC)/btt.o
@@ -57,6 +59,7 @@ dax-y += config_check.o
 
 device_dax-y := $(DAX_SRC)/device.o
 device_dax-y += dax-dev.o
+device_dax-y += device_dax_test.o
 device_dax-y += config_check.o
 
 dax_pmem-y := $(DAX_SRC)/pmem.o
@@ -75,6 +78,7 @@ libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o
 libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
+libnvdimm-y += libnvdimm_test.o
 libnvdimm-y += config_check.o
 
 obj-m += test/

==== next file ====

@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(acpi_nfit);

==== next file ====

@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(device_dax);

==== next file ====

@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(libnvdimm);

==== next file ====

@@ -0,0 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.
#include <linux/module.h>
#include <linux/printk.h>
#include "watermark.h"
nfit_test_watermark(pmem);

==== next file ====

@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
 }
 EXPORT_SYMBOL(__wrap_devm_memremap);
 
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
-        struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
-    resource_size_t offset = res->start;
+    resource_size_t offset = pgmap->res.start;
     struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
     if (nfit_res)
         return nfit_res->buf + offset - nfit_res->res.start;
-    return devm_memremap_pages(dev, res, ref, altmap);
+    return devm_memremap_pages(dev, pgmap);
 }
 EXPORT_SYMBOL(__wrap_devm_memremap_pages);

==== next file ====

@@ -27,6 +27,7 @@
 #include <nfit.h>
 #include <nd.h>
 #include "nfit_test.h"
+#include "../watermark.h"
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -137,6 +138,14 @@ static u32 handle[] = {
 
 static unsigned long dimm_fail_cmd_flags[NUM_DCR];
 
+struct nfit_test_fw {
+    enum intel_fw_update_state state;
+    u32 context;
+    u64 version;
+    u32 size_received;
+    u64 end_time;
+};
+
 struct nfit_test {
     struct acpi_nfit_desc acpi_desc;
     struct platform_device pdev;
@@ -168,8 +177,11 @@ struct nfit_test {
         spinlock_t lock;
     } ars_state;
     struct device *dimm_dev[NUM_DCR];
+    struct nd_intel_smart *smart;
+    struct nd_intel_smart_threshold *smart_threshold;
     struct badrange badrange;
     struct work_struct work;
+    struct nfit_test_fw *fw;
 };
 
 static struct workqueue_struct *nfit_wq;
@@ -181,6 +193,226 @@ static struct nfit_test *to_nfit_test(struct device *dev)
 	return container_of(pdev, struct nfit_test, pdev);
 }
static int nd_intel_test_get_fw_info(struct nfit_test *t,
struct nd_intel_fw_info *nd_cmd, unsigned int buf_len,
int idx)
{
struct device *dev = &t->pdev.dev;
struct nfit_test_fw *fw = &t->fw[idx];
dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p, buf_len: %u, idx: %d\n",
__func__, t, nd_cmd, buf_len, idx);
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
nd_cmd->status = 0;
nd_cmd->storage_size = INTEL_FW_STORAGE_SIZE;
nd_cmd->max_send_len = INTEL_FW_MAX_SEND_LEN;
nd_cmd->query_interval = INTEL_FW_QUERY_INTERVAL;
nd_cmd->max_query_time = INTEL_FW_QUERY_MAX_TIME;
nd_cmd->update_cap = 0;
nd_cmd->fis_version = INTEL_FW_FIS_VERSION;
nd_cmd->run_version = 0;
nd_cmd->updated_version = fw->version;
return 0;
}
static int nd_intel_test_start_update(struct nfit_test *t,
struct nd_intel_fw_start *nd_cmd, unsigned int buf_len,
int idx)
{
struct device *dev = &t->pdev.dev;
struct nfit_test_fw *fw = &t->fw[idx];
dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
__func__, t, nd_cmd, buf_len, idx);
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
if (fw->state != FW_STATE_NEW) {
/* extended status, FW update in progress */
nd_cmd->status = 0x10007;
return 0;
}
fw->state = FW_STATE_IN_PROGRESS;
fw->context++;
fw->size_received = 0;
nd_cmd->status = 0;
nd_cmd->context = fw->context;
dev_dbg(dev, "%s: context issued: %#x\n", __func__, nd_cmd->context);
return 0;
}
static int nd_intel_test_send_data(struct nfit_test *t,
struct nd_intel_fw_send_data *nd_cmd, unsigned int buf_len,
int idx)
{
struct device *dev = &t->pdev.dev;
struct nfit_test_fw *fw = &t->fw[idx];
u32 *status = (u32 *)&nd_cmd->data[nd_cmd->length];
dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
__func__, t, nd_cmd, buf_len, idx);
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
dev_dbg(dev, "%s: cmd->status: %#x\n", __func__, *status);
dev_dbg(dev, "%s: cmd->data[0]: %#x\n", __func__, nd_cmd->data[0]);
dev_dbg(dev, "%s: cmd->data[%u]: %#x\n", __func__, nd_cmd->length-1,
nd_cmd->data[nd_cmd->length-1]);
if (fw->state != FW_STATE_IN_PROGRESS) {
dev_dbg(dev, "%s: not in IN_PROGRESS state\n", __func__);
*status = 0x5;
return 0;
}
if (nd_cmd->context != fw->context) {
dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
__func__, nd_cmd->context, fw->context);
*status = 0x10007;
return 0;
}
/*
* check offset + len > size of fw storage
* check length is > max send length
*/
if (nd_cmd->offset + nd_cmd->length > INTEL_FW_STORAGE_SIZE ||
nd_cmd->length > INTEL_FW_MAX_SEND_LEN) {
*status = 0x3;
dev_dbg(dev, "%s: buffer boundary violation\n", __func__);
return 0;
}
fw->size_received += nd_cmd->length;
dev_dbg(dev, "%s: copying %u bytes, %u bytes so far\n",
__func__, nd_cmd->length, fw->size_received);
*status = 0;
return 0;
}
static int nd_intel_test_finish_fw(struct nfit_test *t,
struct nd_intel_fw_finish_update *nd_cmd,
unsigned int buf_len, int idx)
{
struct device *dev = &t->pdev.dev;
struct nfit_test_fw *fw = &t->fw[idx];
dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
__func__, t, nd_cmd, buf_len, idx);
if (fw->state == FW_STATE_UPDATED) {
/* update already done, need cold boot */
nd_cmd->status = 0x20007;
return 0;
}
dev_dbg(dev, "%s: context: %#x ctrl_flags: %#x\n",
__func__, nd_cmd->context, nd_cmd->ctrl_flags);
switch (nd_cmd->ctrl_flags) {
case 0: /* finish */
if (nd_cmd->context != fw->context) {
dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
__func__, nd_cmd->context,
fw->context);
nd_cmd->status = 0x10007;
return 0;
}
nd_cmd->status = 0;
fw->state = FW_STATE_VERIFY;
/* set 1 second of time for firmware "update" */
fw->end_time = jiffies + HZ;
break;
case 1: /* abort */
fw->size_received = 0;
/* successfully aborted status */
nd_cmd->status = 0x40007;
fw->state = FW_STATE_NEW;
dev_dbg(dev, "%s: abort successful\n", __func__);
break;
default: /* bad control flag */
dev_warn(dev, "%s: unknown control flag: %#x\n",
__func__, nd_cmd->ctrl_flags);
return -EINVAL;
}
return 0;
}
static int nd_intel_test_finish_query(struct nfit_test *t,
struct nd_intel_fw_finish_query *nd_cmd,
unsigned int buf_len, int idx)
{
struct device *dev = &t->pdev.dev;
struct nfit_test_fw *fw = &t->fw[idx];
dev_dbg(dev, "%s(nfit_test: %p nd_cmd: %p buf_len: %u idx: %d)\n",
__func__, t, nd_cmd, buf_len, idx);
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
if (nd_cmd->context != fw->context) {
dev_dbg(dev, "%s: incorrect context: in: %#x correct: %#x\n",
__func__, nd_cmd->context, fw->context);
nd_cmd->status = 0x10007;
return 0;
}
dev_dbg(dev, "%s context: %#x\n", __func__, nd_cmd->context);
switch (fw->state) {
case FW_STATE_NEW:
nd_cmd->updated_fw_rev = 0;
nd_cmd->status = 0;
dev_dbg(dev, "%s: new state\n", __func__);
break;
case FW_STATE_IN_PROGRESS:
/* sequencing error */
nd_cmd->status = 0x40007;
nd_cmd->updated_fw_rev = 0;
dev_dbg(dev, "%s: sequence error\n", __func__);
break;
case FW_STATE_VERIFY:
if (time_is_after_jiffies64(fw->end_time)) {
nd_cmd->updated_fw_rev = 0;
nd_cmd->status = 0x20007;
dev_dbg(dev, "%s: still verifying\n", __func__);
break;
}
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
/* we are going to fall through if it's "done" */
case FW_STATE_UPDATED:
nd_cmd->status = 0;
/* bogus test version */
fw->version = nd_cmd->updated_fw_rev =
INTEL_FW_FAKE_VERSION;
dev_dbg(dev, "%s: updated\n", __func__);
break;
default: /* we should never get here */
return -EINVAL;
}
return 0;
}
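/*
 * Illustrative sketch (not part of the patch): these handlers are reached
 * through ND_CMD_CALL, with the Intel-family payload wrapped in the
 * struct nd_cmd_pkg envelope from the ndctl UAPI.  The size values below
 * are assumptions for the ND_INTEL_FW_GET_INFO case; the nd_intel_fw_info
 * payload follows pkg->nd_payload[] in the caller's buffer.
 */
static void example_fw_get_info_pkg(struct nd_cmd_pkg *pkg)
{
	pkg->nd_family = NVDIMM_FAMILY_INTEL;
	pkg->nd_command = ND_INTEL_FW_GET_INFO;
	pkg->nd_size_in = 0;
	pkg->nd_size_out = sizeof(struct nd_intel_fw_info);
}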
static int nfit_test_cmd_get_config_size(struct nd_cmd_get_config_size *nd_cmd,
unsigned int buf_len)
{
@@ -440,39 +672,66 @@ static int nfit_test_cmd_translate_spa(struct nvdimm_bus *bus,
return 0;
}
static int nfit_test_cmd_smart(struct nd_intel_smart *smart, unsigned int buf_len,
struct nd_intel_smart *smart_data)
{
if (buf_len < sizeof(*smart))
return -EINVAL;
memcpy(smart, smart_data, sizeof(*smart));
return 0;
}
static int nfit_test_cmd_smart_threshold(
struct nd_intel_smart_threshold *out,
unsigned int buf_len,
struct nd_intel_smart_threshold *smart_t)
{
if (buf_len < sizeof(*smart_t))
return -EINVAL;
memcpy(out, smart_t, sizeof(*smart_t));
return 0;
}
static void smart_notify(struct device *bus_dev,
struct device *dimm_dev, struct nd_intel_smart *smart,
struct nd_intel_smart_threshold *thresh)
{
dev_dbg(dimm_dev, "%s: alarm: %#x spares: %d (%d) mtemp: %d (%d) ctemp: %d (%d)\n",
__func__, thresh->alarm_control, thresh->spares,
smart->spares, thresh->media_temperature,
smart->media_temperature, thresh->ctrl_temperature,
smart->ctrl_temperature);
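/* if any enabled alarm threshold has been crossed, notify the DIMM (0x81) */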
if (((thresh->alarm_control & ND_INTEL_SMART_SPARE_TRIP)
&& smart->spares
<= thresh->spares)
|| ((thresh->alarm_control & ND_INTEL_SMART_TEMP_TRIP)
&& smart->media_temperature
>= thresh->media_temperature)
|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
&& smart->ctrl_temperature
>= thresh->ctrl_temperature)) {
device_lock(bus_dev);
__acpi_nvdimm_notify(dimm_dev, 0x81);
device_unlock(bus_dev);
}
}
static int nfit_test_cmd_smart_set_threshold(
struct nd_intel_smart_set_threshold *in,
unsigned int buf_len,
struct nd_intel_smart_threshold *thresh,
struct nd_intel_smart *smart,
struct device *bus_dev, struct device *dimm_dev)
{
unsigned int size;
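/* copy everything but the trailing 4-byte status field into the threshold payload */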
size = sizeof(*in) - 4;
if (buf_len < size)
return -EINVAL;
memcpy(thresh->data, in, size);
in->status = 0;
smart_notify(bus_dev, dimm_dev, smart, thresh);
return 0;
}
@@ -563,6 +822,52 @@ static int nfit_test_cmd_ars_inject_status(struct nfit_test *t,
return 0;
}
static int nd_intel_test_cmd_set_lss_status(struct nfit_test *t,
struct nd_intel_lss *nd_cmd, unsigned int buf_len)
{
struct device *dev = &t->pdev.dev;
if (buf_len < sizeof(*nd_cmd))
return -EINVAL;
switch (nd_cmd->enable) {
case 0:
nd_cmd->status = 0;
dev_dbg(dev, "%s: Latch System Shutdown Status disabled\n",
__func__);
break;
case 1:
nd_cmd->status = 0;
dev_dbg(dev, "%s: Latch System Shutdown Status enabled\n",
__func__);
break;
default:
dev_warn(dev, "Unknown enable value: %#x\n", nd_cmd->enable);
nd_cmd->status = 0x3;
break;
}
return 0;
}
static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
{
int i;
/* lookup per-dimm data */
for (i = 0; i < ARRAY_SIZE(handle); i++)
if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i])
break;
if (i >= ARRAY_SIZE(handle))
return -ENXIO;
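/* simulate a command failure when the test has flagged this function for this DIMM */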
if ((1 << func) & dimm_fail_cmd_flags[i])
return -EIO;
return i;
}
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -591,22 +896,57 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
func = call_pkg->nd_command;
if (call_pkg->nd_family != nfit_mem->family)
return -ENOTTY;
i = get_dimm(nfit_mem, func);
if (i < 0)
return i;
switch (func) {
case ND_INTEL_ENABLE_LSS_STATUS:
return nd_intel_test_cmd_set_lss_status(t,
buf, buf_len);
case ND_INTEL_FW_GET_INFO:
return nd_intel_test_get_fw_info(t, buf,
buf_len, i - t->dcr_idx);
case ND_INTEL_FW_START_UPDATE:
return nd_intel_test_start_update(t, buf,
buf_len, i - t->dcr_idx);
case ND_INTEL_FW_SEND_DATA:
return nd_intel_test_send_data(t, buf,
buf_len, i - t->dcr_idx);
case ND_INTEL_FW_FINISH_UPDATE:
return nd_intel_test_finish_fw(t, buf,
buf_len, i - t->dcr_idx);
case ND_INTEL_FW_FINISH_QUERY:
return nd_intel_test_finish_query(t, buf,
buf_len, i - t->dcr_idx);
case ND_INTEL_SMART:
return nfit_test_cmd_smart(buf, buf_len,
&t->smart[i - t->dcr_idx]);
case ND_INTEL_SMART_THRESHOLD:
return nfit_test_cmd_smart_threshold(buf,
buf_len,
&t->smart_threshold[i -
t->dcr_idx]);
case ND_INTEL_SMART_SET_THRESHOLD:
return nfit_test_cmd_smart_set_threshold(buf,
buf_len,
&t->smart_threshold[i -
t->dcr_idx],
&t->smart[i - t->dcr_idx],
&t->pdev.dev, t->dimm_dev[i]);
default:
return -ENOTTY;
}
}
if (!test_bit(cmd, &cmd_mask)
|| !test_bit(func, &nfit_mem->dsm_mask))
return -ENOTTY;
i = get_dimm(nfit_mem, func);
if (i < 0)
return i;
switch (func) {
case ND_CMD_GET_CONFIG_SIZE:
@@ -620,15 +960,6 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nfit_test_cmd_set_config_data(buf, buf_len,
t->label[i - t->dcr_idx]);
break;
default:
return -ENOTTY;
}
@@ -872,6 +1203,44 @@ static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
NULL,
};
static void smart_init(struct nfit_test *t)
{
int i;
const struct nd_intel_smart_threshold smart_t_data = {
.alarm_control = ND_INTEL_SMART_SPARE_TRIP
| ND_INTEL_SMART_TEMP_TRIP,
.media_temperature = 40 * 16,
.ctrl_temperature = 30 * 16,
.spares = 5,
};
const struct nd_intel_smart smart_data = {
.flags = ND_INTEL_SMART_HEALTH_VALID
| ND_INTEL_SMART_SPARES_VALID
| ND_INTEL_SMART_ALARM_VALID
| ND_INTEL_SMART_USED_VALID
| ND_INTEL_SMART_SHUTDOWN_VALID
| ND_INTEL_SMART_MTEMP_VALID,
.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
.media_temperature = 23 * 16,
.ctrl_temperature = 30 * 16,
.pmic_temperature = 40 * 16,
.spares = 75,
.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
| ND_INTEL_SMART_TEMP_TRIP,
.ait_status = 1,
.life_used = 5,
.shutdown_state = 0,
.vendor_size = 0,
.shutdown_count = 100,
};
for (i = 0; i < t->num_dcr; i++) {
memcpy(&t->smart[i], &smart_data, sizeof(smart_data));
memcpy(&t->smart_threshold[i], &smart_t_data,
sizeof(smart_t_data));
}
}
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -881,7 +1250,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
window_size) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ (sizeof(struct acpi_nfit_flush_address)
+ sizeof(u64) * NUM_HINTS) * NUM_DCR
+ sizeof(struct acpi_nfit_capabilities);
int i;
t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
@@ -939,6 +1309,7 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -969,6 +1340,7 @@ static int nfit_test1_alloc(struct nfit_test *t)
if (!t->spa_set[1])
return -ENOMEM;
smart_init(t);
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -993,6 +1365,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_data_region *bdw;
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
unsigned int offset, i;
/*
@@ -1500,8 +1873,16 @@ static void nfit_test0_setup(struct nfit_test *t)
for (i = 0; i < NUM_HINTS; i++)
flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
/* platform capabilities */
pcap = nfit_buf + offset + flush_hint_size * 4;
pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
pcap->header.length = sizeof(*pcap);
pcap->highest_capability = 1;
pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
ACPI_NFIT_CAPABILITY_MEM_FLUSH;
if (t->setup_hotplug) {
offset = offset + flush_hint_size * 4 + sizeof(*pcap);
/* dcr-descriptor4: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1642,17 +2023,24 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_FINISH_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_FINISH_QUERY, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -1750,6 +2138,7 @@ static void nfit_test1_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_INTEL_ENABLE_LSS_STATUS, &acpi_desc->dimm_cmd_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -2054,10 +2443,18 @@ static int nfit_test_probe(struct platform_device *pdev)
sizeof(struct nfit_test_dcr *), GFP_KERNEL);
nfit_test->dcr_dma = devm_kcalloc(dev, num,
sizeof(dma_addr_t), GFP_KERNEL);
nfit_test->smart = devm_kcalloc(dev, num,
sizeof(struct nd_intel_smart), GFP_KERNEL);
nfit_test->smart_threshold = devm_kcalloc(dev, num,
sizeof(struct nd_intel_smart_threshold),
GFP_KERNEL);
nfit_test->fw = devm_kcalloc(dev, num,
sizeof(struct nfit_test_fw), GFP_KERNEL);
if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label
&& nfit_test->label_dma && nfit_test->dcr
&& nfit_test->dcr_dma && nfit_test->flush
&& nfit_test->flush_dma
&& nfit_test->fw)
/* pass */;
else
return -ENOMEM;
@@ -2159,6 +2556,11 @@ static __init int nfit_test_init(void)
{
int rc, i;
pmem_test();
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
nfit_wq = create_singlethread_workqueue("nfit");


@@ -84,6 +84,140 @@ struct nd_cmd_ars_err_inj_stat {
} __packed record[0];
} __packed;
#define ND_INTEL_SMART 1
#define ND_INTEL_SMART_THRESHOLD 2
#define ND_INTEL_ENABLE_LSS_STATUS 10
#define ND_INTEL_FW_GET_INFO 12
#define ND_INTEL_FW_START_UPDATE 13
#define ND_INTEL_FW_SEND_DATA 14
#define ND_INTEL_FW_FINISH_UPDATE 15
#define ND_INTEL_FW_FINISH_QUERY 16
#define ND_INTEL_SMART_SET_THRESHOLD 17
#define ND_INTEL_SMART_HEALTH_VALID (1 << 0)
#define ND_INTEL_SMART_SPARES_VALID (1 << 1)
#define ND_INTEL_SMART_USED_VALID (1 << 2)
#define ND_INTEL_SMART_MTEMP_VALID (1 << 3)
#define ND_INTEL_SMART_CTEMP_VALID (1 << 4)
#define ND_INTEL_SMART_SHUTDOWN_COUNT_VALID (1 << 5)
#define ND_INTEL_SMART_AIT_STATUS_VALID (1 << 6)
#define ND_INTEL_SMART_PTEMP_VALID (1 << 7)
#define ND_INTEL_SMART_ALARM_VALID (1 << 9)
#define ND_INTEL_SMART_SHUTDOWN_VALID (1 << 10)
#define ND_INTEL_SMART_VENDOR_VALID (1 << 11)
#define ND_INTEL_SMART_SPARE_TRIP (1 << 0)
#define ND_INTEL_SMART_TEMP_TRIP (1 << 1)
#define ND_INTEL_SMART_CTEMP_TRIP (1 << 2)
#define ND_INTEL_SMART_NON_CRITICAL_HEALTH (1 << 0)
#define ND_INTEL_SMART_CRITICAL_HEALTH (1 << 1)
#define ND_INTEL_SMART_FATAL_HEALTH (1 << 2)
struct nd_intel_smart {
__u32 status;
union {
struct {
__u32 flags;
__u8 reserved0[4];
__u8 health;
__u8 spares;
__u8 life_used;
__u8 alarm_flags;
__u16 media_temperature;
__u16 ctrl_temperature;
__u32 shutdown_count;
__u8 ait_status;
__u16 pmic_temperature;
__u8 reserved1[8];
__u8 shutdown_state;
__u32 vendor_size;
__u8 vendor_data[92];
} __packed;
__u8 data[128];
};
} __packed;
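/*
 * Sketch (not in the header): the packed fields above sum to exactly 128
 * bytes, so they overlay the raw data[128] view; a compile-time check along
 * these lines would catch accidental layout changes.
 */
static inline void nd_intel_smart_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct nd_intel_smart) != 4 + 128);
}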
struct nd_intel_smart_threshold {
__u32 status;
union {
struct {
__u16 alarm_control;
__u8 spares;
__u16 media_temperature;
__u16 ctrl_temperature;
__u8 reserved[1];
} __packed;
__u8 data[8];
};
} __packed;
struct nd_intel_smart_set_threshold {
__u16 alarm_control;
__u8 spares;
__u16 media_temperature;
__u16 ctrl_temperature;
__u32 status;
} __packed;
#define INTEL_FW_STORAGE_SIZE 0x100000
#define INTEL_FW_MAX_SEND_LEN 0xFFEC
#define INTEL_FW_QUERY_INTERVAL 250000
#define INTEL_FW_QUERY_MAX_TIME 3000000
#define INTEL_FW_FIS_VERSION 0x0105
#define INTEL_FW_FAKE_VERSION 0xffffffffabcd
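/*
 * Emulated firmware-update state machine: FW_STATE_NEW -> FW_STATE_IN_PROGRESS
 * (start update) -> FW_STATE_VERIFY (finish, with ~1 second of simulated
 * verification) -> FW_STATE_UPDATED (reported by the finish-query handler once
 * the verify window elapses).
 */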
enum intel_fw_update_state {
FW_STATE_NEW = 0,
FW_STATE_IN_PROGRESS,
FW_STATE_VERIFY,
FW_STATE_UPDATED,
};
struct nd_intel_fw_info {
__u32 status;
__u32 storage_size;
__u32 max_send_len;
__u32 query_interval;
__u32 max_query_time;
__u8 update_cap;
__u8 reserved[3];
__u32 fis_version;
__u64 run_version;
__u64 updated_version;
} __packed;
struct nd_intel_fw_start {
__u32 status;
__u32 context;
} __packed;
/* unlike the other commands, the input comes first here because of the variable input data size */
struct nd_intel_fw_send_data {
__u32 context;
__u32 offset;
__u32 length;
__u8 data[0];
/* this field is not declared due to the variable-size data from the input */
/* __u32 status; */
} __packed;
struct nd_intel_fw_finish_update {
__u8 ctrl_flags;
__u8 reserved[3];
__u32 context;
__u32 status;
} __packed;
struct nd_intel_fw_finish_query {
__u32 context;
__u32 status;
__u64 updated_fw_rev;
} __packed;
struct nd_intel_lss {
__u8 enable;
__u32 status;
} __packed;
union acpi_object;
typedef void *acpi_handle;


@@ -0,0 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright(c) 2018 Intel Corporation. All rights reserved.
#ifndef _TEST_NVDIMM_WATERMARK_H_
#define _TEST_NVDIMM_WATERMARK_H_
int pmem_test(void);
int libnvdimm_test(void);
int acpi_nfit_test(void);
int device_dax_test(void);
/*
* dummy routine for nfit_test to validate it is linking to the properly
* mocked module and not the standard one from the base tree.
*/
#define nfit_test_watermark(x) \
int x##_test(void) \
{ \
pr_debug("%s for nfit_test\n", KBUILD_MODNAME); \
return 0; \
} \
EXPORT_SYMBOL(x##_test)
#endif /* _TEST_NVDIMM_WATERMARK_H_ */
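As a usage sketch (an assumption about how the instrumented modules consume this header, not shown in this patch excerpt), each mocked module would add a single expansion such as:

	nfit_test_watermark(pmem);

which defines and exports pmem_test(), the symbol called from nfit_test_init() above.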