mm: kmsan: maintain KMSAN metadata for page operations

Insert KMSAN hooks that make the necessary bookkeeping changes:
 - poison page shadow and origins in alloc_pages()/free_page();
 - clear page shadow and origins in clear_page(), copy_user_highpage();
 - copy page metadata in copy_highpage(), wp_page_copy();
 - handle vmap()/vunmap()/iounmap();

Link: https://lkml.kernel.org/r/20220915150417.722975-15-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Eric Biggers <ebiggers@kernel.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Ilya Leoshkevich <iii@linux.ibm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Marco Elver <elver@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vegard Nossum <vegard.nossum@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Alexander Potapenko 2022-09-15 17:03:48 +02:00 committed by Andrew Morton
parent d596b04f59
commit b073d7f8ae
10 changed files with 394 additions and 2 deletions

View File

@ -8,6 +8,8 @@
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <linux/kmsan-checks.h>
/* duplicated to the one in bootmem.h */
extern unsigned long max_pfn;
extern unsigned long phys_base;
@ -47,6 +49,11 @@ void clear_page_erms(void *page);
static inline void clear_page(void *page)
{
/*
* Clean up KMSAN metadata for the page being cleared. The assembly call
* below clobbers @page, so we perform unpoisoning before it.
*/
kmsan_unpoison_memory(page, PAGE_SIZE);
alternative_call_2(clear_page_orig,
clear_page_rep, X86_FEATURE_REP_GOOD,
clear_page_erms, X86_FEATURE_ERMS,

View File

@ -17,6 +17,7 @@
#include <linux/cc_platform.h>
#include <linux/efi.h>
#include <linux/pgtable.h>
#include <linux/kmsan.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@ -479,6 +480,8 @@ void iounmap(volatile void __iomem *addr)
return;
}
kmsan_iounmap_page_range((unsigned long)addr,
(unsigned long)addr + get_vm_area_size(p));
memtype_free(p->phys_addr, p->phys_addr + get_vm_area_size(p));
/* Finally remove it */

View File

@ -6,6 +6,7 @@
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/cacheflush.h>
#include <linux/kmsan.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
#include <linux/hardirq.h>
@ -311,6 +312,7 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
copy_user_page(vto, vfrom, vaddr, to);
kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
kunmap_local(vto);
kunmap_local(vfrom);
}
@ -326,6 +328,7 @@ static inline void copy_highpage(struct page *to, struct page *from)
vfrom = kmap_local_page(from);
vto = kmap_local_page(to);
copy_page(vto, vfrom);
kmsan_copy_page_meta(to, from);
kunmap_local(vto);
kunmap_local(vfrom);
}

145
include/linux/kmsan.h Normal file
View File

@ -0,0 +1,145 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* KMSAN API for subsystems.
*
* Copyright (C) 2017-2022 Google LLC
* Author: Alexander Potapenko <glider@google.com>
*
*/
#ifndef _LINUX_KMSAN_H
#define _LINUX_KMSAN_H
#include <linux/gfp.h>
#include <linux/kmsan-checks.h>
#include <linux/types.h>
struct page;
#ifdef CONFIG_KMSAN
/**
* kmsan_alloc_page() - Notify KMSAN about an alloc_pages() call.
* @page: struct page pointer returned by alloc_pages().
* @order: order of allocated struct page.
* @flags: GFP flags used by alloc_pages()
*
* KMSAN marks 1<<@order pages starting at @page as uninitialized, unless
* @flags contain __GFP_ZERO.
*/
void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags);
/**
* kmsan_free_page() - Notify KMSAN about a free_pages() call.
* @page: struct page pointer passed to free_pages().
* @order: order of deallocated struct page.
*
* KMSAN marks freed memory as uninitialized.
*/
void kmsan_free_page(struct page *page, unsigned int order);
/**
* kmsan_copy_page_meta() - Copy KMSAN metadata between two pages.
* @dst: destination page.
* @src: source page.
*
* KMSAN copies the contents of metadata pages for @src into the metadata pages
* for @dst. If @dst has no associated metadata pages, nothing happens.
* If @src has no associated metadata pages, @dst metadata pages are unpoisoned.
*/
void kmsan_copy_page_meta(struct page *dst, struct page *src);
/**
* kmsan_map_kernel_range_noflush() - Notify KMSAN about a vmap.
* @start: start of vmapped range.
* @end: end of vmapped range.
* @prot: page protection flags used for vmap.
* @pages: array of pages.
* @page_shift: page_shift passed to vmap_range_noflush().
*
* KMSAN maps shadow and origin pages of @pages into contiguous ranges in
* vmalloc metadata address range.
*/
void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages,
unsigned int page_shift);
/**
* kmsan_vunmap_kernel_range_noflush() - Notify KMSAN about a vunmap.
* @start: start of vunmapped range.
* @end: end of vunmapped range.
*
* KMSAN unmaps the contiguous metadata ranges created by
* kmsan_map_kernel_range_noflush().
*/
void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end);
/**
* kmsan_ioremap_page_range() - Notify KMSAN about a ioremap_page_range() call.
* @addr: range start.
* @end: range end.
* @phys_addr: physical range start.
* @prot: page protection flags used for ioremap_page_range().
* @page_shift: page_shift argument passed to vmap_range_noflush().
*
* KMSAN creates new metadata pages for the physical pages mapped into the
* virtual memory.
*/
void kmsan_ioremap_page_range(unsigned long addr, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
unsigned int page_shift);
/**
* kmsan_iounmap_page_range() - Notify KMSAN about a iounmap_page_range() call.
* @start: range start.
* @end: range end.
*
* KMSAN unmaps the metadata pages for the given range and, unlike for
* vunmap_page_range(), also deallocates them.
*/
void kmsan_iounmap_page_range(unsigned long start, unsigned long end);
#else
static inline int kmsan_alloc_page(struct page *page, unsigned int order,
gfp_t flags)
{
return 0;
}
static inline void kmsan_free_page(struct page *page, unsigned int order)
{
}
static inline void kmsan_copy_page_meta(struct page *dst, struct page *src)
{
}
static inline void kmsan_vmap_pages_range_noflush(unsigned long start,
unsigned long end,
pgprot_t prot,
struct page **pages,
unsigned int page_shift)
{
}
static inline void kmsan_vunmap_range_noflush(unsigned long start,
unsigned long end)
{
}
static inline void kmsan_ioremap_page_range(unsigned long start,
unsigned long end,
phys_addr_t phys_addr,
pgprot_t prot,
unsigned int page_shift)
{
}
static inline void kmsan_iounmap_page_range(unsigned long start,
unsigned long end)
{
}
#endif
#endif /* _LINUX_KMSAN_H */

View File

@ -818,8 +818,14 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
}
#endif
int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages,
unsigned int page_shift);
void vunmap_range_noflush(unsigned long start, unsigned long end);
void __vunmap_range_noflush(unsigned long start, unsigned long end);
int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
unsigned long addr, int page_nid, int *flags);

View File

@ -11,6 +11,7 @@
#include <linux/cacheflush.h>
#include <linux/gfp.h>
#include <linux/kmsan.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/slab.h>
@ -26,6 +27,91 @@
* skipping effects of functions like memset() inside instrumented code.
*/
static unsigned long vmalloc_shadow(unsigned long addr)
{
return (unsigned long)kmsan_get_metadata((void *)addr,
KMSAN_META_SHADOW);
}
static unsigned long vmalloc_origin(unsigned long addr)
{
return (unsigned long)kmsan_get_metadata((void *)addr,
KMSAN_META_ORIGIN);
}
void kmsan_vunmap_range_noflush(unsigned long start, unsigned long end)
{
__vunmap_range_noflush(vmalloc_shadow(start), vmalloc_shadow(end));
__vunmap_range_noflush(vmalloc_origin(start), vmalloc_origin(end));
flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
}
/*
* This function creates new shadow/origin pages for the physical pages mapped
* into the virtual memory. If those physical pages already had shadow/origin,
* those are ignored.
*/
void kmsan_ioremap_page_range(unsigned long start, unsigned long end,
phys_addr_t phys_addr, pgprot_t prot,
unsigned int page_shift)
{
gfp_t gfp_mask = GFP_KERNEL | __GFP_ZERO;
struct page *shadow, *origin;
unsigned long off = 0;
int nr;
if (!kmsan_enabled || kmsan_in_runtime())
return;
nr = (end - start) / PAGE_SIZE;
kmsan_enter_runtime();
for (int i = 0; i < nr; i++, off += PAGE_SIZE) {
shadow = alloc_pages(gfp_mask, 1);
origin = alloc_pages(gfp_mask, 1);
__vmap_pages_range_noflush(
vmalloc_shadow(start + off),
vmalloc_shadow(start + off + PAGE_SIZE), prot, &shadow,
PAGE_SHIFT);
__vmap_pages_range_noflush(
vmalloc_origin(start + off),
vmalloc_origin(start + off + PAGE_SIZE), prot, &origin,
PAGE_SHIFT);
}
flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
kmsan_leave_runtime();
}
void kmsan_iounmap_page_range(unsigned long start, unsigned long end)
{
unsigned long v_shadow, v_origin;
struct page *shadow, *origin;
int nr;
if (!kmsan_enabled || kmsan_in_runtime())
return;
nr = (end - start) / PAGE_SIZE;
kmsan_enter_runtime();
v_shadow = (unsigned long)vmalloc_shadow(start);
v_origin = (unsigned long)vmalloc_origin(start);
for (int i = 0; i < nr;
i++, v_shadow += PAGE_SIZE, v_origin += PAGE_SIZE) {
shadow = kmsan_vmalloc_to_page_or_null((void *)v_shadow);
origin = kmsan_vmalloc_to_page_or_null((void *)v_origin);
__vunmap_range_noflush(v_shadow, vmalloc_shadow(end));
__vunmap_range_noflush(v_origin, vmalloc_origin(end));
if (shadow)
__free_pages(shadow, 1);
if (origin)
__free_pages(origin, 1);
}
flush_cache_vmap(vmalloc_shadow(start), vmalloc_shadow(end));
flush_cache_vmap(vmalloc_origin(start), vmalloc_origin(end));
kmsan_leave_runtime();
}
/* Functions from kmsan-checks.h follow. */
void kmsan_poison_memory(const void *address, size_t size, gfp_t flags)
{

View File

@ -145,3 +145,116 @@ void *kmsan_get_metadata(void *address, bool is_origin)
return (is_origin ? origin_ptr_for(page) : shadow_ptr_for(page)) + off;
}
void kmsan_copy_page_meta(struct page *dst, struct page *src)
{
if (!kmsan_enabled || kmsan_in_runtime())
return;
if (!dst || !page_has_metadata(dst))
return;
if (!src || !page_has_metadata(src)) {
kmsan_internal_unpoison_memory(page_address(dst), PAGE_SIZE,
/*checked*/ false);
return;
}
kmsan_enter_runtime();
__memcpy(shadow_ptr_for(dst), shadow_ptr_for(src), PAGE_SIZE);
__memcpy(origin_ptr_for(dst), origin_ptr_for(src), PAGE_SIZE);
kmsan_leave_runtime();
}
void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags)
{
bool initialized = (flags & __GFP_ZERO) || !kmsan_enabled;
struct page *shadow, *origin;
depot_stack_handle_t handle;
int pages = 1 << order;
if (!page)
return;
shadow = shadow_page_for(page);
origin = origin_page_for(page);
if (initialized) {
__memset(page_address(shadow), 0, PAGE_SIZE * pages);
__memset(page_address(origin), 0, PAGE_SIZE * pages);
return;
}
/* Zero pages allocated by the runtime should also be initialized. */
if (kmsan_in_runtime())
return;
__memset(page_address(shadow), -1, PAGE_SIZE * pages);
kmsan_enter_runtime();
handle = kmsan_save_stack_with_flags(flags, /*extra_bits*/ 0);
kmsan_leave_runtime();
/*
* Addresses are page-aligned, pages are contiguous, so it's ok
* to just fill the origin pages with @handle.
*/
for (int i = 0; i < PAGE_SIZE * pages / sizeof(handle); i++)
((depot_stack_handle_t *)page_address(origin))[i] = handle;
}
void kmsan_free_page(struct page *page, unsigned int order)
{
if (!kmsan_enabled || kmsan_in_runtime())
return;
kmsan_enter_runtime();
kmsan_internal_poison_memory(page_address(page),
PAGE_SIZE << compound_order(page),
GFP_KERNEL,
KMSAN_POISON_CHECK | KMSAN_POISON_FREE);
kmsan_leave_runtime();
}
void kmsan_vmap_pages_range_noflush(unsigned long start, unsigned long end,
pgprot_t prot, struct page **pages,
unsigned int page_shift)
{
unsigned long shadow_start, origin_start, shadow_end, origin_end;
struct page **s_pages, **o_pages;
int nr, mapped;
if (!kmsan_enabled)
return;
shadow_start = vmalloc_meta((void *)start, KMSAN_META_SHADOW);
shadow_end = vmalloc_meta((void *)end, KMSAN_META_SHADOW);
if (!shadow_start)
return;
nr = (end - start) / PAGE_SIZE;
s_pages = kcalloc(nr, sizeof(*s_pages), GFP_KERNEL);
o_pages = kcalloc(nr, sizeof(*o_pages), GFP_KERNEL);
if (!s_pages || !o_pages)
goto ret;
for (int i = 0; i < nr; i++) {
s_pages[i] = shadow_page_for(pages[i]);
o_pages[i] = origin_page_for(pages[i]);
}
prot = __pgprot(pgprot_val(prot) | _PAGE_NX);
prot = PAGE_KERNEL;
origin_start = vmalloc_meta((void *)start, KMSAN_META_ORIGIN);
origin_end = vmalloc_meta((void *)end, KMSAN_META_ORIGIN);
kmsan_enter_runtime();
mapped = __vmap_pages_range_noflush(shadow_start, shadow_end, prot,
s_pages, page_shift);
KMSAN_WARN_ON(mapped);
mapped = __vmap_pages_range_noflush(origin_start, origin_end, prot,
o_pages, page_shift);
KMSAN_WARN_ON(mapped);
kmsan_leave_runtime();
flush_tlb_kernel_range(shadow_start, shadow_end);
flush_tlb_kernel_range(origin_start, origin_end);
flush_cache_vmap(shadow_start, shadow_end);
flush_cache_vmap(origin_start, origin_end);
ret:
kfree(s_pages);
kfree(o_pages);
}

View File

@ -52,6 +52,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/memremap.h>
#include <linux/kmsan.h>
#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/export.h>
@ -3136,6 +3137,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
delayacct_wpcopy_end();
return 0;
}
kmsan_copy_page_meta(new_page, old_page);
}
if (mem_cgroup_charge(page_folio(new_page), mm, GFP_KERNEL))

View File

@ -27,6 +27,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kasan.h>
#include <linux/kmsan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
@ -1400,6 +1401,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
VM_BUG_ON_PAGE(PageTail(page), page);
trace_mm_page_free(page, order);
kmsan_free_page(page, order);
if (unlikely(PageHWPoison(page)) && !order) {
/*
@ -3808,6 +3810,14 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
/*
* Allocate a page from the given zone. Use pcplists for order-0 allocations.
*/
/*
* Do not instrument rmqueue() with KMSAN. This function may call
* __msan_poison_alloca() through a call to set_pfnblock_flags_mask().
* If __msan_poison_alloca() attempts to allocate pages for the stack depot, it
* may call rmqueue() again, which will result in a deadlock.
*/
__no_sanitize_memory
static inline
struct page *rmqueue(struct zone *preferred_zone,
struct zone *zone, unsigned int order,
@ -5560,6 +5570,7 @@ out:
}
trace_mm_page_alloc(page, order, alloc_gfp, ac.migratetype);
kmsan_alloc_page(page, order, alloc_gfp);
return page;
}

View File

@ -320,6 +320,9 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
ioremap_max_page_shift);
flush_cache_vmap(addr, end);
if (!err)
kmsan_ioremap_page_range(addr, end, phys_addr, prot,
ioremap_max_page_shift);
return err;
}
@ -416,7 +419,7 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
*
* This is an internal function only. Do not use outside mm/.
*/
void vunmap_range_noflush(unsigned long start, unsigned long end)
void __vunmap_range_noflush(unsigned long start, unsigned long end)
{
unsigned long next;
pgd_t *pgd;
@ -438,6 +441,12 @@ void vunmap_range_noflush(unsigned long start, unsigned long end)
arch_sync_kernel_mappings(start, end);
}
void vunmap_range_noflush(unsigned long start, unsigned long end)
{
kmsan_vunmap_range_noflush(start, end);
__vunmap_range_noflush(start, end);
}
/**
* vunmap_range - unmap kernel virtual addresses
* @addr: start of the VM area to unmap
@ -575,7 +584,7 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end,
*
* This is an internal function only. Do not use outside mm/.
*/
int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
unsigned int i, nr = (end - addr) >> PAGE_SHIFT;
@ -601,6 +610,13 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
return 0;
}
int vmap_pages_range_noflush(unsigned long addr, unsigned long end,
pgprot_t prot, struct page **pages, unsigned int page_shift)
{
kmsan_vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
return __vmap_pages_range_noflush(addr, end, prot, pages, page_shift);
}
/**
* vmap_pages_range - map pages to a kernel virtual address
* @addr: start of the VM area to map