mirror of https://github.com/torvalds/linux.git (synced 2024-11-10 22:21:40 +00:00)

commit 245137cdf0
Merge branch 'akpm' (patches from Andrew)

Merge more updates from Andrew Morton:
 "118 patches:

  - The rest of MM. Includes kfence - another runtime memory validator.
    Not as thorough as KASAN, but it has unmeasurable overhead and is
    intended to be usable in production builds.

  - Everything else

  Subsystems affected by this patch series: alpha, procfs, sysctl, misc,
  core-kernel, MAINTAINERS, lib, bitops, checkpatch, init, coredump,
  seq_file, gdb, ubsan, initramfs, and mm (thp, cma, vmstat,
  memory-hotplug, mlock, rmap, zswap, zsmalloc, cleanups, kfence, kasan2,
  and pagemap2)"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits)
  MIPS: make userspace mapping young by default
  initramfs: panic with memory information
  ubsan: remove overflow checks
  kgdb: fix to kill breakpoints on initmem after boot
  scripts/gdb: fix list_for_each
  x86: fix seq_file iteration for pat/memtype.c
  seq_file: document how per-entry resources are managed.
  fs/coredump: use kmap_local_page()
  init/Kconfig: fix a typo in CC_VERSION_TEXT help text
  init: clean up early_param_on_off() macro
  init/version.c: remove Version_<LINUX_VERSION_CODE> symbol
  checkpatch: do not apply "initialise globals to 0" check to BPF progs
  checkpatch: don't warn about colon termination in linker scripts
  checkpatch: add kmalloc_array_node to unnecessary OOM message check
  checkpatch: add warning for avoiding .L prefix symbols in assembly files
  checkpatch: improve TYPECAST_INT_CONSTANT test message
  checkpatch: prefer ftrace over function entry/exit printks
  checkpatch: trivial style fixes
  checkpatch: ignore warning designated initializers using NR_CPUS
  checkpatch: improve blank line after declaration test
  ...

.mailmap (1 change)
@@ -237,6 +237,7 @@ Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
Mayuresh Janorkar <mayur@ti.com>
Michael Buesch <m@bues.ch>
Michel Dänzer <michel@tungstengraphics.com>
Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
Mike Rapoport <rppt@kernel.org> <rppt@linux.ibm.com>

CREDITS (9 changes)
@@ -2841,14 +2841,11 @@ S: Subiaco, 6008
S: Perth, Western Australia
S: Australia

N: Miguel Ojeda Sandonis
E: miguel.ojeda.sandonis@gmail.com
W: http://miguelojeda.es
W: http://jair.lab.fi.uva.es/~migojed/
N: Miguel Ojeda
E: ojeda@kernel.org
W: https://ojeda.dev
D: Author of the ks0108, cfag12864b and cfag12864bfb auxiliary display drivers.
D: Maintainer of the auxiliary display drivers tree (drivers/auxdisplay/*)
S: C/ Mieses 20, 9-B
S: Valladolid 47009
S: Spain

N: Peter Oruba
@@ -13,21 +13,22 @@ What: /sys/devices/system/memory/memoryX/removable
Date:		June 2008
Contact:	Badari Pulavarty <pbadari@us.ibm.com>
Description:
		The file /sys/devices/system/memory/memoryX/removable
		indicates whether this memory block is removable or not.
		This is useful for a user-level agent to determine
		identify removable sections of the memory before attempting
		potentially expensive hot-remove memory operation
		The file /sys/devices/system/memory/memoryX/removable is a
		legacy interface used to indicated whether a memory block is
		likely to be offlineable or not. Newer kernel versions return
		"1" if and only if the kernel supports memory offlining.
Users:		hotplug memory remove tools
		http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
		lsmem/chmem part of util-linux

What:		/sys/devices/system/memory/memoryX/phys_device
Date:		September 2008
Contact:	Badari Pulavarty <pbadari@us.ibm.com>
Description:
		The file /sys/devices/system/memory/memoryX/phys_device
		is read-only and is designed to show the name of physical
		memory device. Implementation is currently incomplete.
		is read-only; it is a legacy interface only ever used on s390x
		to expose the covered storage increment.
Users:		Legacy s390-tools lsmem/chmem

What:		/sys/devices/system/memory/memoryX/phys_index
Date:		September 2008

@@ -43,23 +44,25 @@ Date: September 2008
Contact:	Badari Pulavarty <pbadari@us.ibm.com>
Description:
		The file /sys/devices/system/memory/memoryX/state
		is read-write. When read, its contents show the
		online/offline state of the memory section. When written,
		root can toggle the the online/offline state of a removable
		memory section (see removable file description above)
		using the following commands::
		is read-write. When read, it returns the online/offline
		state of the memory block. When written, root can toggle
		the online/offline state of a memory block using the following
		commands::

		# echo online > /sys/devices/system/memory/memoryX/state
		# echo offline > /sys/devices/system/memory/memoryX/state

		For example, if /sys/devices/system/memory/memory22/removable
		contains a value of 1 and
		/sys/devices/system/memory/memory22/state contains the
		string "online" the following command can be executed by
		by root to offline that section::

		# echo offline > /sys/devices/system/memory/memory22/state

		On newer kernel versions, advanced states can be specified
		when onlining to select a target zone: "online_movable"
		selects the movable zone. "online_kernel" selects the
		applicable kernel zone (DMA, DMA32, or Normal). However,
		after successfully setting one of the advanced states,
		reading the file will return "online"; the zone information
		can be obtained via "valid_zones" instead.

		While onlining is unlikely to fail, there are no guarantees
		that offlining will succeed. Offlining is more likely to
		succeed if "valid_zones" indicates "Movable".
Users:		hotplug memory remove tools
		http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils

@@ -69,8 +72,19 @@ Date: July 2014
Contact:	Zhang Zhen <zhenzhang.zhang@huawei.com>
Description:
		The file /sys/devices/system/memory/memoryX/valid_zones is
		read-only and is designed to show which zone this memory
		block can be onlined to.
		read-only.

		For online memory blocks, it returns in which zone memory
		provided by a memory block is managed. If multiple zones
		apply (not applicable for hotplugged memory), "None" is returned
		and the memory block cannot be offlined.

		For offline memory blocks, it returns by which zone memory
		provided by a memory block can be managed when onlining.
		The first returned zone ("default") will be used when setting
		the state of an offline memory block to "online". Only one of
		the kernel zones (DMA, DMA32, Normal) is applicable for a single
		memory block.

What:		/sys/devices/system/memoryX/nodeY
Date:		October 2009
@@ -3,7 +3,7 @@ cfag12864b LCD Driver Documentation
===================================

:License: GPLv2
:Author & Maintainer: Miguel Ojeda Sandonis
:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
:Date: 2006-10-27

@@ -3,7 +3,7 @@ ks0108 LCD Controller Driver Documentation
==========================================

:License: GPLv2
:Author & Maintainer: Miguel Ojeda Sandonis
:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
:Date: 2006-10-27
@@ -5182,6 +5182,12 @@
			growing up) the main stack are reserved for no other
			mapping. Default value is 256 pages.

	stack_depot_disable= [KNL]
			Setting this to true through kernel command line will
			disable the stack depot thereby saving the static memory
			consumed by the stack hash table. By default this is set
			to false.

	stacktrace	[FTRACE]
			Enabled the stack tracer on boot up.
@@ -160,16 +160,16 @@ Under each memory block, you can see 5 files:

                     "online_movable", "online", "offline" command
                     which will be performed on all sections in the block.
``phys_device``      read-only: designed to show the name of physical memory
                     device. This is not well implemented now.
``removable``        read-only: contains an integer value indicating
                     whether the memory block is removable or not
                     removable. A value of 1 indicates that the memory
                     block is removable and a value of 0 indicates that
                     it is not removable. A memory block is removable only if
                     every section in the block is removable.
``valid_zones``      read-only: designed to show which zones this memory block
                     can be onlined to.
``phys_device``      read-only: legacy interface only ever used on s390x to
                     expose the covered storage increment.
``removable``        read-only: legacy interface that indicated whether a memory
                     block was likely to be offlineable or not. Newer kernel
                     versions return "1" if and only if the kernel supports
                     memory offlining.
``valid_zones``      read-only: designed to show by which zone memory provided by
                     a memory block is managed, and to show by which zone memory
                     provided by an offline memory block could be managed when
                     onlining.

                     The first column shows it`s default zone.
@@ -22,6 +22,7 @@ whole; patches welcome!

   ubsan
   kmemleak
   kcsan
   kfence
   gdb-kernel-debugging
   kgdb
   kselftest
@@ -155,7 +155,7 @@ Boot parameters
~~~~~~~~~~~~~~~

Hardware tag-based KASAN mode (see the section about various modes below) is
intended for use in production as a security mitigation. Therefore it supports
intended for use in production as a security mitigation. Therefore, it supports
boot parameters that allow to disable KASAN competely or otherwise control
particular KASAN features.

@@ -165,7 +165,8 @@ particular KASAN features.
   traces collection (default: ``on``).

- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
  report or also panic the kernel (default: ``report``).
  report or also panic the kernel (default: ``report``). Note, that tag
  checking gets disabled after the first reported bug.

For developers
~~~~~~~~~~~~~~

@@ -295,6 +296,9 @@ Note, that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
enabled. Even when kasan.mode=off is provided, or when the hardware doesn't
support MTE (but supports TBI).

Hardware tag-based KASAN only reports the first found bug. After that MTE tag
checking gets disabled.

What memory accesses are sanitised by KASAN?
--------------------------------------------
Documentation/dev-tools/kfence.rst (new file, 298 lines)

@@ -0,0 +1,298 @@
.. SPDX-License-Identifier: GPL-2.0
.. Copyright (C) 2020, Google LLC.

Kernel Electric-Fence (KFENCE)
==============================

Kernel Electric-Fence (KFENCE) is a low-overhead sampling-based memory safety
error detector. KFENCE detects heap out-of-bounds access, use-after-free, and
invalid-free errors.

KFENCE is designed to be enabled in production kernels, and has near zero
performance overhead. Compared to KASAN, KFENCE trades performance for
precision. The main motivation behind KFENCE's design, is that with enough
total uptime KFENCE will detect bugs in code paths not typically exercised by
non-production test workloads. One way to quickly achieve a large enough total
uptime is when the tool is deployed across a large fleet of machines.

Usage
-----

To enable KFENCE, configure the kernel with::

    CONFIG_KFENCE=y

To build a kernel with KFENCE support, but disabled by default (to enable, set
``kfence.sample_interval`` to non-zero value), configure the kernel with::

    CONFIG_KFENCE=y
    CONFIG_KFENCE_SAMPLE_INTERVAL=0

KFENCE provides several other configuration options to customize behaviour (see
the respective help text in ``lib/Kconfig.kfence`` for more info).

Tuning performance
~~~~~~~~~~~~~~~~~~

The most important parameter is KFENCE's sample interval, which can be set via
the kernel boot parameter ``kfence.sample_interval`` in milliseconds. The
sample interval determines the frequency with which heap allocations will be
guarded by KFENCE. The default is configurable via the Kconfig option
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
disables KFENCE.

The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
255), the number of available guarded objects can be controlled. Each object
requires 2 pages, one for the object itself and the other one used as a guard
page; object pages are interleaved with guard pages, and every object page is
therefore surrounded by two guard pages.

The total memory dedicated to the KFENCE memory pool can be computed as::

    ( #objects + 1 ) * 2 * PAGE_SIZE

Using the default config, and assuming a page size of 4 KiB, results in
dedicating 2 MiB to the KFENCE memory pool.

Note: On architectures that support huge pages, KFENCE will ensure that the
pool is using pages of size ``PAGE_SIZE``. This will result in additional page
tables being allocated.
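As a purely illustrative, hypothetical sketch (not code from the kernel tree),
the kind of heap out-of-bounds read that produces the reports shown below could
look like this; it only uses the regular slab API, and whether this particular
allocation ends up KFENCE-guarded depends on the sampling described above::

    #include <linux/slab.h>

    static void example_out_of_bounds_read(void)
    {
    	char *buf = kmalloc(32, GFP_KERNEL);	/* may be a KFENCE-guarded object */
    	volatile char c;

    	if (!buf)
    		return;

    	/*
    	 * Read one byte to the left of the allocation. If the object was
    	 * placed at the left page boundary of a KFENCE slot, this touches
    	 * the guard page and triggers an "out-of-bounds read" report.
    	 */
    	c = *(buf - 1);
    	(void)c;

    	kfree(buf);
    }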
Error reports
~~~~~~~~~~~~~

A typical out-of-bounds access looks like this::

==================================================================
|
||||
BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b
|
||||
|
||||
Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17):
|
||||
test_out_of_bounds_read+0xa3/0x22b
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
kfence-#17 [0xffffffffb672f000-0xffffffffb672f01f, size=32, cache=kmalloc-32] allocated by task 507:
|
||||
test_alloc+0xf3/0x25b
|
||||
test_out_of_bounds_read+0x98/0x22b
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
CPU: 4 PID: 107 Comm: kunit_try_catch Not tainted 5.8.0-rc6+ #7
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
|
||||
==================================================================
|
||||
|
||||
The header of the report provides a short summary of the function involved in
|
||||
the access. It is followed by more detailed information about the access and
|
||||
its origin. Note that, real kernel addresses are only shown when using the
|
||||
kernel command line option ``no_hash_pointers``.
|
||||
|
||||
Use-after-free accesses are reported as::
|
||||
|
||||
==================================================================
|
||||
BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143
|
||||
|
||||
Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24):
|
||||
test_use_after_free_read+0xb3/0x143
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
kfence-#24 [0xffffffffb673dfe0-0xffffffffb673dfff, size=32, cache=kmalloc-32] allocated by task 507:
|
||||
test_alloc+0xf3/0x25b
|
||||
test_use_after_free_read+0x76/0x143
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
freed by task 507:
|
||||
test_use_after_free_read+0xa8/0x143
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
CPU: 4 PID: 109 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
|
||||
==================================================================
|
||||
|
||||
KFENCE also reports on invalid frees, such as double-frees::
|
||||
|
||||
==================================================================
|
||||
BUG: KFENCE: invalid free in test_double_free+0xdc/0x171
|
||||
|
||||
Invalid free of 0xffffffffb6741000:
|
||||
test_double_free+0xdc/0x171
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
kfence-#26 [0xffffffffb6741000-0xffffffffb674101f, size=32, cache=kmalloc-32] allocated by task 507:
|
||||
test_alloc+0xf3/0x25b
|
||||
test_double_free+0x76/0x171
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
freed by task 507:
|
||||
test_double_free+0xa8/0x171
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
CPU: 4 PID: 111 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
|
||||
==================================================================
|
||||
|
||||
KFENCE also uses pattern-based redzones on the other side of an object's guard
|
||||
page, to detect out-of-bounds writes on the unprotected side of the object.
|
||||
These are reported on frees::
|
||||
|
||||
==================================================================
|
||||
BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184
|
||||
|
||||
Corrupted memory at 0xffffffffb6797ff9 [ 0xac . . . . . . ] (in kfence-#69):
|
||||
test_kmalloc_aligned_oob_write+0xef/0x184
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
kfence-#69 [0xffffffffb6797fb0-0xffffffffb6797ff8, size=73, cache=kmalloc-96] allocated by task 507:
|
||||
test_alloc+0xf3/0x25b
|
||||
test_kmalloc_aligned_oob_write+0x57/0x184
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
|
||||
==================================================================
|
||||
|
||||
For such errors, the address where the corruption occurred as well as the
|
||||
invalidly written bytes (offset from the address) are shown; in this
|
||||
representation, '.' denote untouched bytes. In the example above ``0xac`` is
|
||||
the value written to the invalid address at offset 0, and the remaining '.'
|
||||
denote that no following bytes have been touched. Note that, real values are
|
||||
only shown if the kernel was booted with ``no_hash_pointers``; to avoid
|
||||
information disclosure otherwise, '!' is used instead to denote invalidly
|
||||
written bytes.
|
||||
|
||||
And finally, KFENCE may also report on invalid accesses to any protected page
|
||||
where it was not possible to determine an associated object, e.g. if adjacent
|
||||
object pages had not yet been allocated::
|
||||
|
||||
==================================================================
|
||||
BUG: KFENCE: invalid read in test_invalid_access+0x26/0xe0
|
||||
|
||||
Invalid read at 0xffffffffb670b00a:
|
||||
test_invalid_access+0x26/0xe0
|
||||
kunit_try_run_case+0x51/0x85
|
||||
kunit_generic_run_threadfn_adapter+0x16/0x30
|
||||
kthread+0x137/0x160
|
||||
ret_from_fork+0x22/0x30
|
||||
|
||||
CPU: 4 PID: 124 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
|
||||
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
|
||||
==================================================================
|
||||
|
||||
DebugFS interface
~~~~~~~~~~~~~~~~~

Some debugging information is exposed via debugfs:

* The file ``/sys/kernel/debug/kfence/stats`` provides runtime statistics.

* The file ``/sys/kernel/debug/kfence/objects`` provides a list of objects
  allocated via KFENCE, including those already freed but protected.

Implementation Details
----------------------

Guarded allocations are set up based on the sample interval. After expiration
of the sample interval, the next allocation through the main allocator (SLAB or
SLUB) returns a guarded allocation from the KFENCE object pool (allocation
sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and
the next allocation is set up after the expiration of the interval. To "gate" a
KFENCE allocation through the main allocator's fast-path without overhead,
KFENCE relies on static branches via the static keys infrastructure. The static
branch is toggled to redirect the allocation to KFENCE.
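A hypothetical sketch of this gating pattern (the names below are illustrative
and do not correspond to KFENCE's actual internals) could look as follows;
while the static key is disabled, the check compiles down to a patched no-op on
the allocator fast path::

    /* Hypothetical sketch of static-key gating; not KFENCE's real code. */
    #include <linux/jump_label.h>
    #include <linux/types.h>

    DEFINE_STATIC_KEY_FALSE(allocation_gate);	/* opened periodically by a timer */

    static void *guarded_pool_alloc(size_t size, gfp_t flags)
    {
    	return NULL;	/* placeholder for "hand out a guarded object" */
    }

    static void *maybe_guarded_alloc(size_t size, gfp_t flags)
    {
    	/* Near-zero cost while the branch is off (the common case). */
    	if (static_branch_unlikely(&allocation_gate))
    		return guarded_pool_alloc(size, flags);

    	return NULL;	/* caller continues on the normal slab path */
    }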
KFENCE objects each reside on a dedicated page, at either the left or right
page boundaries selected at random. The pages to the left and right of the
object page are "guard pages", whose attributes are changed to a protected
state, and cause page faults on any attempted access. Such page faults are then
intercepted by KFENCE, which handles the fault gracefully by reporting an
out-of-bounds access, and marking the page as accessible so that the faulting
code can (wrongly) continue executing (set ``panic_on_warn`` to panic instead).

To detect out-of-bounds writes to memory within the object's page itself,
KFENCE also uses pattern-based redzones. For each object page, a redzone is set
up for all non-object memory. For typical alignments, the redzone is only
required on the unguarded side of an object. Because KFENCE must honor the
cache's requested alignment, special alignments may result in unprotected gaps
on either side of an object, all of which are redzoned.

The following figure illustrates the page layout::

    ---+-----------+-----------+-----------+-----------+-----------+---
       | xxxxxxxxx | O :       | xxxxxxxxx |       : O | xxxxxxxxx |
       | xxxxxxxxx | B :       | xxxxxxxxx |       : B | xxxxxxxxx |
       | x GUARD x | J : RED-  | x GUARD x |  RED- : J | x GUARD x |
       | xxxxxxxxx | E :  ZONE | xxxxxxxxx | ZONE  : E | xxxxxxxxx |
       | xxxxxxxxx | C :       | xxxxxxxxx |       : C | xxxxxxxxx |
       | xxxxxxxxx | T :       | xxxxxxxxx |       : T | xxxxxxxxx |
    ---+-----------+-----------+-----------+-----------+-----------+---
Upon deallocation of a KFENCE object, the object's page is again protected and
the object is marked as freed. Any further access to the object causes a fault
and KFENCE reports a use-after-free access. Freed objects are inserted at the
tail of KFENCE's freelist, so that the least recently freed objects are reused
first, and the chances of detecting use-after-frees of recently freed objects
is increased.

Interface
---------

The following describes the functions which are used by allocators as well as
page handling code to set up and deal with KFENCE allocations.

.. kernel-doc:: include/linux/kfence.h
   :functions: is_kfence_address
               kfence_shutdown_cache
               kfence_alloc kfence_free __kfence_free
               kfence_ksize kfence_object_start
               kfence_handle_page_fault
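As a rough, hypothetical sketch of how these functions fit together (simplified;
the real SLAB/SLUB integration differs in detail), an allocator could hook
KFENCE on its allocation and free paths like this::

    #include <linux/kfence.h>
    #include <linux/slab.h>

    static void *slab_alloc_hooked(struct kmem_cache *s, size_t size, gfp_t flags)
    {
    	/* Returns NULL unless the sampled allocation gate is currently open. */
    	void *obj = kfence_alloc(s, size, flags);

    	if (obj)
    		return obj;

    	return kmem_cache_alloc(s, flags);	/* normal slab path */
    }

    static void slab_free_hooked(struct kmem_cache *s, void *obj)
    {
    	/* KFENCE objects live in their own pool and must be returned to it. */
    	if (is_kfence_address(obj)) {
    		__kfence_free(obj);
    		return;
    	}

    	kmem_cache_free(s, obj);
    }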
Related Tools
-------------

In userspace, a similar approach is taken by `GWP-ASan
<http://llvm.org/docs/GwpAsan.html>`_. GWP-ASan also relies on guard pages and
a sampling strategy to detect memory unsafety bugs at scale. KFENCE's design is
directly influenced by GWP-ASan, and can be seen as its kernel sibling. Another
similar but non-sampling approach, that also inspired the name "KFENCE", can be
found in the userspace `Electric Fence Malloc Debugger
<https://linux.die.net/man/3/efence>`_.

In the kernel, several tools exist to debug memory access errors, and in
particular KASAN can detect all bug classes that KFENCE can detect. While KASAN
is more precise, relying on compiler instrumentation, this comes at a
performance cost.

It is worth highlighting that KASAN and KFENCE are complementary, with
different target environments. For instance, KASAN is the better debugging-aid,
where test cases or reproducers exists: due to the lower chance to detect the
error, it would require more effort using KFENCE to debug. Deployments at scale
that cannot afford to enable KASAN, however, would benefit from using KFENCE to
discover bugs due to code paths not exercised by test cases or fuzzers.
@@ -217,6 +217,12 @@ between the calls to start() and stop(), so holding a lock during that time
is a reasonable thing to do. The seq_file code will also avoid taking any
other locks while the iterator is active.

The iterater value returned by start() or next() is guaranteed to be
passed to a subsequent next() or stop() call. This allows resources
such as locks that were taken to be reliably released. There is *no*
guarantee that the iterator will be passed to show(), though in practice
it often will be.


Formatted output
================
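For illustration, a minimal sketch (hypothetical names, not part of this patch)
of an iterator that takes a lock in start() and relies on the guarantee above to
release it in stop(), even when show() is never called::

    #include <linux/list.h>
    #include <linux/mutex.h>
    #include <linux/seq_file.h>

    static LIST_HEAD(example_list);
    static DEFINE_MUTEX(example_lock);

    static void *example_start(struct seq_file *m, loff_t *pos)
    {
    	mutex_lock(&example_lock);		/* released in example_stop() */
    	return seq_list_start(&example_list, *pos);
    }

    static void *example_next(struct seq_file *m, void *v, loff_t *pos)
    {
    	return seq_list_next(v, &example_list, pos);
    }

    static void example_stop(struct seq_file *m, void *v)
    {
    	mutex_unlock(&example_lock);	/* paired with example_start() */
    }

    static int example_show(struct seq_file *m, void *v)
    {
    	seq_printf(m, "entry %p\n", v);
    	return 0;
    }

    static const struct seq_operations example_seq_ops = {
    	.start	= example_start,
    	.next	= example_next,
    	.stop	= example_stop,
    	.show	= example_show,
    };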
MAINTAINERS (26 changes)

@@ -261,6 +261,8 @@ ABI/API
L: linux-api@vger.kernel.org
F: include/linux/syscalls.h
F: kernel/sys_ni.c
F: include/uapi/
F: arch/*/include/uapi/

ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
M: Hans de Goede <hdegoede@redhat.com>
@ -2982,7 +2984,7 @@ F: include/uapi/linux/audit.h
|
||||
F: kernel/audit*
|
||||
|
||||
AUXILIARY DISPLAY DRIVERS
|
||||
M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: drivers/auxdisplay/
|
||||
F: include/linux/cfag12864b.h
|
||||
@ -4128,13 +4130,13 @@ F: scripts/extract-cert.c
|
||||
F: scripts/sign-file.c
|
||||
|
||||
CFAG12864B LCD DRIVER
|
||||
M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: drivers/auxdisplay/cfag12864b.c
|
||||
F: include/linux/cfag12864b.h
|
||||
|
||||
CFAG12864BFB LCD FRAMEBUFFER DRIVER
|
||||
M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: drivers/auxdisplay/cfag12864bfb.c
|
||||
F: include/linux/cfag12864b.h
|
||||
@ -4304,7 +4306,7 @@ S: Supported
|
||||
F: drivers/infiniband/hw/usnic/
|
||||
|
||||
CLANG-FORMAT FILE
|
||||
M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: .clang-format
|
||||
|
||||
@ -4444,7 +4446,7 @@ S: Maintained
|
||||
F: drivers/platform/x86/compal-laptop.c
|
||||
|
||||
COMPILER ATTRIBUTES
|
||||
M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: include/linux/compiler_attributes.h
|
||||
|
||||
@@ -9867,6 +9869,18 @@ F: include/linux/keyctl.h
F: include/uapi/linux/keyctl.h
F: security/keys/

KFENCE
M: Alexander Potapenko <glider@google.com>
M: Marco Elver <elver@google.com>
R: Dmitry Vyukov <dvyukov@google.com>
L: kasan-dev@googlegroups.com
S: Maintained
F: Documentation/dev-tools/kfence.rst
F: arch/*/include/asm/kfence.h
F: include/linux/kfence.h
F: lib/Kconfig.kfence
F: mm/kfence/

KFIFO
M: Stefani Seibold <stefani@seibold.net>
S: Maintained
@ -9927,7 +9941,7 @@ F: include/linux/kprobes.h
|
||||
F: kernel/kprobes.c
|
||||
|
||||
KS0108 LCD CONTROLLER DRIVER
|
||||
M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
|
||||
M: Miguel Ojeda <ojeda@kernel.org>
|
||||
S: Maintained
|
||||
F: Documentation/admin-guide/auxdisplay/ks0108.rst
|
||||
F: drivers/auxdisplay/ks0108.c
|
||||
|
@ -1,4 +1,3 @@
|
||||
CONFIG_EXPERIMENTAL=y
|
||||
CONFIG_SYSVIPC=y
|
||||
CONFIG_POSIX_MQUEUE=y
|
||||
CONFIG_LOG_BUF_SHIFT=14
|
||||
|
@ -140,6 +140,7 @@ config ARM64
|
||||
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
|
||||
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
|
||||
select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
|
||||
select HAVE_ARCH_KFENCE
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_MMAP_RND_BITS
|
||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
|
||||
|
@ -6,7 +6,6 @@
|
||||
#define __ASM_CACHE_H
|
||||
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/mte-kasan.h>
|
||||
|
||||
#define CTR_L1IP_SHIFT 14
|
||||
#define CTR_L1IP_MASK 3
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/memory.h>
|
||||
#include <asm/mte-kasan.h>
|
||||
#include <asm/pgtable-types.h>
|
||||
|
||||
#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
|
||||
|
22
arch/arm64/include/asm/kfence.h
Normal file
22
arch/arm64/include/asm/kfence.h
Normal file
@ -0,0 +1,22 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* arm64 KFENCE support.
|
||||
*
|
||||
* Copyright (C) 2020, Google LLC.
|
||||
*/
|
||||
|
||||
#ifndef __ASM_KFENCE_H
|
||||
#define __ASM_KFENCE_H
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
|
||||
static inline bool arch_kfence_init_pool(void) { return true; }
|
||||
|
||||
static inline bool kfence_protect_page(unsigned long addr, bool protect)
|
||||
{
|
||||
set_memory_valid(addr, 1, !protect);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* __ASM_KFENCE_H */
|
@ -11,4 +11,6 @@
|
||||
#define MTE_TAG_SIZE 4
|
||||
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
|
||||
|
||||
#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
|
||||
|
||||
#endif /* __ASM_MTE_DEF_H */
|
||||
|
@ -11,12 +11,15 @@
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
/*
|
||||
* The functions below are meant to be used only for the
|
||||
* KASAN_HW_TAGS interface defined in asm/memory.h.
|
||||
*/
|
||||
#ifdef CONFIG_ARM64_MTE
|
||||
|
||||
/*
|
||||
* These functions are meant to be only used from KASAN runtime through
|
||||
* the arch_*() interface defined in asm/memory.h.
|
||||
* These functions don't include system_supports_mte() checks,
|
||||
* as KASAN only calls them when MTE is supported and enabled.
|
||||
*/
|
||||
|
||||
static inline u8 mte_get_ptr_tag(void *ptr)
|
||||
{
|
||||
/* Note: The format of KASAN tags is 0xF<x> */
|
||||
@ -25,9 +28,54 @@ static inline u8 mte_get_ptr_tag(void *ptr)
|
||||
return tag;
|
||||
}
|
||||
|
||||
u8 mte_get_mem_tag(void *addr);
|
||||
u8 mte_get_random_tag(void);
|
||||
void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag);
|
||||
/* Get allocation tag for the address. */
|
||||
static inline u8 mte_get_mem_tag(void *addr)
|
||||
{
|
||||
asm(__MTE_PREAMBLE "ldg %0, [%0]"
|
||||
: "+r" (addr));
|
||||
|
||||
return mte_get_ptr_tag(addr);
|
||||
}
|
||||
|
||||
/* Generate a random tag. */
|
||||
static inline u8 mte_get_random_tag(void)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
asm(__MTE_PREAMBLE "irg %0, %0"
|
||||
: "=r" (addr));
|
||||
|
||||
return mte_get_ptr_tag(addr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Assign allocation tags for a region of memory based on the pointer tag.
|
||||
* Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
|
||||
* size must be non-zero and MTE_GRANULE_SIZE aligned.
|
||||
*/
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
{
|
||||
u64 curr, end;
|
||||
|
||||
if (!size)
|
||||
return;
|
||||
|
||||
curr = (u64)__tag_set(addr, tag);
|
||||
end = curr + size;
|
||||
|
||||
do {
|
||||
/*
|
||||
* 'asm volatile' is required to prevent the compiler to move
|
||||
* the statement outside of the loop.
|
||||
*/
|
||||
asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
|
||||
:
|
||||
: "r" (curr)
|
||||
: "memory");
|
||||
|
||||
curr += MTE_GRANULE_SIZE;
|
||||
} while (curr != end);
|
||||
}
|
||||
|
||||
void mte_enable_kernel(void);
|
||||
void mte_init_tags(u64 max_tag);
|
||||
@ -46,13 +94,14 @@ static inline u8 mte_get_mem_tag(void *addr)
|
||||
{
|
||||
return 0xFF;
|
||||
}
|
||||
|
||||
static inline u8 mte_get_random_tag(void)
|
||||
{
|
||||
return 0xFF;
|
||||
}
|
||||
static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
|
||||
static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
{
|
||||
return addr;
|
||||
}
|
||||
|
||||
static inline void mte_enable_kernel(void)
|
||||
|
@ -8,8 +8,6 @@
|
||||
#include <asm/compiler.h>
|
||||
#include <asm/mte-def.h>
|
||||
|
||||
#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <asm/barrier.h>
|
||||
#include <asm/cpufeature.h>
|
||||
#include <asm/mte.h>
|
||||
#include <asm/mte-kasan.h>
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/sysreg.h>
|
||||
|
||||
@ -88,51 +87,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
|
||||
return ret;
|
||||
}
|
||||
|
||||
u8 mte_get_mem_tag(void *addr)
|
||||
{
|
||||
if (!system_supports_mte())
|
||||
return 0xFF;
|
||||
|
||||
asm(__MTE_PREAMBLE "ldg %0, [%0]"
|
||||
: "+r" (addr));
|
||||
|
||||
return mte_get_ptr_tag(addr);
|
||||
}
|
||||
|
||||
u8 mte_get_random_tag(void)
|
||||
{
|
||||
void *addr;
|
||||
|
||||
if (!system_supports_mte())
|
||||
return 0xFF;
|
||||
|
||||
asm(__MTE_PREAMBLE "irg %0, %0"
|
||||
: "+r" (addr));
|
||||
|
||||
return mte_get_ptr_tag(addr);
|
||||
}
|
||||
|
||||
void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
|
||||
{
|
||||
void *ptr = addr;
|
||||
|
||||
if ((!system_supports_mte()) || (size == 0))
|
||||
return addr;
|
||||
|
||||
/* Make sure that size is MTE granule aligned. */
|
||||
WARN_ON(size & (MTE_GRANULE_SIZE - 1));
|
||||
|
||||
/* Make sure that the address is MTE granule aligned. */
|
||||
WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1));
|
||||
|
||||
tag = 0xF0 | tag;
|
||||
ptr = (void *)__tag_set(ptr, tag);
|
||||
|
||||
mte_assign_mem_tag_range(ptr, size);
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
void mte_init_tags(u64 max_tag)
|
||||
{
|
||||
static bool gcr_kernel_excl_initialized;
|
||||
|
@ -149,19 +149,3 @@ SYM_FUNC_START(mte_restore_page_tags)
|
||||
|
||||
ret
|
||||
SYM_FUNC_END(mte_restore_page_tags)
|
||||
|
||||
/*
|
||||
* Assign allocation tags for a region of memory based on the pointer tag
|
||||
* x0 - source pointer
|
||||
* x1 - size
|
||||
*
|
||||
* Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
|
||||
* size must be non-zero and MTE_GRANULE_SIZE aligned.
|
||||
*/
|
||||
SYM_FUNC_START(mte_assign_mem_tag_range)
|
||||
1: stg x0, [x0]
|
||||
add x0, x0, #MTE_GRANULE_SIZE
|
||||
subs x1, x1, #MTE_GRANULE_SIZE
|
||||
b.gt 1b
|
||||
ret
|
||||
SYM_FUNC_END(mte_assign_mem_tag_range)
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/extable.h>
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/signal.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/hardirq.h>
|
||||
@ -389,6 +390,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
|
||||
} else if (addr < PAGE_SIZE) {
|
||||
msg = "NULL pointer dereference";
|
||||
} else {
|
||||
if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
|
||||
return;
|
||||
|
||||
msg = "paging request";
|
||||
}
|
||||
|
||||
|
@ -1444,16 +1444,19 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
|
||||
free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
|
||||
}
|
||||
|
||||
static bool inside_linear_region(u64 start, u64 size)
|
||||
struct range arch_get_mappable_range(void)
|
||||
{
|
||||
struct range mhp_range;
|
||||
|
||||
/*
|
||||
* Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)]
|
||||
* accommodating both its ends but excluding PAGE_END. Max physical
|
||||
* range which can be mapped inside this linear mapping range, must
|
||||
* also be derived from its end points.
|
||||
*/
|
||||
return start >= __pa(_PAGE_OFFSET(vabits_actual)) &&
|
||||
(start + size - 1) <= __pa(PAGE_END - 1);
|
||||
mhp_range.start = __pa(_PAGE_OFFSET(vabits_actual));
|
||||
mhp_range.end = __pa(PAGE_END - 1);
|
||||
return mhp_range;
|
||||
}
|
||||
|
||||
int arch_add_memory(int nid, u64 start, u64 size,
|
||||
@ -1461,12 +1464,14 @@ int arch_add_memory(int nid, u64 start, u64 size,
|
||||
{
|
||||
int ret, flags = 0;
|
||||
|
||||
if (!inside_linear_region(start, size)) {
|
||||
pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
|
||||
return -EINVAL;
|
||||
}
|
||||
VM_BUG_ON(!mhp_range_allowed(start, size, true));
|
||||
|
||||
if (rodata_full || debug_pagealloc_enabled())
|
||||
/*
|
||||
* KFENCE requires linear map to be mapped at page granularity, so that
|
||||
* it is possible to protect/unprotect single pages in the KFENCE pool.
|
||||
*/
|
||||
if (rodata_full || debug_pagealloc_enabled() ||
|
||||
IS_ENABLED(CONFIG_KFENCE))
|
||||
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
|
||||
|
||||
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
|
||||
|
@ -157,29 +157,31 @@ unsigned long _page_cachable_default;
|
||||
EXPORT_SYMBOL(_page_cachable_default);
|
||||
|
||||
#define PM(p) __pgprot(_page_cachable_default | (p))
|
||||
#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p))
|
||||
|
||||
static inline void setup_protection_map(void)
|
||||
{
|
||||
protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
||||
protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
||||
protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[4] = PM(_PAGE_PRESENT);
|
||||
protection_map[5] = PM(_PAGE_PRESENT);
|
||||
protection_map[6] = PM(_PAGE_PRESENT);
|
||||
protection_map[7] = PM(_PAGE_PRESENT);
|
||||
protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
||||
protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[4] = PVA(_PAGE_PRESENT);
|
||||
protection_map[5] = PVA(_PAGE_PRESENT);
|
||||
protection_map[6] = PVA(_PAGE_PRESENT);
|
||||
protection_map[7] = PVA(_PAGE_PRESENT);
|
||||
|
||||
protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
|
||||
protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
|
||||
protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
|
||||
protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
|
||||
_PAGE_NO_READ);
|
||||
protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
|
||||
protection_map[12] = PM(_PAGE_PRESENT);
|
||||
protection_map[13] = PM(_PAGE_PRESENT);
|
||||
protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
|
||||
protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
|
||||
protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
|
||||
protection_map[12] = PVA(_PAGE_PRESENT);
|
||||
protection_map[13] = PVA(_PAGE_PRESENT);
|
||||
protection_map[14] = PVA(_PAGE_PRESENT);
|
||||
protection_map[15] = PVA(_PAGE_PRESENT);
|
||||
}
|
||||
|
||||
#undef _PVA
|
||||
#undef PM
|
||||
|
||||
void cpu_cache_init(void)
|
||||
|
@ -297,6 +297,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
|
||||
if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
|
||||
return -EINVAL;
|
||||
|
||||
VM_BUG_ON(!mhp_range_allowed(start, size, true));
|
||||
rc = vmem_add_mapping(start, size);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
@ -4,6 +4,7 @@
|
||||
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
|
||||
*/
|
||||
|
||||
#include <linux/memory_hotplug.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/pfn.h>
|
||||
#include <linux/mm.h>
|
||||
@ -532,11 +533,22 @@ void vmem_remove_mapping(unsigned long start, unsigned long size)
|
||||
mutex_unlock(&vmem_mutex);
|
||||
}
|
||||
|
||||
struct range arch_get_mappable_range(void)
|
||||
{
|
||||
struct range mhp_range;
|
||||
|
||||
mhp_range.start = 0;
|
||||
mhp_range.end = VMEM_MAX_PHYS - 1;
|
||||
return mhp_range;
|
||||
}
|
||||
|
||||
int vmem_add_mapping(unsigned long start, unsigned long size)
|
||||
{
|
||||
struct range range = arch_get_mappable_range();
|
||||
int ret;
|
||||
|
||||
if (start + size > VMEM_MAX_PHYS ||
|
||||
if (start < range.start ||
|
||||
start + size > range.end + 1 ||
|
||||
start + size < start)
|
||||
return -ERANGE;
|
||||
|
||||
|
@ -151,6 +151,7 @@ config X86
|
||||
select HAVE_ARCH_JUMP_LABEL_RELATIVE
|
||||
select HAVE_ARCH_KASAN if X86_64
|
||||
select HAVE_ARCH_KASAN_VMALLOC if X86_64
|
||||
select HAVE_ARCH_KFENCE
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
|
||||
|
64
arch/x86/include/asm/kfence.h
Normal file
64
arch/x86/include/asm/kfence.h
Normal file
@ -0,0 +1,64 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* x86 KFENCE support.
|
||||
*
|
||||
* Copyright (C) 2020, Google LLC.
|
||||
*/
|
||||
|
||||
#ifndef _ASM_X86_KFENCE_H
|
||||
#define _ASM_X86_KFENCE_H
|
||||
|
||||
#include <linux/bug.h>
|
||||
#include <linux/kfence.h>
|
||||
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/set_memory.h>
|
||||
#include <asm/tlbflush.h>
|
||||
|
||||
/* Force 4K pages for __kfence_pool. */
|
||||
static inline bool arch_kfence_init_pool(void)
|
||||
{
|
||||
unsigned long addr;
|
||||
|
||||
for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
|
||||
addr += PAGE_SIZE) {
|
||||
unsigned int level;
|
||||
|
||||
if (!lookup_address(addr, &level))
|
||||
return false;
|
||||
|
||||
if (level != PG_LEVEL_4K)
|
||||
set_memory_4k(addr, 1);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Protect the given page and flush TLB. */
|
||||
static inline bool kfence_protect_page(unsigned long addr, bool protect)
|
||||
{
|
||||
unsigned int level;
|
||||
pte_t *pte = lookup_address(addr, &level);
|
||||
|
||||
if (WARN_ON(!pte || level != PG_LEVEL_4K))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* We need to avoid IPIs, as we may get KFENCE allocations or faults
|
||||
* with interrupts disabled. Therefore, the below is best-effort, and
|
||||
* does not flush TLBs on all CPUs. We can tolerate some inaccuracy;
|
||||
* lazy fault handling takes care of faults after the page is PRESENT.
|
||||
*/
|
||||
|
||||
if (protect)
|
||||
set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
|
||||
else
|
||||
set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
|
||||
|
||||
/* Flush this CPU's TLB. */
|
||||
flush_tlb_one_kernel(addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_KFENCE_H */
|
@ -9,6 +9,7 @@
|
||||
#include <linux/kdebug.h> /* oops_begin/end, ... */
|
||||
#include <linux/extable.h> /* search_exception_tables */
|
||||
#include <linux/memblock.h> /* max_low_pfn */
|
||||
#include <linux/kfence.h> /* kfence_handle_page_fault */
|
||||
#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
|
||||
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
|
||||
#include <linux/perf_event.h> /* perf_sw_event */
|
||||
@ -680,6 +681,11 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
||||
if (IS_ENABLED(CONFIG_EFI))
|
||||
efi_crash_gracefully_on_page_fault(address);
|
||||
|
||||
/* Only not-present faults should be handled by KFENCE. */
|
||||
if (!(error_code & X86_PF_PROT) &&
|
||||
kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs))
|
||||
return;
|
||||
|
||||
oops:
|
||||
/*
|
||||
* Oops. The kernel tried to access some bad page. We'll have to
|
||||
|
@ -1164,12 +1164,14 @@ static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
|
||||
|
||||
static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
||||
{
|
||||
kfree(v);
|
||||
++*pos;
|
||||
return memtype_get_idx(*pos);
|
||||
}
|
||||
|
||||
static void memtype_seq_stop(struct seq_file *seq, void *v)
|
||||
{
|
||||
kfree(v);
|
||||
}
|
||||
|
||||
static int memtype_seq_show(struct seq_file *seq, void *v)
|
||||
@ -1181,8 +1183,6 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
|
||||
entry_print->end,
|
||||
cattr_name(entry_print->type));
|
||||
|
||||
kfree(entry_print);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Description: cfag12864b LCD driver
|
||||
* Depends: ks0108
|
||||
*
|
||||
* Author: Copyright (C) Miguel Ojeda Sandonis
|
||||
* Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
|
||||
* Date: 2006-10-31
|
||||
*/
|
||||
|
||||
@ -376,5 +376,5 @@ module_init(cfag12864b_init);
|
||||
module_exit(cfag12864b_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
|
||||
MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
|
||||
MODULE_DESCRIPTION("cfag12864b LCD driver");
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Description: cfag12864b LCD framebuffer driver
|
||||
* Depends: cfag12864b
|
||||
*
|
||||
* Author: Copyright (C) Miguel Ojeda Sandonis
|
||||
* Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
|
||||
* Date: 2006-10-31
|
||||
*/
|
||||
|
||||
@ -171,5 +171,5 @@ module_init(cfag12864bfb_init);
|
||||
module_exit(cfag12864bfb_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
|
||||
MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
|
||||
MODULE_DESCRIPTION("cfag12864b LCD framebuffer driver");
|
||||
|
@ -5,7 +5,7 @@
|
||||
* Description: ks0108 LCD Controller driver
|
||||
* Depends: parport
|
||||
*
|
||||
* Author: Copyright (C) Miguel Ojeda Sandonis
|
||||
* Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
|
||||
* Date: 2006-10-31
|
||||
*/
|
||||
|
||||
@ -182,6 +182,6 @@ module_init(ks0108_init);
|
||||
module_exit(ks0108_exit);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
|
||||
MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
|
||||
MODULE_DESCRIPTION("ks0108 LCD Controller driver");
|
||||
|
||||
|
@ -35,7 +35,7 @@ static const char *const online_type_to_str[] = {
|
||||
[MMOP_ONLINE_MOVABLE] = "online_movable",
|
||||
};
|
||||
|
||||
int memhp_online_type_from_str(const char *str)
|
||||
int mhp_online_type_from_str(const char *str)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -253,7 +253,7 @@ static int memory_subsys_offline(struct device *dev)
|
||||
static ssize_t state_store(struct device *dev, struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
const int online_type = memhp_online_type_from_str(buf);
|
||||
const int online_type = mhp_online_type_from_str(buf);
|
||||
struct memory_block *mem = to_memory_block(dev);
|
||||
int ret;
|
||||
|
||||
@ -290,20 +290,20 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
|
||||
}
|
||||
|
||||
/*
|
||||
* phys_device is a bad name for this. What I really want
|
||||
* is a way to differentiate between memory ranges that
|
||||
* are part of physical devices that constitute
|
||||
* a complete removable unit or fru.
|
||||
* i.e. do these ranges belong to the same physical device,
|
||||
* s.t. if I offline all of these sections I can then
|
||||
* remove the physical device?
|
||||
* Legacy interface that we cannot remove: s390x exposes the storage increment
|
||||
* covered by a memory block, allowing for identifying which memory blocks
|
||||
* comprise a storage increment. Since a memory block spans complete
|
||||
* storage increments nowadays, this interface is basically unused. Other
|
||||
* archs never exposed != 0.
|
||||
*/
|
||||
static ssize_t phys_device_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
struct memory_block *mem = to_memory_block(dev);
|
||||
unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", mem->phys_device);
|
||||
return sysfs_emit(buf, "%d\n",
|
||||
arch_get_memory_phys_device(start_pfn));
|
||||
}
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTREMOVE
|
||||
@ -387,19 +387,19 @@ static ssize_t auto_online_blocks_show(struct device *dev,
|
||||
struct device_attribute *attr, char *buf)
|
||||
{
|
||||
return sysfs_emit(buf, "%s\n",
|
||||
online_type_to_str[memhp_default_online_type]);
|
||||
online_type_to_str[mhp_default_online_type]);
|
||||
}
|
||||
|
||||
static ssize_t auto_online_blocks_store(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
const int online_type = memhp_online_type_from_str(buf);
|
||||
const int online_type = mhp_online_type_from_str(buf);
|
||||
|
||||
if (online_type < 0)
|
||||
return -EINVAL;
|
||||
|
||||
memhp_default_online_type = online_type;
|
||||
mhp_default_online_type = online_type;
|
||||
return count;
|
||||
}
|
||||
|
||||
@ -488,11 +488,7 @@ static DEVICE_ATTR_WO(soft_offline_page);
|
||||
static DEVICE_ATTR_WO(hard_offline_page);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Note that phys_device is optional. It is here to allow for
|
||||
* differentiation between which *physical* devices each
|
||||
* section belongs to...
|
||||
*/
|
||||
/* See phys_device_show(). */
|
||||
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
|
||||
{
|
||||
return 0;
|
||||
@ -574,7 +570,6 @@ int register_memory(struct memory_block *memory)
|
||||
static int init_memory_block(unsigned long block_id, unsigned long state)
|
||||
{
|
||||
struct memory_block *mem;
|
||||
unsigned long start_pfn;
|
||||
int ret = 0;
|
||||
|
||||
mem = find_memory_block_by_id(block_id);
|
||||
@ -588,8 +583,6 @@ static int init_memory_block(unsigned long block_id, unsigned long state)
|
||||
|
||||
mem->start_section_nr = block_id * sections_per_block;
|
||||
mem->state = state;
|
||||
start_pfn = section_nr_to_pfn(mem->start_section_nr);
|
||||
mem->phys_device = arch_get_memory_phys_device(start_pfn);
|
||||
mem->nid = NUMA_NO_NODE;
|
||||
|
||||
ret = register_memory(mem);
|
||||
|
@ -1081,7 +1081,7 @@ static ssize_t mm_stat_show(struct device *dev,
|
||||
zram->limit_pages << PAGE_SHIFT,
|
||||
max_used << PAGE_SHIFT,
|
||||
(u64)atomic64_read(&zram->stats.same_pages),
|
||||
pool_stats.pages_compacted,
|
||||
atomic_long_read(&pool_stats.pages_compacted),
|
||||
(u64)atomic64_read(&zram->stats.huge_pages),
|
||||
(u64)atomic64_read(&zram->stats.huge_pages_since));
|
||||
up_read(&zram->init_lock);
|
||||
|
@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
|
||||
|
||||
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
|
||||
ret = add_memory(nid, PFN_PHYS((start_pfn)),
|
||||
(HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
|
||||
(HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
|
||||
|
||||
if (ret) {
|
||||
pr_err("hot_add memory failed error is %d\n", ret);
|
||||
|
@ -623,7 +623,7 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
|
||||
/* Memory might get onlined immediately. */
|
||||
atomic64_add(size, &vm->offline_size);
|
||||
rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
|
||||
MEMHP_MERGE_RESOURCE);
|
||||
MHP_MERGE_RESOURCE);
|
||||
if (rc) {
|
||||
atomic64_sub(size, &vm->offline_size);
|
||||
dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
|
||||
@ -2222,7 +2222,7 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
|
||||
*/
|
||||
static void virtio_mem_refresh_config(struct virtio_mem *vm)
|
||||
{
|
||||
const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
|
||||
const struct range pluggable_range = mhp_get_pluggable_range(true);
|
||||
uint64_t new_plugged_size, usable_region_size, end_addr;
|
||||
|
||||
/* the plugged_size is just a reflection of what _we_ did previously */
|
||||
@ -2234,15 +2234,25 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
|
||||
/* calculate the last usable memory block id */
|
||||
virtio_cread_le(vm->vdev, struct virtio_mem_config,
|
||||
usable_region_size, &usable_region_size);
|
||||
end_addr = vm->addr + usable_region_size;
|
||||
end_addr = min(end_addr, phys_limit);
|
||||
end_addr = min(vm->addr + usable_region_size - 1,
|
||||
pluggable_range.end);
|
||||
|
||||
if (vm->in_sbm)
|
||||
vm->sbm.last_usable_mb_id =
|
||||
virtio_mem_phys_to_mb_id(end_addr) - 1;
|
||||
else
|
||||
vm->bbm.last_usable_bb_id =
|
||||
virtio_mem_phys_to_bb_id(vm, end_addr) - 1;
|
||||
if (vm->in_sbm) {
|
||||
vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr);
|
||||
if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes()))
|
||||
vm->sbm.last_usable_mb_id--;
|
||||
} else {
|
||||
vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm,
|
||||
end_addr);
|
||||
if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size))
|
||||
vm->bbm.last_usable_bb_id--;
|
||||
}
|
||||
/*
|
||||
* If we cannot plug any of our device memory (e.g., nothing in the
|
||||
* usable region is addressable), the last usable memory block id will
|
||||
* be smaller than the first usable memory block id. We'll stop
|
||||
* attempting to add memory with -ENOSPC from our main loop.
|
||||
*/
|
||||
|
||||
/* see if there is a request to change the size */
|
||||
virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
|
||||
@ -2364,7 +2374,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm)
|
||||
|
||||
static int virtio_mem_init(struct virtio_mem *vm)
|
||||
{
|
||||
const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
|
||||
const struct range pluggable_range = mhp_get_pluggable_range(true);
|
||||
uint64_t sb_size, addr;
|
||||
uint16_t node_id;
|
||||
|
||||
@ -2405,9 +2415,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
|
||||
if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
|
||||
dev_warn(&vm->vdev->dev,
|
||||
"The alignment of the physical end address can make some memory unusable.\n");
|
||||
if (vm->addr + vm->region_size > phys_limit)
|
||||
if (vm->addr < pluggable_range.start ||
|
||||
vm->addr + vm->region_size - 1 > pluggable_range.end)
|
||||
dev_warn(&vm->vdev->dev,
|
||||
"Some memory is not addressable. This can make some memory unusable.\n");
|
||||
"Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
|
||||
|
||||
/*
|
||||
* We want subblocks to span at least MAX_ORDER_NR_PAGES and
|
||||
@ -2429,7 +2440,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
|
||||
vm->sbm.sb_size;
|
||||
|
||||
/* Round up to the next full memory block */
|
||||
addr = vm->addr + memory_block_size_bytes() - 1;
|
||||
addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
|
||||
memory_block_size_bytes() - 1;
|
||||
vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
|
||||
vm->sbm.next_mb_id = vm->sbm.first_mb_id;
|
||||
} else {
|
||||
@ -2450,7 +2462,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
|
||||
}
|
||||
|
||||
/* Round up to the next aligned big block */
|
||||
addr = vm->addr + vm->bbm.bb_size - 1;
|
||||
addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
|
||||
vm->bbm.bb_size - 1;
|
||||
vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
|
||||
vm->bbm.next_bb_id = vm->bbm.first_bb_id;
|
||||
}
|
||||
|
@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);
@ -897,10 +897,10 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
void *kaddr = kmap_local_page(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
kunmap_local(kaddr);
put_page(page);
} else {
stop = !dump_skip(cprm, PAGE_SIZE);
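As a hedged aside on the conversion above: kmap_local_page()/kunmap_local() pair up like this in the common copy pattern (the helper name and destination buffer below are hypothetical, not part of this commit):

	/* Sketch: copy a page's contents through a short-lived local mapping. */
	static void copy_page_contents(struct page *page, void *dst)
	{
		void *kaddr = kmap_local_page(page);	/* CPU-local mapping, no global kmap lock */

		memcpy(dst, kaddr, PAGE_SIZE);
		kunmap_local(kaddr);			/* pass the address returned by kmap_local_page() */
	}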
125
fs/iomap/seek.c
@ -10,122 +10,17 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/pagevec.h>
|
||||
|
||||
/*
|
||||
* Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
|
||||
* Returns true if found and updates @lastoff to the offset in file.
|
||||
*/
|
||||
static bool
|
||||
page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
|
||||
int whence)
|
||||
{
|
||||
const struct address_space_operations *ops = inode->i_mapping->a_ops;
|
||||
unsigned int bsize = i_blocksize(inode), off;
|
||||
bool seek_data = whence == SEEK_DATA;
|
||||
loff_t poff = page_offset(page);
|
||||
|
||||
if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
|
||||
return false;
|
||||
|
||||
if (*lastoff < poff) {
|
||||
/*
|
||||
* Last offset smaller than the start of the page means we found
|
||||
* a hole:
|
||||
*/
|
||||
if (whence == SEEK_HOLE)
|
||||
return true;
|
||||
*lastoff = poff;
|
||||
}
|
||||
|
||||
/*
|
||||
* Just check the page unless we can and should check block ranges:
|
||||
*/
|
||||
if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
|
||||
return PageUptodate(page) == seek_data;
|
||||
|
||||
lock_page(page);
|
||||
if (unlikely(page->mapping != inode->i_mapping))
|
||||
goto out_unlock_not_found;
|
||||
|
||||
for (off = 0; off < PAGE_SIZE; off += bsize) {
|
||||
if (offset_in_page(*lastoff) >= off + bsize)
|
||||
continue;
|
||||
if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
|
||||
unlock_page(page);
|
||||
return true;
|
||||
}
|
||||
*lastoff = poff + off + bsize;
|
||||
}
|
||||
|
||||
out_unlock_not_found:
|
||||
unlock_page(page);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Seek for SEEK_DATA / SEEK_HOLE in the page cache.
|
||||
*
|
||||
* Within unwritten extents, the page cache determines which parts are holes
|
||||
* and which are data: uptodate buffer heads count as data; everything else
|
||||
* counts as a hole.
|
||||
*
|
||||
* Returns the resulting offset on successs, and -ENOENT otherwise.
|
||||
*/
|
||||
static loff_t
|
||||
page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
|
||||
int whence)
|
||||
{
|
||||
pgoff_t index = offset >> PAGE_SHIFT;
|
||||
pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
|
||||
loff_t lastoff = offset;
|
||||
struct pagevec pvec;
|
||||
|
||||
if (length <= 0)
|
||||
return -ENOENT;
|
||||
|
||||
pagevec_init(&pvec);
|
||||
|
||||
do {
|
||||
unsigned nr_pages, i;
|
||||
|
||||
nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
|
||||
end - 1);
|
||||
if (nr_pages == 0)
|
||||
break;
|
||||
|
||||
for (i = 0; i < nr_pages; i++) {
|
||||
struct page *page = pvec.pages[i];
|
||||
|
||||
if (page_seek_hole_data(inode, page, &lastoff, whence))
|
||||
goto check_range;
|
||||
lastoff = page_offset(page) + PAGE_SIZE;
|
||||
}
|
||||
pagevec_release(&pvec);
|
||||
} while (index < end);
|
||||
|
||||
/* When no page at lastoff and we are not done, we found a hole. */
|
||||
if (whence != SEEK_HOLE)
|
||||
goto not_found;
|
||||
|
||||
check_range:
|
||||
if (lastoff < offset + length)
|
||||
goto out;
|
||||
not_found:
|
||||
lastoff = -ENOENT;
|
||||
out:
|
||||
pagevec_release(&pvec);
|
||||
return lastoff;
|
||||
}
|
||||
|
||||
|
||||
static loff_t
|
||||
iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
|
||||
iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
|
||||
void *data, struct iomap *iomap, struct iomap *srcmap)
|
||||
{
|
||||
loff_t offset = start;
|
||||
|
||||
switch (iomap->type) {
|
||||
case IOMAP_UNWRITTEN:
|
||||
offset = page_cache_seek_hole_data(inode, offset, length,
|
||||
SEEK_HOLE);
|
||||
if (offset < 0)
|
||||
offset = mapping_seek_hole_data(inode->i_mapping, start,
|
||||
start + length, SEEK_HOLE);
|
||||
if (offset == start + length)
|
||||
return length;
|
||||
fallthrough;
|
||||
case IOMAP_HOLE:
|
||||
@ -164,15 +59,17 @@ iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
|
||||
EXPORT_SYMBOL_GPL(iomap_seek_hole);
|
||||
|
||||
static loff_t
|
||||
iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
|
||||
iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
|
||||
void *data, struct iomap *iomap, struct iomap *srcmap)
|
||||
{
|
||||
loff_t offset = start;
|
||||
|
||||
switch (iomap->type) {
|
||||
case IOMAP_HOLE:
|
||||
return length;
|
||||
case IOMAP_UNWRITTEN:
|
||||
offset = page_cache_seek_hole_data(inode, offset, length,
|
||||
SEEK_DATA);
|
||||
offset = mapping_seek_hole_data(inode->i_mapping, start,
|
||||
start + length, SEEK_DATA);
|
||||
if (offset < 0)
|
||||
return length;
|
||||
fallthrough;
|
||||
|
@ -67,7 +67,6 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/swap.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/stacktrace.h>
|
||||
#include <linux/resource.h>
|
||||
#include <linux/module.h>
|
||||
@ -386,19 +385,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
|
||||
struct pid *pid, struct task_struct *task)
|
||||
{
|
||||
unsigned long wchan;
|
||||
char symname[KSYM_NAME_LEN];
|
||||
|
||||
if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
|
||||
goto print0;
|
||||
if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
|
||||
wchan = get_wchan(task);
|
||||
else
|
||||
wchan = 0;
|
||||
|
||||
wchan = get_wchan(task);
|
||||
if (wchan && !lookup_symbol_name(wchan, symname)) {
|
||||
seq_puts(m, symname);
|
||||
return 0;
|
||||
}
|
||||
if (wchan)
|
||||
seq_printf(m, "%ps", (void *) wchan);
|
||||
else
|
||||
seq_putc(m, '0');
|
||||
|
||||
print0:
|
||||
seq_putc(m, '0');
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_KALLSYMS */
|
||||
|
@ -571,7 +571,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE)
goto out;
kbuf = kzalloc(count + 1, GFP_KERNEL);
kbuf = kvzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
goto out;
@ -600,7 +600,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = count;
out_free_buf:
kfree(kbuf);
kvfree(kbuf);
out:
sysctl_head_finish(head);
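A minimal sketch of the allocation pattern the hunk switches to (the surrounding caller and length are hypothetical): kvzalloc() may fall back to vmalloc for large requests, so the buffer must always be released with kvfree():

	void *kbuf = kvzalloc(len + 1, GFP_KERNEL);	/* len: hypothetical request size */

	if (!kbuf)
		return -ENOMEM;
	/* ... fill and consume kbuf ... */
	kvfree(kbuf);	/* correct for both kmalloc- and vmalloc-backed buffers */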
@ -214,7 +214,7 @@ static inline int get_count_order_long(unsigned long l)
* __ffs64 - find first set bit in a 64 bit word
* @word: The 64 bit word
*
* On 64 bit arches this is a synomyn for __ffs
* On 64 bit arches this is a synonym for __ffs
* The result is not defined if no bits are set, so check that @word
* is non-zero before calling this.
*/
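A short usage sketch of the documented contract (mask source and handler are hypothetical): __ffs64() is only defined for a non-zero word, so callers guard or loop on the mask:

	u64 mask = pending_events();			/* hypothetical 64-bit mask */

	while (mask) {
		unsigned long bit = __ffs64(mask);	/* lowest set bit, 0..63 */

		handle_event(bit);			/* hypothetical */
		mask &= mask - 1;			/* clear that bit */
	}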
@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: cfag12864b LCD driver header
*
* Author: Copyright (C) Miguel Ojeda Sandonis
* Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-12
*/
@ -25,7 +25,7 @@ struct inode;
struct group_info {
atomic_t usage;
int ngroups;
kgid_t gid[0];
kgid_t gid[];
} __randomize_layout;
/**
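A hedged sketch of the usual allocation pattern for a structure ending in a flexible array member (illustrative only, not taken from kernel/cred.c):

	struct group_info *gi;

	gi = kmalloc(struct_size(gi, gid, ngroups), GFP_KERNEL);	/* ngroups: hypothetical count */
	if (!gi)
		return NULL;
	gi->ngroups = ngroups;		/* gid[0..ngroups-1] are now addressable */
	return gi;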
302
include/linux/fortify-string.h
Normal file
@ -0,0 +1,302 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef _LINUX_FORTIFY_STRING_H_
|
||||
#define _LINUX_FORTIFY_STRING_H_
|
||||
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
|
||||
extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
|
||||
extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
|
||||
extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
|
||||
extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
|
||||
extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
|
||||
extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
|
||||
extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
|
||||
extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
|
||||
extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
|
||||
#else
|
||||
#define __underlying_memchr __builtin_memchr
|
||||
#define __underlying_memcmp __builtin_memcmp
|
||||
#define __underlying_memcpy __builtin_memcpy
|
||||
#define __underlying_memmove __builtin_memmove
|
||||
#define __underlying_memset __builtin_memset
|
||||
#define __underlying_strcat __builtin_strcat
|
||||
#define __underlying_strcpy __builtin_strcpy
|
||||
#define __underlying_strlen __builtin_strlen
|
||||
#define __underlying_strncat __builtin_strncat
|
||||
#define __underlying_strncpy __builtin_strncpy
|
||||
#endif
|
||||
|
||||
__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__write_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_strncpy(p, q, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE char *strcat(char *p, const char *q)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
|
||||
if (p_size == (size_t)-1)
|
||||
return __underlying_strcat(p, q);
|
||||
if (strlcat(p, q, p_size) >= p_size)
|
||||
fortify_panic(__func__);
|
||||
return p;
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
|
||||
{
|
||||
__kernel_size_t ret;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
|
||||
/* Work around gcc excess stack consumption issue */
|
||||
if (p_size == (size_t)-1 ||
|
||||
(__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
|
||||
return __underlying_strlen(p);
|
||||
ret = strnlen(p, p_size);
|
||||
if (p_size <= ret)
|
||||
fortify_panic(__func__);
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
|
||||
__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
|
||||
|
||||
if (p_size <= ret && maxlen != ret)
|
||||
fortify_panic(__func__);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* defined after fortified strlen to reuse it */
|
||||
extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
|
||||
__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
|
||||
{
|
||||
size_t ret;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
|
||||
if (p_size == (size_t)-1 && q_size == (size_t)-1)
|
||||
return __real_strlcpy(p, q, size);
|
||||
ret = strlen(q);
|
||||
if (size) {
|
||||
size_t len = (ret >= size) ? size - 1 : ret;
|
||||
|
||||
if (__builtin_constant_p(len) && len >= p_size)
|
||||
__write_overflow();
|
||||
if (len >= p_size)
|
||||
fortify_panic(__func__);
|
||||
__underlying_memcpy(p, q, len);
|
||||
p[len] = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* defined after fortified strnlen to reuse it */
|
||||
extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
|
||||
__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
|
||||
{
|
||||
size_t len;
|
||||
/* Use string size rather than possible enclosing struct size. */
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
|
||||
/* If we cannot get size of p and q default to call strscpy. */
|
||||
if (p_size == (size_t) -1 && q_size == (size_t) -1)
|
||||
return __real_strscpy(p, q, size);
|
||||
|
||||
/*
|
||||
* If size can be known at compile time and is greater than
|
||||
* p_size, generate a compile time write overflow error.
|
||||
*/
|
||||
if (__builtin_constant_p(size) && size > p_size)
|
||||
__write_overflow();
|
||||
|
||||
/*
|
||||
* This call protects from read overflow, because len will default to q
|
||||
* length if it smaller than size.
|
||||
*/
|
||||
len = strnlen(q, size);
|
||||
/*
|
||||
* If len equals size, we will copy only size bytes which leads to
|
||||
* -E2BIG being returned.
|
||||
* Otherwise we will copy len + 1 because of the final '\O'.
|
||||
*/
|
||||
len = len == size ? size : len + 1;
|
||||
|
||||
/*
|
||||
* Generate a runtime write overflow error if len is greater than
|
||||
* p_size.
|
||||
*/
|
||||
if (len > p_size)
|
||||
fortify_panic(__func__);
|
||||
|
||||
/*
|
||||
* We can now safely call vanilla strscpy because we are protected from:
|
||||
* 1. Read overflow thanks to call to strnlen().
|
||||
* 2. Write overflow thanks to above ifs.
|
||||
*/
|
||||
return __real_strscpy(p, q, len);
|
||||
}
|
||||
|
||||
/* defined after fortified strlen and strnlen to reuse them */
|
||||
__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
|
||||
{
|
||||
size_t p_len, copy_len;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
|
||||
if (p_size == (size_t)-1 && q_size == (size_t)-1)
|
||||
return __underlying_strncat(p, q, count);
|
||||
p_len = strlen(p);
|
||||
copy_len = strnlen(q, count);
|
||||
if (p_size < p_len + copy_len + 1)
|
||||
fortify_panic(__func__);
|
||||
__underlying_memcpy(p + p_len, q, copy_len);
|
||||
p[p_len + copy_len] = '\0';
|
||||
return p;
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__write_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memset(p, c, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
size_t q_size = __builtin_object_size(q, 0);
|
||||
|
||||
if (__builtin_constant_p(size)) {
|
||||
if (p_size < size)
|
||||
__write_overflow();
|
||||
if (q_size < size)
|
||||
__read_overflow2();
|
||||
}
|
||||
if (p_size < size || q_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memcpy(p, q, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
size_t q_size = __builtin_object_size(q, 0);
|
||||
|
||||
if (__builtin_constant_p(size)) {
|
||||
if (p_size < size)
|
||||
__write_overflow();
|
||||
if (q_size < size)
|
||||
__read_overflow2();
|
||||
}
|
||||
if (p_size < size || q_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memmove(p, q, size);
|
||||
}
|
||||
|
||||
extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
|
||||
__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__read_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __real_memscan(p, c, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
size_t q_size = __builtin_object_size(q, 0);
|
||||
|
||||
if (__builtin_constant_p(size)) {
|
||||
if (p_size < size)
|
||||
__read_overflow();
|
||||
if (q_size < size)
|
||||
__read_overflow2();
|
||||
}
|
||||
if (p_size < size || q_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memcmp(p, q, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__read_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memchr(p, c, size);
|
||||
}
|
||||
|
||||
void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
|
||||
__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__read_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __real_memchr_inv(p, c, size);
|
||||
}
|
||||
|
||||
extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
|
||||
__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__read_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __real_kmemdup(p, size, gfp);
|
||||
}
|
||||
|
||||
/* defined after fortified strlen and memcpy to reuse them */
|
||||
__FORTIFY_INLINE char *strcpy(char *p, const char *q)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
size_t size;
|
||||
|
||||
if (p_size == (size_t)-1 && q_size == (size_t)-1)
|
||||
return __underlying_strcpy(p, q);
|
||||
size = strlen(q) + 1;
|
||||
/* test here to use the more stringent object size */
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
memcpy(p, q, size);
|
||||
return p;
|
||||
}
|
||||
|
||||
/* Don't use these outside the FORITFY_SOURCE implementation */
|
||||
#undef __underlying_memchr
|
||||
#undef __underlying_memcmp
|
||||
#undef __underlying_memcpy
|
||||
#undef __underlying_memmove
|
||||
#undef __underlying_memset
|
||||
#undef __underlying_strcat
|
||||
#undef __underlying_strcpy
|
||||
#undef __underlying_strlen
|
||||
#undef __underlying_strncat
|
||||
#undef __underlying_strncpy
|
||||
|
||||
#endif /* _LINUX_FORTIFY_STRING_H_ */
|
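A hedged sketch of the behaviour these fortified helpers provide (the struct and sizes below are made up): a provably too-large constant size fails at build time via __write_overflow(), and a runtime-sized overflow ends in fortify_panic():

	struct name_buf {
		char name[8];
	};

	static void fill_name(struct name_buf *b, const char *src)
	{
		/* __builtin_object_size(b->name, 1) == 8, size == 16 -> compile-time error */
		strncpy(b->name, src, 16);
	}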
@ -634,6 +634,8 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
|
||||
extern void pm_restrict_gfp_mask(void);
|
||||
extern void pm_restore_gfp_mask(void);
|
||||
|
||||
extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
|
||||
|
||||
#ifdef CONFIG_PM_SLEEP
|
||||
extern bool pm_suspended_storage(void);
|
||||
#else
|
||||
|
@ -338,14 +338,14 @@ struct obs_kernel_param {
|
||||
var = 1; \
|
||||
return 0; \
|
||||
} \
|
||||
__setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \
|
||||
early_param(str_on, parse_##var##_on); \
|
||||
\
|
||||
static int __init parse_##var##_off(char *arg) \
|
||||
{ \
|
||||
var = 0; \
|
||||
return 0; \
|
||||
} \
|
||||
__setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
|
||||
early_param(str_off, parse_##var##_off)
|
||||
|
||||
/* Relies on boot_command_line being set */
|
||||
void __init parse_early_param(void);
|
||||
|
@ -83,6 +83,7 @@ static inline void kasan_disable_current(void) {}
|
||||
struct kasan_cache {
|
||||
int alloc_meta_offset;
|
||||
int free_meta_offset;
|
||||
bool is_kmalloc;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_KASAN_HW_TAGS
|
||||
@ -143,6 +144,13 @@ static __always_inline void kasan_cache_create(struct kmem_cache *cache,
|
||||
__kasan_cache_create(cache, size, flags);
|
||||
}
|
||||
|
||||
void __kasan_cache_create_kmalloc(struct kmem_cache *cache);
|
||||
static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
|
||||
{
|
||||
if (kasan_enabled())
|
||||
__kasan_cache_create_kmalloc(cache);
|
||||
}
|
||||
|
||||
size_t __kasan_metadata_size(struct kmem_cache *cache);
|
||||
static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
|
||||
{
|
||||
@ -192,6 +200,13 @@ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object)
|
||||
return false;
|
||||
}
|
||||
|
||||
void __kasan_kfree_large(void *ptr, unsigned long ip);
|
||||
static __always_inline void kasan_kfree_large(void *ptr)
|
||||
{
|
||||
if (kasan_enabled())
|
||||
__kasan_kfree_large(ptr, _RET_IP_);
|
||||
}
|
||||
|
||||
void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
|
||||
static __always_inline void kasan_slab_free_mempool(void *ptr)
|
||||
{
|
||||
@ -239,13 +254,6 @@ static __always_inline void * __must_check kasan_krealloc(const void *object,
|
||||
return (void *)object;
|
||||
}
|
||||
|
||||
void __kasan_kfree_large(void *ptr, unsigned long ip);
|
||||
static __always_inline void kasan_kfree_large(void *ptr)
|
||||
{
|
||||
if (kasan_enabled())
|
||||
__kasan_kfree_large(ptr, _RET_IP_);
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlike kasan_check_read/write(), kasan_check_byte() is performed even for
|
||||
* the hardware tag-based mode that doesn't rely on compiler instrumentation.
|
||||
@ -278,6 +286,7 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {}
|
||||
static inline void kasan_cache_create(struct kmem_cache *cache,
|
||||
unsigned int *size,
|
||||
slab_flags_t *flags) {}
|
||||
static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
|
||||
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
|
||||
static inline void kasan_poison_slab(struct page *page) {}
|
||||
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
|
||||
@ -293,6 +302,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
static inline void kasan_kfree_large(void *ptr) {}
|
||||
static inline void kasan_slab_free_mempool(void *ptr) {}
|
||||
static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
|
||||
gfp_t flags)
|
||||
@ -313,7 +323,6 @@ static inline void *kasan_krealloc(const void *object, size_t new_size,
|
||||
{
|
||||
return (void *)object;
|
||||
}
|
||||
static inline void kasan_kfree_large(void *ptr) {}
|
||||
static inline bool kasan_check_byte(const void *address)
|
||||
{
|
||||
return true;
|
||||
|
222
include/linux/kfence.h
Normal file
@ -0,0 +1,222 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Kernel Electric-Fence (KFENCE). Public interface for allocator and fault
|
||||
* handler integration. For more info see Documentation/dev-tools/kfence.rst.
|
||||
*
|
||||
* Copyright (C) 2020, Google LLC.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX_KFENCE_H
|
||||
#define _LINUX_KFENCE_H
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#ifdef CONFIG_KFENCE
|
||||
|
||||
/*
|
||||
* We allocate an even number of pages, as it simplifies calculations to map
|
||||
* address to metadata indices; effectively, the very first page serves as an
|
||||
* extended guard page, but otherwise has no special purpose.
|
||||
*/
|
||||
#define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
|
||||
extern char *__kfence_pool;
|
||||
|
||||
#ifdef CONFIG_KFENCE_STATIC_KEYS
|
||||
#include <linux/static_key.h>
|
||||
DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
|
||||
#else
|
||||
#include <linux/atomic.h>
|
||||
extern atomic_t kfence_allocation_gate;
|
||||
#endif
|
||||
|
||||
/**
|
||||
* is_kfence_address() - check if an address belongs to KFENCE pool
|
||||
* @addr: address to check
|
||||
*
|
||||
* Return: true or false depending on whether the address is within the KFENCE
|
||||
* object range.
|
||||
*
|
||||
* KFENCE objects live in a separate page range and are not to be intermixed
|
||||
* with regular heap objects (e.g. KFENCE objects must never be added to the
|
||||
* allocator freelists). Failing to do so may and will result in heap
|
||||
* corruptions, therefore is_kfence_address() must be used to check whether
|
||||
* an object requires specific handling.
|
||||
*
|
||||
* Note: This function may be used in fast-paths, and is performance critical.
|
||||
* Future changes should take this into account; for instance, we want to avoid
|
||||
* introducing another load and therefore need to keep KFENCE_POOL_SIZE a
|
||||
* constant (until immediate patching support is added to the kernel).
|
||||
*/
|
||||
static __always_inline bool is_kfence_address(const void *addr)
|
||||
{
|
||||
/*
|
||||
* The non-NULL check is required in case the __kfence_pool pointer was
|
||||
* never initialized; keep it in the slow-path after the range-check.
|
||||
*/
|
||||
return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
|
||||
}
|
||||
|
||||
/**
|
||||
* kfence_alloc_pool() - allocate the KFENCE pool via memblock
|
||||
*/
|
||||
void __init kfence_alloc_pool(void);
|
||||
|
||||
/**
|
||||
* kfence_init() - perform KFENCE initialization at boot time
|
||||
*
|
||||
* Requires that kfence_alloc_pool() was called before. This sets up the
|
||||
* allocation gate timer, and requires that workqueues are available.
|
||||
*/
|
||||
void __init kfence_init(void);
|
||||
|
||||
/**
|
||||
* kfence_shutdown_cache() - handle shutdown_cache() for KFENCE objects
|
||||
* @s: cache being shut down
|
||||
*
|
||||
* Before shutting down a cache, one must ensure there are no remaining objects
|
||||
* allocated from it. Because KFENCE objects are not referenced from the cache
|
||||
* directly, we need to check them here.
|
||||
*
|
||||
* Note that shutdown_cache() is internal to SL*B, and kmem_cache_destroy() does
|
||||
* not return if allocated objects still exist: it prints an error message and
|
||||
* simply aborts destruction of a cache, leaking memory.
|
||||
*
|
||||
* If the only such objects are KFENCE objects, we will not leak the entire
|
||||
* cache, but instead try to provide more useful debug info by making allocated
|
||||
* objects "zombie allocations". Objects may then still be used or freed (which
|
||||
* is handled gracefully), but usage will result in showing KFENCE error reports
|
||||
* which include stack traces to the user of the object, the original allocation
|
||||
* site, and caller to shutdown_cache().
|
||||
*/
|
||||
void kfence_shutdown_cache(struct kmem_cache *s);
|
||||
|
||||
/*
|
||||
* Allocate a KFENCE object. Allocators must not call this function directly,
|
||||
* use kfence_alloc() instead.
|
||||
*/
|
||||
void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
|
||||
|
||||
/**
|
||||
* kfence_alloc() - allocate a KFENCE object with a low probability
|
||||
* @s: struct kmem_cache with object requirements
|
||||
* @size: exact size of the object to allocate (can be less than @s->size
|
||||
* e.g. for kmalloc caches)
|
||||
* @flags: GFP flags
|
||||
*
|
||||
* Return:
|
||||
* * NULL - must proceed with allocating as usual,
|
||||
* * non-NULL - pointer to a KFENCE object.
|
||||
*
|
||||
* kfence_alloc() should be inserted into the heap allocation fast path,
|
||||
* allowing it to transparently return KFENCE-allocated objects with a low
|
||||
* probability using a static branch (the probability is controlled by the
|
||||
* kfence.sample_interval boot parameter).
|
||||
*/
|
||||
static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
|
||||
{
|
||||
#ifdef CONFIG_KFENCE_STATIC_KEYS
|
||||
if (static_branch_unlikely(&kfence_allocation_key))
|
||||
#else
|
||||
if (unlikely(!atomic_read(&kfence_allocation_gate)))
|
||||
#endif
|
||||
return __kfence_alloc(s, size, flags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* kfence_ksize() - get actual amount of memory allocated for a KFENCE object
|
||||
* @addr: pointer to a heap object
|
||||
*
|
||||
* Return:
|
||||
* * 0 - not a KFENCE object, must call __ksize() instead,
|
||||
* * non-0 - this many bytes can be accessed without causing a memory error.
|
||||
*
|
||||
* kfence_ksize() returns the number of bytes requested for a KFENCE object at
|
||||
* allocation time. This number may be less than the object size of the
|
||||
* corresponding struct kmem_cache.
|
||||
*/
|
||||
size_t kfence_ksize(const void *addr);
|
||||
|
||||
/**
|
||||
* kfence_object_start() - find the beginning of a KFENCE object
|
||||
* @addr: address within a KFENCE-allocated object
|
||||
*
|
||||
* Return: address of the beginning of the object.
|
||||
*
|
||||
* SL[AU]B-allocated objects are laid out within a page one by one, so it is
|
||||
* easy to calculate the beginning of an object given a pointer inside it and
|
||||
* the object size. The same is not true for KFENCE, which places a single
|
||||
* object at either end of the page. This helper function is used to find the
|
||||
* beginning of a KFENCE-allocated object.
|
||||
*/
|
||||
void *kfence_object_start(const void *addr);
|
||||
|
||||
/**
|
||||
* __kfence_free() - release a KFENCE heap object to KFENCE pool
|
||||
* @addr: object to be freed
|
||||
*
|
||||
* Requires: is_kfence_address(addr)
|
||||
*
|
||||
* Release a KFENCE object and mark it as freed.
|
||||
*/
|
||||
void __kfence_free(void *addr);
|
||||
|
||||
/**
|
||||
* kfence_free() - try to release an arbitrary heap object to KFENCE pool
|
||||
* @addr: object to be freed
|
||||
*
|
||||
* Return:
|
||||
* * false - object doesn't belong to KFENCE pool and was ignored,
|
||||
* * true - object was released to KFENCE pool.
|
||||
*
|
||||
* Release a KFENCE object and mark it as freed. May be called on any object,
|
||||
* even non-KFENCE objects, to simplify integration of the hooks into the
|
||||
* allocator's free codepath. The allocator must check the return value to
|
||||
* determine if it was a KFENCE object or not.
|
||||
*/
|
||||
static __always_inline __must_check bool kfence_free(void *addr)
|
||||
{
|
||||
if (!is_kfence_address(addr))
|
||||
return false;
|
||||
__kfence_free(addr);
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* kfence_handle_page_fault() - perform page fault handling for KFENCE pages
|
||||
* @addr: faulting address
|
||||
* @is_write: is access a write
|
||||
* @regs: current struct pt_regs (can be NULL, but shows full stack trace)
|
||||
*
|
||||
* Return:
|
||||
* * false - address outside KFENCE pool,
|
||||
* * true - page fault handled by KFENCE, no additional handling required.
|
||||
*
|
||||
* A page fault inside KFENCE pool indicates a memory error, such as an
|
||||
* out-of-bounds access, a use-after-free or an invalid memory access. In these
|
||||
* cases KFENCE prints an error message and marks the offending page as
|
||||
* present, so that the kernel can proceed.
|
||||
*/
|
||||
bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs);
|
||||
|
||||
#else /* CONFIG_KFENCE */
|
||||
|
||||
static inline bool is_kfence_address(const void *addr) { return false; }
|
||||
static inline void kfence_alloc_pool(void) { }
|
||||
static inline void kfence_init(void) { }
|
||||
static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
|
||||
static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
|
||||
static inline size_t kfence_ksize(const void *addr) { return 0; }
|
||||
static inline void *kfence_object_start(const void *addr) { return NULL; }
|
||||
static inline void __kfence_free(void *addr) { }
|
||||
static inline bool __must_check kfence_free(void *addr) { return false; }
|
||||
static inline bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write,
|
||||
struct pt_regs *regs)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif /* _LINUX_KFENCE_H */
|
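A hedged sketch of how an allocator is meant to wire in these hooks (the my_regular_* functions are placeholders, not the real SLAB/SLUB hook points):

	static void *my_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
	{
		void *obj = kfence_alloc(s, size, flags);	/* rarely non-NULL (sampled) */

		if (obj)
			return obj;
		return my_regular_alloc(s, flags);		/* placeholder fast path */
	}

	static void my_free(struct kmem_cache *s, void *obj)
	{
		if (kfence_free(obj))		/* true if obj lives in the KFENCE pool */
			return;
		my_regular_free(s, obj);			/* placeholder */
	}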
@ -359,9 +359,11 @@ extern atomic_t kgdb_active;
extern bool dbg_is_early;
extern void __init dbg_late_init(void);
extern void kgdb_panic(const char *msg);
extern void kgdb_free_init_mem(void);
#else /* ! CONFIG_KGDB */
#define in_dbg_master() (0)
#define dbg_late_init()
static inline void kgdb_panic(const char *msg) {}
static inline void kgdb_free_init_mem(void) { }
#endif /* ! CONFIG_KGDB */
#endif /* _KGDB_H_ */
@ -3,6 +3,7 @@
|
||||
#define _LINUX_KHUGEPAGED_H
|
||||
|
||||
#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
|
||||
#include <linux/shmem_fs.h>
|
||||
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
@ -57,6 +58,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
|
||||
{
|
||||
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
|
||||
if ((khugepaged_always() ||
|
||||
(shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
|
||||
(khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
|
||||
!(vm_flags & VM_NOHUGEPAGE) &&
|
||||
!test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
|
||||
|
@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: ks0108 LCD Controller driver header
*
* Author: Copyright (C) Miguel Ojeda Sandonis
* Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
@ -42,7 +42,7 @@ struct device *mdev_get_iommu_device(struct device *dev);
* @mdev: mdev_device structure on of mediated device
* that is being created
* Returns integer: success (0) or error (< 0)
* @remove: Called to free resources in parent device's driver for a
* @remove: Called to free resources in parent device's driver for
* a mediated device. It is mandatory to provide 'remove'
* ops.
* @mdev: mdev_device device structure which is being
@ -27,9 +27,8 @@ struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
int online_type; /* for passing data to online routine */
int phys_device; /* to which fru does this belong? */
struct device dev;
int nid; /* NID for this memory block */
struct device dev;
};
int arch_get_memory_phys_device(unsigned long start_pfn);
@ -16,22 +16,7 @@ struct resource;
|
||||
struct vmem_altmap;
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
/*
|
||||
* Return page for the valid pfn only if the page is online. All pfn
|
||||
* walkers which rely on the fully initialized page->flags and others
|
||||
* should use this rather than pfn_valid && pfn_to_page
|
||||
*/
|
||||
#define pfn_to_online_page(pfn) \
|
||||
({ \
|
||||
struct page *___page = NULL; \
|
||||
unsigned long ___pfn = pfn; \
|
||||
unsigned long ___nr = pfn_to_section_nr(___pfn); \
|
||||
\
|
||||
if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
|
||||
pfn_valid_within(___pfn)) \
|
||||
___page = pfn_to_page(___pfn); \
|
||||
___page; \
|
||||
})
|
||||
struct page *pfn_to_online_page(unsigned long pfn);
|
||||
|
||||
/*
|
||||
* Types for free bootmem stored in page->lru.next. These have to be in
|
||||
@ -68,7 +53,7 @@ typedef int __bitwise mhp_t;
|
||||
* with this flag set, the resource pointer must no longer be used as it
|
||||
* might be stale, or the resource might have changed.
|
||||
*/
|
||||
#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
|
||||
#define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
|
||||
|
||||
/*
|
||||
* Extended parameters for memory hotplug:
|
||||
@ -81,6 +66,9 @@ struct mhp_params {
|
||||
pgprot_t pgprot;
|
||||
};
|
||||
|
||||
bool mhp_range_allowed(u64 start, u64 size, bool need_mapping);
|
||||
struct range mhp_get_pluggable_range(bool need_mapping);
|
||||
|
||||
/*
|
||||
* Zone resizing functions
|
||||
*
|
||||
@ -131,10 +119,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
|
||||
struct mhp_params *params);
|
||||
extern u64 max_mem_size;
|
||||
|
||||
extern int memhp_online_type_from_str(const char *str);
|
||||
extern int mhp_online_type_from_str(const char *str);
|
||||
|
||||
/* Default online_type (MMOP_*) when new memory blocks are added. */
|
||||
extern int memhp_default_online_type;
|
||||
extern int mhp_default_online_type;
|
||||
/* If movable_node boot option specified */
|
||||
extern bool movable_node_enabled;
|
||||
static inline bool movable_node_is_enabled(void)
|
||||
@ -281,6 +269,13 @@ static inline bool movable_node_is_enabled(void)
|
||||
}
|
||||
#endif /* ! CONFIG_MEMORY_HOTPLUG */
|
||||
|
||||
/*
|
||||
* Keep this declaration outside CONFIG_MEMORY_HOTPLUG as some
|
||||
* platforms might override and use arch_get_mappable_range()
|
||||
* for internal non memory hotplug purposes.
|
||||
*/
|
||||
struct range arch_get_mappable_range(void);
|
||||
|
||||
#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
|
||||
/*
|
||||
* pgdat resizing functions
|
||||
|
@ -137,6 +137,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
|
||||
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
|
||||
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
|
||||
struct dev_pagemap *pgmap);
|
||||
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
|
||||
|
||||
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
|
||||
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
|
||||
@ -165,6 +166,11 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
|
||||
{
|
||||
return 0;
|
||||
|
@ -503,6 +503,9 @@ struct zone {
|
||||
* bootmem allocator):
|
||||
* managed_pages = present_pages - reserved_pages;
|
||||
*
|
||||
* cma pages is present pages that are assigned for CMA use
|
||||
* (MIGRATE_CMA).
|
||||
*
|
||||
* So present_pages may be used by memory hotplug or memory power
|
||||
* management logic to figure out unmanaged pages by checking
|
||||
* (present_pages - managed_pages). And managed_pages should be used
|
||||
@ -527,6 +530,9 @@ struct zone {
|
||||
atomic_long_t managed_pages;
|
||||
unsigned long spanned_pages;
|
||||
unsigned long present_pages;
|
||||
#ifdef CONFIG_CMA
|
||||
unsigned long cma_pages;
|
||||
#endif
|
||||
|
||||
const char *name;
|
||||
|
||||
@ -624,6 +630,15 @@ static inline unsigned long zone_managed_pages(struct zone *zone)
|
||||
return (unsigned long)atomic_long_read(&zone->managed_pages);
|
||||
}
|
||||
|
||||
static inline unsigned long zone_cma_pages(struct zone *zone)
|
||||
{
|
||||
#ifdef CONFIG_CMA
|
||||
return zone->cma_pages;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline unsigned long zone_end_pfn(const struct zone *zone)
|
||||
{
|
||||
return zone->zone_start_pfn + zone->spanned_pages;
|
||||
@ -903,6 +918,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
|
||||
*/
|
||||
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
|
||||
|
||||
#ifdef CONFIG_ZONE_DEVICE
|
||||
static inline bool zone_is_zone_device(struct zone *zone)
|
||||
{
|
||||
return zone_idx(zone) == ZONE_DEVICE;
|
||||
}
|
||||
#else
|
||||
static inline bool zone_is_zone_device(struct zone *zone)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Returns true if a zone has pages managed by the buddy allocator.
|
||||
* All the reclaim decisions have to use this function rather than
|
||||
@ -1291,13 +1318,14 @@ extern size_t mem_section_usage_size(void);
|
||||
* which results in PFN_SECTION_SHIFT equal 6.
|
||||
* To sum it up, at least 6 bits are available.
|
||||
*/
|
||||
#define SECTION_MARKED_PRESENT (1UL<<0)
|
||||
#define SECTION_HAS_MEM_MAP (1UL<<1)
|
||||
#define SECTION_IS_ONLINE (1UL<<2)
|
||||
#define SECTION_IS_EARLY (1UL<<3)
|
||||
#define SECTION_MAP_LAST_BIT (1UL<<4)
|
||||
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
|
||||
#define SECTION_NID_SHIFT 3
|
||||
#define SECTION_MARKED_PRESENT (1UL<<0)
|
||||
#define SECTION_HAS_MEM_MAP (1UL<<1)
|
||||
#define SECTION_IS_ONLINE (1UL<<2)
|
||||
#define SECTION_IS_EARLY (1UL<<3)
|
||||
#define SECTION_TAINT_ZONE_DEVICE (1UL<<4)
|
||||
#define SECTION_MAP_LAST_BIT (1UL<<5)
|
||||
#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
|
||||
#define SECTION_NID_SHIFT 3
|
||||
|
||||
static inline struct page *__section_mem_map_addr(struct mem_section *section)
|
||||
{
|
||||
@ -1336,6 +1364,13 @@ static inline int online_section(struct mem_section *section)
|
||||
return (section && (section->section_mem_map & SECTION_IS_ONLINE));
|
||||
}
|
||||
|
||||
static inline int online_device_section(struct mem_section *section)
|
||||
{
|
||||
unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
|
||||
|
||||
return section && ((section->section_mem_map & flags) == flags);
|
||||
}
|
||||
|
||||
static inline int online_section_nr(unsigned long nr)
|
||||
{
|
||||
return online_section(__nr_to_section(nr));
|
||||
|
@ -810,7 +810,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
/*
* Flags checked when a page is freed. Pages being freed should not have
* these flags set. It they are, there is a problem.
* these flags set. If they are, there is a problem.
*/
#define PAGE_FLAGS_CHECK_AT_FREE \
(1UL << PG_lru | 1UL << PG_locked | \
@ -821,7 +821,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
/*
* Flags checked when a page is prepped for return by the page allocator.
* Pages being prepped should not have these flags set. It they are set,
* Pages being prepped should not have these flags set. If they are set,
* there has been a kernel bug or struct page corruption.
*
* __PG_HWPOISON is exceptional because it needs to be kept beyond page's
|
||||
#define FGP_NOWAIT 0x00000020
|
||||
#define FGP_FOR_MMAP 0x00000040
|
||||
#define FGP_HEAD 0x00000080
|
||||
#define FGP_ENTRY 0x00000100
|
||||
|
||||
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
|
||||
int fgp_flags, gfp_t cache_gfp_mask);
|
||||
@ -450,8 +451,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
|
||||
}
|
||||
|
||||
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
|
||||
unsigned int nr_entries, struct page **entries,
|
||||
pgoff_t *indices);
|
||||
pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
|
||||
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
|
||||
pgoff_t end, unsigned int nr_pages,
|
||||
struct page **pages);
|
||||
@ -759,6 +759,8 @@ extern void __delete_from_page_cache(struct page *page, void *shadow);
|
||||
void replace_page_cache_page(struct page *old, struct page *new);
|
||||
void delete_from_page_cache_batch(struct address_space *mapping,
|
||||
struct pagevec *pvec);
|
||||
loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
|
||||
int whence);
|
||||
|
||||
/*
|
||||
* Like add_to_page_cache_locked, but used to add newly allocated pages:
|
||||
|
@ -25,10 +25,6 @@ struct pagevec {
|
||||
|
||||
void __pagevec_release(struct pagevec *pvec);
|
||||
void __pagevec_lru_add(struct pagevec *pvec);
|
||||
unsigned pagevec_lookup_entries(struct pagevec *pvec,
|
||||
struct address_space *mapping,
|
||||
pgoff_t start, unsigned nr_entries,
|
||||
pgoff_t *indices);
|
||||
void pagevec_remove_exceptionals(struct pagevec *pvec);
|
||||
unsigned pagevec_lookup_range(struct pagevec *pvec,
|
||||
struct address_space *mapping,
|
||||
|
@ -432,14 +432,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
|
||||
* To be differentiate with macro pte_mkyoung, this macro is used on platforms
|
||||
* where software maintains page access bit.
|
||||
*/
|
||||
#ifndef pte_sw_mkyoung
|
||||
static inline pte_t pte_sw_mkyoung(pte_t pte)
|
||||
{
|
||||
return pte;
|
||||
}
|
||||
#define pte_sw_mkyoung pte_sw_mkyoung
|
||||
#endif
|
||||
|
||||
#ifndef pte_savedwrite
|
||||
#define pte_savedwrite pte_write
|
||||
#endif
|
||||
|
@ -171,7 +171,7 @@ static inline void ptrace_event(int event, unsigned long message)
*
* Check whether @event is enabled and, if so, report @event and @pid
* to the ptrace parent. @pid is reported as the pid_t seen from the
* the ptrace parent's pid namespace.
* ptrace parent's pid namespace.
*
* Called without locks.
*/
@ -213,7 +213,8 @@ struct page_vma_mapped_walk {
static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
if (pvmw->pte)
/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
if (pvmw->pte && !PageHuge(pvmw->page))
pte_unmap(pvmw->pte);
if (pvmw->ptl)
spin_unlock(pvmw->ptl);
@ -2,6 +2,7 @@
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
#include <linux/kfence.h>
#include <linux/reciprocal_div.h>
/*
@ -114,6 +115,8 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
{
if (is_kfence_address(page_address(page)))
return 1;
return cache->num;
}
@ -7,6 +7,7 @@
*
* (C) 2007 SGI, Christoph Lameter
*/
#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
@ -185,6 +186,8 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
{
if (is_kfence_address(obj))
return 0;
return __obj_to_index(cache, page_address(page), obj);
}
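Since a KFENCE page holds exactly one object, obj_to_index() maps KFENCE objects to slot 0 and objs_per_slab_page() reports 1, which keeps per-object metadata lookups in bounds; a sketch with a hypothetical accessor:

	/* Sketch: per-object metadata lookup keyed by object index (accessor is hypothetical). */
	static struct obj_cgroup **obj_slot(struct kmem_cache *s, struct page *page, void *obj)
	{
		unsigned int idx = obj_to_index(s, page, obj);	/* 0 for KFENCE objects */

		return &page_objcgs(page)[idx];
	}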
@ -21,4 +21,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
#ifdef CONFIG_STACKDEPOT
int stack_depot_init(void);
#else
static inline int stack_depot_init(void)
{
return 0;
}
#endif /* CONFIG_STACKDEPOT */
#endif
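A hedged usage sketch (buffer size and skip count are arbitrary): callers now invoke stack_depot_init() once before saving traces; the stub keeps this a no-op when CONFIG_STACKDEPOT is off:

	unsigned long entries[16];
	unsigned int nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 2);
	depot_stack_handle_t handle;

	stack_depot_init();	/* returns 0 immediately without CONFIG_STACKDEPOT */
	handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);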
@ -266,287 +266,7 @@ void __read_overflow3(void) __compiletime_error("detected read beyond size of ob
|
||||
void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
|
||||
|
||||
#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
|
||||
|
||||
#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
|
||||
extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
|
||||
extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
|
||||
extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
|
||||
extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
|
||||
extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
|
||||
extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
|
||||
extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
|
||||
extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
|
||||
extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
|
||||
extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
|
||||
#else
|
||||
#define __underlying_memchr __builtin_memchr
|
||||
#define __underlying_memcmp __builtin_memcmp
|
||||
#define __underlying_memcpy __builtin_memcpy
|
||||
#define __underlying_memmove __builtin_memmove
|
||||
#define __underlying_memset __builtin_memset
|
||||
#define __underlying_strcat __builtin_strcat
|
||||
#define __underlying_strcpy __builtin_strcpy
|
||||
#define __underlying_strlen __builtin_strlen
|
||||
#define __underlying_strncat __builtin_strncat
|
||||
#define __underlying_strncpy __builtin_strncpy
|
||||
#endif
|
||||
|
||||
__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__write_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_strncpy(p, q, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE char *strcat(char *p, const char *q)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
if (p_size == (size_t)-1)
|
||||
return __underlying_strcat(p, q);
|
||||
if (strlcat(p, q, p_size) >= p_size)
|
||||
fortify_panic(__func__);
|
||||
return p;
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
|
||||
{
|
||||
__kernel_size_t ret;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
|
||||
/* Work around gcc excess stack consumption issue */
|
||||
if (p_size == (size_t)-1 ||
|
||||
(__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
|
||||
return __underlying_strlen(p);
|
||||
ret = strnlen(p, p_size);
|
||||
if (p_size <= ret)
|
||||
fortify_panic(__func__);
|
||||
return ret;
|
||||
}
|
||||
|
||||
extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
|
||||
__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
__kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
|
||||
if (p_size <= ret && maxlen != ret)
|
||||
fortify_panic(__func__);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* defined after fortified strlen to reuse it */
|
||||
extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
|
||||
__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
|
||||
{
|
||||
size_t ret;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
if (p_size == (size_t)-1 && q_size == (size_t)-1)
|
||||
return __real_strlcpy(p, q, size);
|
||||
ret = strlen(q);
|
||||
if (size) {
|
||||
size_t len = (ret >= size) ? size - 1 : ret;
|
||||
if (__builtin_constant_p(len) && len >= p_size)
|
||||
__write_overflow();
|
||||
if (len >= p_size)
|
||||
fortify_panic(__func__);
|
||||
__underlying_memcpy(p, q, len);
|
||||
p[len] = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* defined after fortified strnlen to reuse it */
|
||||
extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
|
||||
__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
|
||||
{
|
||||
size_t len;
|
||||
/* Use string size rather than possible enclosing struct size. */
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
|
||||
/* If we cannot get size of p and q default to call strscpy. */
|
||||
if (p_size == (size_t) -1 && q_size == (size_t) -1)
|
||||
return __real_strscpy(p, q, size);
|
||||
|
||||
/*
|
||||
* If size can be known at compile time and is greater than
|
||||
* p_size, generate a compile time write overflow error.
|
||||
*/
|
||||
if (__builtin_constant_p(size) && size > p_size)
|
||||
__write_overflow();
|
||||
|
||||
/*
|
||||
* This call protects from read overflow, because len will default to q
|
||||
* length if it smaller than size.
|
||||
*/
|
||||
len = strnlen(q, size);
|
||||
/*
|
||||
* If len equals size, we will copy only size bytes which leads to
|
||||
* -E2BIG being returned.
|
||||
* Otherwise we will copy len + 1 because of the final '\O'.
|
||||
*/
|
||||
len = len == size ? size : len + 1;
|
||||
|
||||
/*
|
||||
* Generate a runtime write overflow error if len is greater than
|
||||
* p_size.
|
||||
*/
|
||||
if (len > p_size)
|
||||
fortify_panic(__func__);
|
||||
|
||||
/*
|
||||
* We can now safely call vanilla strscpy because we are protected from:
|
||||
* 1. Read overflow thanks to call to strnlen().
|
||||
* 2. Write overflow thanks to above ifs.
|
||||
*/
|
||||
return __real_strscpy(p, q, len);
|
||||
}
|
||||
|
||||
/* defined after fortified strlen and strnlen to reuse them */
|
||||
__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
|
||||
{
|
||||
size_t p_len, copy_len;
|
||||
size_t p_size = __builtin_object_size(p, 1);
|
||||
size_t q_size = __builtin_object_size(q, 1);
|
||||
if (p_size == (size_t)-1 && q_size == (size_t)-1)
|
||||
return __underlying_strncat(p, q, count);
|
||||
p_len = strlen(p);
|
||||
copy_len = strnlen(q, count);
|
||||
if (p_size < p_len + copy_len + 1)
|
||||
fortify_panic(__func__);
|
||||
__underlying_memcpy(p + p_len, q, copy_len);
|
||||
p[p_len + copy_len] = '\0';
|
||||
return p;
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
if (__builtin_constant_p(size) && p_size < size)
|
||||
__write_overflow();
|
||||
if (p_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memset(p, c, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
size_t q_size = __builtin_object_size(q, 0);
|
||||
if (__builtin_constant_p(size)) {
|
||||
if (p_size < size)
|
||||
__write_overflow();
|
||||
if (q_size < size)
|
||||
__read_overflow2();
|
||||
}
|
||||
if (p_size < size || q_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memcpy(p, q, size);
|
||||
}
|
||||
|
||||
__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
|
||||
{
|
||||
size_t p_size = __builtin_object_size(p, 0);
|
||||
size_t q_size = __builtin_object_size(q, 0);
|
||||
if (__builtin_constant_p(size)) {
|
||||
if (p_size < size)
|
||||
__write_overflow();
|
||||
if (q_size < size)
|
||||
__read_overflow2();
|
||||
}
|
||||
if (p_size < size || q_size < size)
|
||||
fortify_panic(__func__);
|
||||
return __underlying_memmove(p, q, size);
|
||||
}

extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
{
	size_t p_size = __builtin_object_size(p, 0);
	if (__builtin_constant_p(size) && p_size < size)
		__read_overflow();
	if (p_size < size)
		fortify_panic(__func__);
	return __real_memscan(p, c, size);
}

__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
{
	size_t p_size = __builtin_object_size(p, 0);
	size_t q_size = __builtin_object_size(q, 0);
	if (__builtin_constant_p(size)) {
		if (p_size < size)
			__read_overflow();
		if (q_size < size)
			__read_overflow2();
	}
	if (p_size < size || q_size < size)
		fortify_panic(__func__);
	return __underlying_memcmp(p, q, size);
}

__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
{
	size_t p_size = __builtin_object_size(p, 0);
	if (__builtin_constant_p(size) && p_size < size)
		__read_overflow();
	if (p_size < size)
		fortify_panic(__func__);
	return __underlying_memchr(p, c, size);
}

void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
{
	size_t p_size = __builtin_object_size(p, 0);
	if (__builtin_constant_p(size) && p_size < size)
		__read_overflow();
	if (p_size < size)
		fortify_panic(__func__);
	return __real_memchr_inv(p, c, size);
}

extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
{
	size_t p_size = __builtin_object_size(p, 0);
	if (__builtin_constant_p(size) && p_size < size)
		__read_overflow();
	if (p_size < size)
		fortify_panic(__func__);
	return __real_kmemdup(p, size, gfp);
}

/* defined after fortified strlen and memcpy to reuse them */
__FORTIFY_INLINE char *strcpy(char *p, const char *q)
{
	size_t p_size = __builtin_object_size(p, 1);
	size_t q_size = __builtin_object_size(q, 1);
	size_t size;
	if (p_size == (size_t)-1 && q_size == (size_t)-1)
		return __underlying_strcpy(p, q);
	size = strlen(q) + 1;
	/* test here to use the more stringent object size */
	if (p_size < size)
		fortify_panic(__func__);
	memcpy(p, q, size);
	return p;
}

/* Don't use these outside the FORITFY_SOURCE implementation */
#undef __underlying_memchr
#undef __underlying_memcmp
#undef __underlying_memcpy
#undef __underlying_memmove
#undef __underlying_memset
#undef __underlying_strcat
#undef __underlying_strcpy
#undef __underlying_strlen
#undef __underlying_strncat
#undef __underlying_strncpy
#include <linux/fortify-string.h>
#endif

/**
@ -313,6 +313,12 @@ static inline void __mod_node_page_state(struct pglist_data *pgdat,
			enum node_stat_item item, int delta)
{
	if (vmstat_item_in_bytes(item)) {
		/*
		 * Only cgroups use subpage accounting right now; at
		 * the global level, these items still change in
		 * multiples of whole pages. Store them as pages
		 * internally to keep the per-cpu counters compact.
		 */
		VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
		delta >>= PAGE_SHIFT;
	}
@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *pool);
|
||||
* @malloc: allocate mem from a pool.
|
||||
* @free: free mem from a pool.
|
||||
* @shrink: shrink the pool.
|
||||
* @sleep_mapped: whether zpool driver can sleep during map.
|
||||
* @map: map a handle.
|
||||
* @unmap: unmap a handle.
|
||||
* @total_size: get total size of a pool.
|
||||
@ -100,6 +101,7 @@ struct zpool_driver {
|
||||
int (*shrink)(void *pool, unsigned int pages,
|
||||
unsigned int *reclaimed);
|
||||
|
||||
bool sleep_mapped;
|
||||
void *(*map)(void *pool, unsigned long handle,
|
||||
enum zpool_mapmode mm);
|
||||
void (*unmap)(void *pool, unsigned long handle);
|
||||
@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_driver *driver);
|
||||
int zpool_unregister_driver(struct zpool_driver *driver);
|
||||
|
||||
bool zpool_evictable(struct zpool *pool);
|
||||
bool zpool_can_sleep_mapped(struct zpool *pool);
|
||||
|
||||
#endif
|
||||
|
@ -35,7 +35,7 @@ enum zs_mapmode {
|
||||
|
||||
struct zs_pool_stats {
|
||||
/* How many pages were migrated (freed) */
|
||||
unsigned long pages_compacted;
|
||||
atomic_long_t pages_compacted;
|
||||
};
|
||||
|
||||
struct zs_pool;
|
||||
|
74 include/trace/events/error_report.h Normal file
@ -0,0 +1,74 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Declarations for error reporting tracepoints.
 *
 * Copyright (C) 2021, Google LLC.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM error_report

#if !defined(_TRACE_ERROR_REPORT_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_ERROR_REPORT_H

#include <linux/tracepoint.h>

#ifndef __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY

enum error_detector {
	ERROR_DETECTOR_KFENCE,
	ERROR_DETECTOR_KASAN
};

#endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */

#define error_detector_list			\
	EM(ERROR_DETECTOR_KFENCE, "kfence")	\
	EMe(ERROR_DETECTOR_KASAN, "kasan")
/* Always end the list with an EMe. */

#undef EM
#undef EMe

#define EM(a, b)	TRACE_DEFINE_ENUM(a);
#define EMe(a, b)	TRACE_DEFINE_ENUM(a);

error_detector_list

#undef EM
#undef EMe

#define EM(a, b) { a, b },
#define EMe(a, b) { a, b }

#define show_error_detector_list(val) \
	__print_symbolic(val, error_detector_list)

DECLARE_EVENT_CLASS(error_report_template,
		    TP_PROTO(enum error_detector error_detector, unsigned long id),
		    TP_ARGS(error_detector, id),
		    TP_STRUCT__entry(__field(enum error_detector, error_detector)
				     __field(unsigned long, id)),
		    TP_fast_assign(__entry->error_detector = error_detector;
				   __entry->id = id;),
		    TP_printk("[%s] %lx",
			      show_error_detector_list(__entry->error_detector),
			      __entry->id));

/**
 * error_report_end - called after printing the error report
 * @error_detector: short string describing the error detection tool
 * @id: pseudo-unique descriptor identifying the report
 *      (e.g. the memory access address)
 *
 * This event occurs right after a debugging tool finishes printing the error
 * report.
 */
DEFINE_EVENT(error_report_template, error_report_end,
	     TP_PROTO(enum error_detector error_detector, unsigned long id),
	     TP_ARGS(error_detector, id));

#endif /* _TRACE_ERROR_REPORT_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
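/*
 * Editor's illustration (not part of the patch): a sketch of how a debugging
 * tool is expected to use the event declared above. After printing its
 * report, the detector emits error_report_end with its identity and a
 * pseudo-unique id (for memory errors, typically the faulting address), so
 * test frameworks can tell when a report has finished. The function and
 * message below are hypothetical.
 */
#include <trace/events/error_report.h>

static void example_report_memory_error(unsigned long bad_addr)
{
	/* ... print the human-readable report to the console first ... */
	pr_err("example detector: invalid access at 0x%lx\n", bad_addr);

	/* Then signal that the report is complete. */
	trace_error_report_end(ERROR_DETECTOR_KASAN, bad_addr);
}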
@ -844,7 +844,7 @@ struct fw_cdev_queue_iso {
|
||||
* struct fw_cdev_start_iso - Start an isochronous transmission or reception
|
||||
* @cycle: Cycle in which to start I/O. If @cycle is greater than or
|
||||
* equal to 0, the I/O will start on that cycle.
|
||||
* @sync: Determines the value to wait for for receive packets that have
|
||||
* @sync: Determines the value to wait for receive packets that have
|
||||
* the %FW_CDEV_ISO_SYNC bit set
|
||||
* @tags: Tag filter bit mask. Only valid for isochronous reception.
|
||||
* Determines the tag values for which packets will be accepted.
|
||||
|
@ -84,7 +84,7 @@ struct input_id {
|
||||
* in units per radian.
|
||||
* When INPUT_PROP_ACCELEROMETER is set the resolution changes.
|
||||
* The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
|
||||
* in units per g (units/g) and in units per degree per second
|
||||
* units per g (units/g) and in units per degree per second
|
||||
* (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ).
|
||||
*/
|
||||
struct input_absinfo {
|
||||
|
@ -19,7 +19,7 @@ config CC_VERSION_TEXT
|
||||
CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
|
||||
When the compiler is updated, Kconfig will be invoked.
|
||||
|
||||
- Ensure full rebuild when the compier is updated
|
||||
- Ensure full rebuild when the compiler is updated
|
||||
include/linux/kconfig.h contains this option in the comment line so
|
||||
fixdep adds include/config/cc/version/text.h into the auto-generated
|
||||
dependency. When the compiler is updated, syncconfig will touch it
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/utime.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/init_syscalls.h>
|
||||
|
||||
@ -45,6 +46,16 @@ static void __init error(char *x)
|
||||
message = x;
|
||||
}
|
||||
|
||||
static void panic_show_mem(const char *fmt, ...)
|
||||
{
|
||||
va_list args;
|
||||
|
||||
show_mem(0, NULL);
|
||||
va_start(args, fmt);
|
||||
panic(fmt, args);
|
||||
va_end(args);
|
||||
}
|
||||
|
||||
/* link hash */
|
||||
|
||||
#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
|
||||
@ -80,7 +91,7 @@ static char __init *find_link(int major, int minor, int ino,
|
||||
}
|
||||
q = kmalloc(sizeof(struct hash), GFP_KERNEL);
|
||||
if (!q)
|
||||
panic("can't allocate link hash entry");
|
||||
panic_show_mem("can't allocate link hash entry");
|
||||
q->major = major;
|
||||
q->minor = minor;
|
||||
q->ino = ino;
|
||||
@ -125,7 +136,7 @@ static void __init dir_add(const char *name, time64_t mtime)
|
||||
{
|
||||
struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
|
||||
if (!de)
|
||||
panic("can't allocate dir_entry buffer");
|
||||
panic_show_mem("can't allocate dir_entry buffer");
|
||||
INIT_LIST_HEAD(&de->list);
|
||||
de->name = kstrdup(name, GFP_KERNEL);
|
||||
de->mtime = mtime;
|
||||
@ -460,7 +471,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
|
||||
name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
|
||||
|
||||
if (!header_buf || !symlink_buf || !name_buf)
|
||||
panic("can't allocate buffers");
|
||||
panic_show_mem("can't allocate buffers");
|
||||
|
||||
state = Start;
|
||||
this_header = 0;
|
||||
@ -607,7 +618,7 @@ static int __init populate_rootfs(void)
|
||||
/* Load the built in initramfs */
|
||||
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
|
||||
if (err)
|
||||
panic("%s", err); /* Failed to decompress INTERNAL initramfs */
|
||||
panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
|
||||
|
||||
if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
|
||||
goto done;
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include <linux/security.h>
|
||||
#include <linux/smp.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/kfence.h>
|
||||
#include <linux/rcupdate.h>
|
||||
#include <linux/moduleparam.h>
|
||||
#include <linux/kallsyms.h>
|
||||
@ -96,6 +97,7 @@
|
||||
#include <linux/mem_encrypt.h>
|
||||
#include <linux/kcsan.h>
|
||||
#include <linux/init_syscalls.h>
|
||||
#include <linux/stackdepot.h>
|
||||
|
||||
#include <asm/io.h>
|
||||
#include <asm/bugs.h>
|
||||
@ -824,7 +826,9 @@ static void __init mm_init(void)
|
||||
*/
|
||||
page_ext_init_flatmem();
|
||||
init_mem_debugging_and_hardening();
|
||||
kfence_alloc_pool();
|
||||
report_meminit();
|
||||
stack_depot_init();
|
||||
mem_init();
|
||||
/* page_owner must be initialized after buddy is ready */
|
||||
page_ext_init_flatmem_late();
|
||||
@ -955,6 +959,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
|
||||
hrtimers_init();
|
||||
softirq_init();
|
||||
timekeeping_init();
|
||||
kfence_init();
|
||||
|
||||
/*
|
||||
* For best initial stack canary entropy, prepare it after:
|
||||
@ -1421,6 +1426,7 @@ static int __ref kernel_init(void *unused)
|
||||
async_synchronize_full();
|
||||
kprobe_free_init_mem();
|
||||
ftrace_free_init_mem();
|
||||
kgdb_free_init_mem();
|
||||
free_initmem();
|
||||
mark_readonly();
|
||||
|
||||
|
@ -16,14 +16,6 @@
|
||||
#include <linux/version.h>
|
||||
#include <linux/proc_ns.h>
|
||||
|
||||
#ifndef CONFIG_KALLSYMS
|
||||
#define version(a) Version_ ## a
|
||||
#define version_string(a) version(a)
|
||||
|
||||
extern int version_string(LINUX_VERSION_CODE);
|
||||
int version_string(LINUX_VERSION_CODE);
|
||||
#endif
|
||||
|
||||
struct uts_namespace init_uts_ns = {
|
||||
.ns.count = REFCOUNT_INIT(2),
|
||||
.name = {
|
||||
|
@ -455,6 +455,17 @@ setundefined:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void kgdb_free_init_mem(void)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Clear init memory breakpoints. */
|
||||
for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
|
||||
if (init_section_contains((void *)kgdb_break[i].bpt_addr, 0))
|
||||
kgdb_break[i].state = BP_UNDEFINED;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KGDB_KDB
|
||||
void kdb_dump_stack_on_cpu(int cpu)
|
||||
{
|
||||
|
@ -269,7 +269,7 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
|
||||
if (!event->parent) {
|
||||
/*
|
||||
* If this is a !child event, we must hold ctx::mutex to
|
||||
* stabilize the the event->ctx relation. See
|
||||
* stabilize the event->ctx relation. See
|
||||
* perf_event_ctx_lock().
|
||||
*/
|
||||
lockdep_assert_held(&ctx->mutex);
|
||||
@ -1303,7 +1303,7 @@ static void put_ctx(struct perf_event_context *ctx)
|
||||
* life-time rules separate them. That is an exiting task cannot fork, and a
|
||||
* spawning task cannot (yet) exit.
|
||||
*
|
||||
* But remember that that these are parent<->child context relations, and
|
||||
* But remember that these are parent<->child context relations, and
|
||||
* migration does not affect children, therefore these two orderings should not
|
||||
* interact.
|
||||
*
|
||||
@ -1442,7 +1442,7 @@ static u64 primary_event_id(struct perf_event *event)
|
||||
/*
|
||||
* Get the perf_event_context for a task and lock it.
|
||||
*
|
||||
* This has to cope with with the fact that until it is locked,
|
||||
* This has to cope with the fact that until it is locked,
|
||||
* the context could get moved to another task.
|
||||
*/
|
||||
static struct perf_event_context *
|
||||
@ -2486,7 +2486,7 @@ static void perf_set_shadow_time(struct perf_event *event,
|
||||
* But this is a bit hairy.
|
||||
*
|
||||
* So instead, we have an explicit cgroup call to remain
|
||||
* within the time time source all along. We believe it
|
||||
* within the time source all along. We believe it
|
||||
* is cleaner and simpler to understand.
|
||||
*/
|
||||
if (is_cgroup_event(event))
|
||||
|
@ -1733,7 +1733,7 @@ void uprobe_free_utask(struct task_struct *t)
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a uprobe_task object for the task if if necessary.
|
||||
* Allocate a uprobe_task object for the task if necessary.
|
||||
* Called when the thread hits a breakpoint.
|
||||
*
|
||||
* Returns:
|
||||
|
@ -15,12 +15,7 @@
struct group_info *groups_alloc(int gidsetsize)
{
	struct group_info *gi;
	unsigned int len;

	len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
	gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
	if (!gi)
		gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
	gi = kvmalloc(struct_size(gi, gid, gidsetsize), GFP_KERNEL_ACCOUNT);
	if (!gi)
		return NULL;
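/*
 * Editor's illustration (not part of the patch): the replacement above relies
 * on two helpers, assuming gid[] is the flexible array member of
 * struct group_info. Roughly:
 *
 *	struct_size(gi, gid, gidsetsize)
 *		== sizeof(struct group_info) + gidsetsize * sizeof(kgid_t),
 *		   saturating to SIZE_MAX on arithmetic overflow;
 *
 *	kvmalloc(size, GFP_KERNEL_ACCOUNT)
 *		tries kmalloc() first and transparently falls back to
 *		vmalloc() for large sizes, which is what the removed
 *		kmalloc()/__vmalloc() pair did by hand.
 */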
@ -1420,7 +1420,7 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
|
||||
}
|
||||
|
||||
/*
|
||||
* Performs the wakeup of the the top-waiter and re-enables preemption.
|
||||
* Performs the wakeup of the top-waiter and re-enables preemption.
|
||||
*/
|
||||
void rt_mutex_postunlock(struct wake_q_head *wake_q)
|
||||
{
|
||||
@ -1819,7 +1819,7 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
|
||||
* been started.
|
||||
* @waiter: the pre-initialized rt_mutex_waiter
|
||||
*
|
||||
* Wait for the the lock acquisition started on our behalf by
|
||||
* Wait for the lock acquisition started on our behalf by
|
||||
* rt_mutex_start_proxy_lock(). Upon failure, the caller must call
|
||||
* rt_mutex_cleanup_proxy_lock().
|
||||
*
|
||||
|
@ -1048,7 +1048,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
|
||||
|
||||
/*
|
||||
* If there were already threads queued before us and:
|
||||
* 1) there are no no active locks, wake the front
|
||||
* 1) there are no active locks, wake the front
|
||||
* queued process(es) as the handoff bit might be set.
|
||||
* 2) there are no active writers and some readers, the lock
|
||||
* must be read owned; so we try to wake any read lock
|
||||
|
@ -119,7 +119,7 @@ EXPORT_SYMBOL(down_killable);
|
||||
* @sem: the semaphore to be acquired
|
||||
*
|
||||
* Try to acquire the semaphore atomically. Returns 0 if the semaphore has
|
||||
* been acquired successfully or 1 if it it cannot be acquired.
|
||||
* been acquired successfully or 1 if it cannot be acquired.
|
||||
*
|
||||
* NOTE: This return value is inverted from both spin_trylock and
|
||||
* mutex_trylock! Be careful about this when converting code.
|
||||
|
@ -5126,7 +5126,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
|
||||
/*
|
||||
* When a group wakes up we want to make sure that its quota is not already
|
||||
* expired/exceeded, otherwise it may be allowed to steal additional ticks of
|
||||
* runtime as update_curr() throttling can not not trigger until it's on-rq.
|
||||
* runtime as update_curr() throttling can not trigger until it's on-rq.
|
||||
*/
|
||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
|
||||
{
|
||||
|
@ -454,7 +454,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
|
||||
|
||||
/*
|
||||
* For each cpu runqueue, if the task's mm match @mm, ensure that all
|
||||
* @mm's membarrier state set bits are also set in in the runqueue's
|
||||
* @mm's membarrier state set bits are also set in the runqueue's
|
||||
* membarrier state. This ensures that a runqueue scheduling
|
||||
* between threads which are users of @mm has its membarrier state
|
||||
* updated.
|
||||
|
@ -2962,7 +2962,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &block_dump,
|
||||
.maxlen = sizeof(block_dump),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
{
|
||||
@ -2970,7 +2970,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &sysctl_vfs_cache_pressure,
|
||||
.maxlen = sizeof(sysctl_vfs_cache_pressure),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
|
||||
@ -2980,7 +2980,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &sysctl_legacy_va_layout,
|
||||
.maxlen = sizeof(sysctl_legacy_va_layout),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
#endif
|
||||
@ -2990,7 +2990,7 @@ static struct ctl_table vm_table[] = {
|
||||
.data = &node_reclaim_mode,
|
||||
.maxlen = sizeof(node_reclaim_mode),
|
||||
.mode = 0644,
|
||||
.proc_handler = proc_dointvec,
|
||||
.proc_handler = proc_dointvec_minmax,
|
||||
.extra1 = SYSCTL_ZERO,
|
||||
},
|
||||
{
|
||||
|
@ -81,6 +81,7 @@ obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
|
||||
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
|
||||
obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
|
||||
obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
|
||||
obj-$(CONFIG_TRACEPOINTS) += error_report-traces.o
|
||||
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
|
||||
ifeq ($(CONFIG_PM),y)
|
||||
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
|
||||
|
11 kernel/trace/error_report-traces.c Normal file
@ -0,0 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Error reporting trace points.
 *
 * Copyright (C) 2021, Google LLC.
 */

#define CREATE_TRACE_POINTS
#include <trace/events/error_report.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(error_report_end);
@ -651,6 +651,15 @@ config STACKDEPOT
	bool
	select STACKTRACE

config STACK_HASH_ORDER
	int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
	range 12 20
	default 20
	depends on STACKDEPOT
	help
	  Select the hash size as a power of 2 for the stackdepot hash table.
	  Choose a lower value to reduce the memory impact.

config SBITMAP
	bool
@ -938,6 +938,7 @@ config DEBUG_STACKOVERFLOW
|
||||
If in doubt, say "N".
|
||||
|
||||
source "lib/Kconfig.kasan"
|
||||
source "lib/Kconfig.kfence"
|
||||
|
||||
endmenu # "Memory Debugging"
|
||||
|
||||
|
82 lib/Kconfig.kfence Normal file
@ -0,0 +1,82 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
config HAVE_ARCH_KFENCE
|
||||
bool
|
||||
|
||||
menuconfig KFENCE
|
||||
bool "KFENCE: low-overhead sampling-based memory safety error detector"
|
||||
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
|
||||
select STACKTRACE
|
||||
help
|
||||
KFENCE is a low-overhead sampling-based detector of heap out-of-bounds
|
||||
access, use-after-free, and invalid-free errors. KFENCE is designed
|
||||
to have negligible cost to permit enabling it in production
|
||||
environments.
|
||||
|
||||
See <file:Documentation/dev-tools/kfence.rst> for more details.
|
||||
|
||||
Note that, KFENCE is not a substitute for explicit testing with tools
|
||||
such as KASAN. KFENCE can detect a subset of bugs that KASAN can
|
||||
detect, albeit at very different performance profiles. If you can
|
||||
afford to use KASAN, continue using KASAN, for example in test
|
||||
environments. If your kernel targets production use, and cannot
|
||||
enable KASAN due to its cost, consider using KFENCE.
|
||||
|
||||
if KFENCE
|
||||
|
||||
config KFENCE_STATIC_KEYS
|
||||
bool "Use static keys to set up allocations"
|
||||
default y
|
||||
depends on JUMP_LABEL # To ensure performance, require jump labels
|
||||
help
|
||||
Use static keys (static branches) to set up KFENCE allocations. Using
|
||||
static keys is normally recommended, because it avoids a dynamic
|
||||
branch in the allocator's fast path. However, with very low sample
|
||||
intervals, or on systems that do not support jump labels, a dynamic
|
||||
branch may still be an acceptable performance trade-off.
|
||||
|
||||
config KFENCE_SAMPLE_INTERVAL
|
||||
int "Default sample interval in milliseconds"
|
||||
default 100
|
||||
help
|
||||
The KFENCE sample interval determines the frequency with which heap
|
||||
allocations will be guarded by KFENCE. May be overridden via boot
|
||||
parameter "kfence.sample_interval".
|
||||
|
||||
Set this to 0 to disable KFENCE by default, in which case only
|
||||
setting "kfence.sample_interval" to a non-zero value enables KFENCE.
|
||||
|
||||
config KFENCE_NUM_OBJECTS
|
||||
int "Number of guarded objects available"
|
||||
range 1 65535
|
||||
default 255
|
||||
help
|
||||
The number of guarded objects available. For each KFENCE object, 2
|
||||
pages are required; with one containing the object and two adjacent
|
||||
ones used as guard pages.
|
||||
|
||||
config KFENCE_STRESS_TEST_FAULTS
|
||||
int "Stress testing of fault handling and error reporting" if EXPERT
|
||||
default 0
|
||||
help
|
||||
The inverse probability with which to randomly protect KFENCE object
|
||||
pages, resulting in spurious use-after-frees. The main purpose of
|
||||
this option is to stress test KFENCE with concurrent error reports
|
||||
and allocations/frees. A value of 0 disables stress testing logic.
|
||||
|
||||
Only for KFENCE testing; set to 0 if you are not a KFENCE developer.
|
||||
|
||||
config KFENCE_KUNIT_TEST
|
||||
tristate "KFENCE integration test suite" if !KUNIT_ALL_TESTS
|
||||
default KUNIT_ALL_TESTS
|
||||
depends on TRACEPOINTS && KUNIT
|
||||
help
|
||||
Test suite for KFENCE, testing various error detection scenarios with
|
||||
various allocation types, and checking that reports are correctly
|
||||
output to console.
|
||||
|
||||
Say Y here if you want the test to be built into the kernel and run
|
||||
during boot; say M if you want the test to build as a module; say N
|
||||
if you are unsure.
|
||||
|
||||
endif # KFENCE
|
@ -112,23 +112,6 @@ config UBSAN_UNREACHABLE
|
||||
This option enables -fsanitize=unreachable which checks for control
|
||||
flow reaching an expected-to-be-unreachable position.
|
||||
|
||||
config UBSAN_SIGNED_OVERFLOW
|
||||
bool "Perform checking for signed arithmetic overflow"
|
||||
default UBSAN
|
||||
depends on $(cc-option,-fsanitize=signed-integer-overflow)
|
||||
help
|
||||
This option enables -fsanitize=signed-integer-overflow which checks
|
||||
for overflow of any arithmetic operations with signed integers.
|
||||
|
||||
config UBSAN_UNSIGNED_OVERFLOW
|
||||
bool "Perform checking for unsigned arithmetic overflow"
|
||||
depends on $(cc-option,-fsanitize=unsigned-integer-overflow)
|
||||
depends on !X86_32 # avoid excessive stack usage on x86-32/clang
|
||||
help
|
||||
This option enables -fsanitize=unsigned-integer-overflow which checks
|
||||
for overflow of any arithmetic operations with unsigned integers. This
|
||||
currently causes x86 to fail to boot.
|
||||
|
||||
config UBSAN_OBJECT_SIZE
|
||||
bool "Perform checking for accesses beyond the end of objects"
|
||||
default UBSAN
|
||||
|
@ -228,7 +228,6 @@ char *next_arg(char *args, char **param, char **val)
{
	unsigned int i, equals = 0;
	int in_quote = 0, quoted = 0;
	char *next;

	if (*args == '"') {
		args++;
@ -266,10 +265,10 @@ char *next_arg(char *args, char **param, char **val)

	if (args[i]) {
		args[i] = '\0';
		next = args + i + 1;
		args += i + 1;
	} else
		next = args + i;
		args += i;

	/* Chew up trailing spaces. */
	return skip_spaces(next);
	return skip_spaces(args);
}
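/*
 * Editor's illustration (not part of the patch): a typical caller loop for
 * next_arg(). The change above only advances and returns "args" directly
 * instead of going through a separate "next" pointer; behaviour is unchanged:
 * one "param[=val]" token is consumed per call and leading spaces of the
 * remainder are skipped.
 */
static void example_parse_args(char *cmdline)
{
	char *param, *val;

	while (*cmdline) {
		cmdline = next_arg(cmdline, &param, &val);
		pr_debug("param=%s val=%s\n", param, val ? val : "(none)");
	}
}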
@ -81,7 +81,8 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
|
||||
* users set the same bit, one user will return remain bits, otherwise
|
||||
* return 0.
|
||||
*/
|
||||
static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
|
||||
static unsigned long
|
||||
bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
|
||||
{
|
||||
unsigned long *p = map + BIT_WORD(start);
|
||||
const unsigned long size = start + nr;
|
||||
|
@ -31,6 +31,7 @@
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/memblock.h>

#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)

@ -141,14 +142,38 @@ static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
	return stack;
}

#define STACK_HASH_ORDER 20
#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
#define STACK_HASH_SEED 0x9747b28c

static struct stack_record *stack_table[STACK_HASH_SIZE] = {
	[0 ... STACK_HASH_SIZE - 1] = NULL
};
static bool stack_depot_disable;
static struct stack_record **stack_table;

static int __init is_stack_depot_disabled(char *str)
{
	int ret;

	ret = kstrtobool(str, &stack_depot_disable);
	if (!ret && stack_depot_disable) {
		pr_info("Stack Depot is disabled\n");
		stack_table = NULL;
	}
	return 0;
}
early_param("stack_depot_disable", is_stack_depot_disabled);

int __init stack_depot_init(void)
{
	if (!stack_depot_disable) {
		size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
		int i;

		stack_table = memblock_alloc(size, size);
		for (i = 0; i < STACK_HASH_SIZE; i++)
			stack_table[i] = NULL;
	}
	return 0;
}

/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@ -242,7 +267,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
	unsigned long flags;
	u32 hash;

	if (unlikely(nr_entries == 0))
	if (unlikely(nr_entries == 0) || stack_depot_disable)
		goto fast_exit;

	hash = hash_stack(entries, nr_entries);
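/*
 * Editor's illustration (not part of the patch): caller pattern for the depot
 * after this change. Booting with "stack_depot_disable=on" skips the hash
 * table allocation, and stack_depot_save() then bails out before hashing, so
 * callers must treat a zero handle as "no stack recorded". The helper below
 * is hypothetical.
 */
static depot_stack_handle_t example_record_stack(void)
{
	unsigned long entries[16];
	unsigned int nr_entries;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
	return stack_depot_save(entries, nr_entries, GFP_NOWAIT);	/* may be 0 */
}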
111 lib/test_kasan.c
@ -252,11 +252,14 @@ static void kmalloc_large_oob_right(struct kunit *test)
|
||||
kfree(ptr);
|
||||
}
|
||||
|
||||
static void kmalloc_oob_krealloc_more(struct kunit *test)
|
||||
static void krealloc_more_oob_helper(struct kunit *test,
|
||||
size_t size1, size_t size2)
|
||||
{
|
||||
char *ptr1, *ptr2;
|
||||
size_t size1 = 17;
|
||||
size_t size2 = 19;
|
||||
size_t middle;
|
||||
|
||||
KUNIT_ASSERT_LT(test, size1, size2);
|
||||
middle = size1 + (size2 - size1) / 2;
|
||||
|
||||
ptr1 = kmalloc(size1, GFP_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
|
||||
@ -264,15 +267,31 @@ static void kmalloc_oob_krealloc_more(struct kunit *test)
|
||||
ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
|
||||
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
|
||||
/* All offsets up to size2 must be accessible. */
|
||||
ptr2[size1 - 1] = 'x';
|
||||
ptr2[size1] = 'x';
|
||||
ptr2[middle] = 'x';
|
||||
ptr2[size2 - 1] = 'x';
|
||||
|
||||
/* Generic mode is precise, so unaligned size2 must be inaccessible. */
|
||||
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
|
||||
|
||||
/* For all modes first aligned offset after size2 must be inaccessible. */
|
||||
KUNIT_EXPECT_KASAN_FAIL(test,
|
||||
ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
|
||||
|
||||
kfree(ptr2);
|
||||
}
|
||||
|
||||
static void kmalloc_oob_krealloc_less(struct kunit *test)
|
||||
static void krealloc_less_oob_helper(struct kunit *test,
|
||||
size_t size1, size_t size2)
|
||||
{
|
||||
char *ptr1, *ptr2;
|
||||
size_t size1 = 17;
|
||||
size_t size2 = 15;
|
||||
size_t middle;
|
||||
|
||||
KUNIT_ASSERT_LT(test, size2, size1);
|
||||
middle = size2 + (size1 - size2) / 2;
|
||||
|
||||
ptr1 = kmalloc(size1, GFP_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
|
||||
@ -280,10 +299,79 @@ static void kmalloc_oob_krealloc_less(struct kunit *test)
|
||||
ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
|
||||
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
|
||||
/* Must be accessible for all modes. */
|
||||
ptr2[size2 - 1] = 'x';
|
||||
|
||||
/* Generic mode is precise, so unaligned size2 must be inaccessible. */
|
||||
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
|
||||
|
||||
/* For all modes first aligned offset after size2 must be inaccessible. */
|
||||
KUNIT_EXPECT_KASAN_FAIL(test,
|
||||
ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
|
||||
|
||||
/*
|
||||
* For all modes all size2, middle, and size1 should land in separate
|
||||
* granules and thus the latter two offsets should be inaccessible.
|
||||
*/
|
||||
KUNIT_EXPECT_LE(test, round_up(size2, KASAN_GRANULE_SIZE),
|
||||
round_down(middle, KASAN_GRANULE_SIZE));
|
||||
KUNIT_EXPECT_LE(test, round_up(middle, KASAN_GRANULE_SIZE),
|
||||
round_down(size1, KASAN_GRANULE_SIZE));
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[middle] = 'x');
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1 - 1] = 'x');
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1] = 'x');
|
||||
|
||||
kfree(ptr2);
|
||||
}
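/*
 * Editor's illustration (not part of the patch): worked numbers for the
 * granule checks above, assuming the generic-KASAN granule size of 8 bytes
 * and the sizes used by krealloc_less_oob() below (size1 = 235, size2 = 201,
 * so middle = 218):
 *
 *	round_up(201, 8) = 208 <= round_down(218, 8) = 216
 *	round_up(218, 8) = 224 <= round_down(235, 8) = 232
 *
 * so size2, middle and size1 each fall in separate 8-byte granules, and the
 * accesses at middle, size1 - 1 and size1 are all expected to fault.
 */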
|
||||
|
||||
static void krealloc_more_oob(struct kunit *test)
|
||||
{
|
||||
krealloc_more_oob_helper(test, 201, 235);
|
||||
}
|
||||
|
||||
static void krealloc_less_oob(struct kunit *test)
|
||||
{
|
||||
krealloc_less_oob_helper(test, 235, 201);
|
||||
}
|
||||
|
||||
static void krealloc_pagealloc_more_oob(struct kunit *test)
|
||||
{
|
||||
/* page_alloc fallback in only implemented for SLUB. */
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
|
||||
|
||||
krealloc_more_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 201,
|
||||
KMALLOC_MAX_CACHE_SIZE + 235);
|
||||
}
|
||||
|
||||
static void krealloc_pagealloc_less_oob(struct kunit *test)
|
||||
{
|
||||
/* page_alloc fallback in only implemented for SLUB. */
|
||||
KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
|
||||
|
||||
krealloc_less_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 235,
|
||||
KMALLOC_MAX_CACHE_SIZE + 201);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that krealloc() detects a use-after-free, returns NULL,
|
||||
* and doesn't unpoison the freed object.
|
||||
*/
|
||||
static void krealloc_uaf(struct kunit *test)
|
||||
{
|
||||
char *ptr1, *ptr2;
|
||||
int size1 = 201;
|
||||
int size2 = 235;
|
||||
|
||||
ptr1 = kmalloc(size1, GFP_KERNEL);
|
||||
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
|
||||
kfree(ptr1);
|
||||
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, ptr2 = krealloc(ptr1, size2, GFP_KERNEL));
|
||||
KUNIT_ASSERT_PTR_EQ(test, (void *)ptr2, NULL);
|
||||
KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)ptr1);
|
||||
}
|
||||
|
||||
static void kmalloc_oob_16(struct kunit *test)
|
||||
{
|
||||
struct {
|
||||
@ -977,8 +1065,11 @@ static struct kunit_case kasan_kunit_test_cases[] = {
|
||||
KUNIT_CASE(pagealloc_oob_right),
|
||||
KUNIT_CASE(pagealloc_uaf),
|
||||
KUNIT_CASE(kmalloc_large_oob_right),
|
||||
KUNIT_CASE(kmalloc_oob_krealloc_more),
|
||||
KUNIT_CASE(kmalloc_oob_krealloc_less),
|
||||
KUNIT_CASE(krealloc_more_oob),
|
||||
KUNIT_CASE(krealloc_less_oob),
|
||||
KUNIT_CASE(krealloc_pagealloc_more_oob),
|
||||
KUNIT_CASE(krealloc_pagealloc_less_oob),
|
||||
KUNIT_CASE(krealloc_uaf),
|
||||
KUNIT_CASE(kmalloc_oob_16),
|
||||
KUNIT_CASE(kmalloc_uaf_16),
|
||||
KUNIT_CASE(kmalloc_oob_in_memset),
|
||||
|
@ -11,51 +11,6 @@ typedef void(*test_ubsan_fp)(void);
|
||||
#config, IS_ENABLED(config) ? "y" : "n"); \
|
||||
} while (0)
|
||||
|
||||
static void test_ubsan_add_overflow(void)
|
||||
{
|
||||
volatile int val = INT_MAX;
|
||||
volatile unsigned int uval = UINT_MAX;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
|
||||
val += 2;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
|
||||
uval += 2;
|
||||
}
|
||||
|
||||
static void test_ubsan_sub_overflow(void)
|
||||
{
|
||||
volatile int val = INT_MIN;
|
||||
volatile unsigned int uval = 0;
|
||||
volatile int val2 = 2;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
|
||||
val -= val2;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
|
||||
uval -= val2;
|
||||
}
|
||||
|
||||
static void test_ubsan_mul_overflow(void)
|
||||
{
|
||||
volatile int val = INT_MAX / 2;
|
||||
volatile unsigned int uval = UINT_MAX / 2;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
|
||||
val *= 3;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
|
||||
uval *= 3;
|
||||
}
|
||||
|
||||
static void test_ubsan_negate_overflow(void)
|
||||
{
|
||||
volatile int val = INT_MIN;
|
||||
|
||||
UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
|
||||
val = -val;
|
||||
}
|
||||
|
||||
static void test_ubsan_divrem_overflow(void)
|
||||
{
|
||||
volatile int val = 16;
|
||||
@ -155,10 +110,6 @@ static void test_ubsan_object_size_mismatch(void)
|
||||
}
|
||||
|
||||
static const test_ubsan_fp test_ubsan_array[] = {
|
||||
test_ubsan_add_overflow,
|
||||
test_ubsan_sub_overflow,
|
||||
test_ubsan_mul_overflow,
|
||||
test_ubsan_negate_overflow,
|
||||
test_ubsan_shift_out_of_bounds,
|
||||
test_ubsan_out_of_bounds,
|
||||
test_ubsan_load_invalid_value,
|
||||
|
Some files were not shown because too many files have changed in this diff.