Merge tag 'v5.18' into next
Sync up with mainline to get updates to OMAP4 keypad driver and other upstream goodies.
This commit is contained in:
9
.mailmap
9
.mailmap
@@ -45,6 +45,7 @@ Andrey Konovalov <andreyknvl@gmail.com> <andreyknvl@google.com>
|
||||
Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
|
||||
Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
|
||||
Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
|
||||
André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
|
||||
Andy Adamson <andros@citi.umich.edu>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
|
||||
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
|
||||
@@ -71,6 +72,7 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
||||
Brian Avery <b.avery@hp.com>
|
||||
Brian King <brking@us.ibm.com>
|
||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
|
||||
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
|
||||
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
|
||||
@@ -203,6 +205,7 @@ Juha Yrjola <at solidboot.com>
|
||||
Juha Yrjola <juha.yrjola@nokia.com>
|
||||
Juha Yrjola <juha.yrjola@solidboot.com>
|
||||
Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
|
||||
Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
|
||||
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
|
||||
Kay Sievers <kay.sievers@vrfy.org>
|
||||
Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
|
||||
@@ -212,6 +215,7 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||
Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
||||
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
||||
Koushik <raghavendra.koushik@neterion.com>
|
||||
@@ -247,6 +251,7 @@ Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@ginzinger.com>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@puri.sm>
|
||||
Martin Kepplinger <martink@posteo.de> <martin.kepplinger@theobroma-systems.com>
|
||||
Martyna Szapar-Mudlaw <martyna.szapar-mudlaw@linux.intel.com> <martyna.szapar-mudlaw@intel.com>
|
||||
Mathieu Othacehe <m.othacehe@gmail.com>
|
||||
Matthew Wilcox <willy@infradead.org> <matthew.r.wilcox@intel.com>
|
||||
Matthew Wilcox <willy@infradead.org> <matthew@wil.cx>
|
||||
@@ -389,6 +394,10 @@ Uwe Kleine-König <ukleinek@strlen.de>
|
||||
Uwe Kleine-König <ukl@pengutronix.de>
|
||||
Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
|
||||
Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
|
||||
Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
|
||||
Vinod Koul <vkoul@kernel.org> <vkoul@infradead.org>
|
||||
|
||||
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
@@ -0,0 +1,10 @@
|
||||
What: /proc/i8k
|
||||
Date: November 2001
|
||||
KernelVersion: 2.4.14
|
||||
Contact: Pali Rohár <pali@kernel.org>
|
||||
Description: Legacy interface for getting/setting sensor information like
|
||||
fan speed, temperature, serial number, hotkey status etc
|
||||
on Dell Laptops.
|
||||
Since the driver is now using the standard hwmon sysfs interface,
|
||||
the procfs interface is deprecated.
|
||||
Users: https://github.com/vitorafsr/i8kutils
|
||||
37
Documentation/ABI/removed/sysfs-mce
Normal file
37
Documentation/ABI/removed/sysfs-mce
Normal file
@@ -0,0 +1,37 @@
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||
Contact: Borislav Petkov <bp@suse.de>
|
||||
Date: Dec, 2021
|
||||
Description:
|
||||
Unused and obsolete after the advent of recoverable machine
|
||||
checks (see last sentence below) and those are present since
|
||||
2010 (Nehalem).
|
||||
|
||||
Original description:
|
||||
|
||||
The entries appear for each CPU, but they are truly shared
|
||||
between all CPUs.
|
||||
|
||||
Tolerance level. When a machine check exception occurs for a
|
||||
non corrected machine check the kernel can take different
|
||||
actions.
|
||||
|
||||
Since machine check exceptions can happen any time it is
|
||||
sometimes risky for the kernel to kill a process because it
|
||||
defies normal kernel locking rules. The tolerance level
|
||||
configures how hard the kernel tries to recover even at some
|
||||
risk of deadlock. Higher tolerant values trade potentially
|
||||
better uptime with the risk of a crash or even corruption
|
||||
(for tolerant >= 3).
|
||||
|
||||
== ===========================================================
|
||||
0 always panic on uncorrected errors, log corrected errors
|
||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||
3 never panic or SIGBUS, log all errors (for testing only)
|
||||
== ===========================================================
|
||||
|
||||
Default: 1
|
||||
|
||||
Note this only makes a difference if the CPU allows recovery
|
||||
from a machine check exception. Current x86 CPUs generally
|
||||
do not.
|
||||
@@ -155,6 +155,55 @@ Description:
|
||||
last zone of the device which may be smaller.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
The presence of this subdirectory of /sys/block/<disk>/queue/
|
||||
indicates that the device supports inline encryption. This
|
||||
subdirectory contains files which describe the inline encryption
|
||||
capabilities of the device. For more information about inline
|
||||
encryption, refer to Documentation/block/inline-encryption.rst.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/max_dun_bits
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] This file shows the maximum length, in bits, of data unit
|
||||
numbers accepted by the device in inline encryption requests.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/modes/<mode>
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] For each crypto mode (i.e., encryption/decryption
|
||||
algorithm) the device supports with inline encryption, a file
|
||||
will exist at this location. It will contain a hexadecimal
|
||||
number that is a bitmask of the supported data unit sizes, in
|
||||
bytes, for that crypto mode.
|
||||
|
||||
Currently, the crypto modes that may be supported are:
|
||||
|
||||
* AES-256-XTS
|
||||
* AES-128-CBC-ESSIV
|
||||
* Adiantum
|
||||
|
||||
For example, if a device supports AES-256-XTS inline encryption
|
||||
with data unit sizes of 512 and 4096 bytes, the file
|
||||
/sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
|
||||
will contain "0x1200".
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/crypto/num_keyslots
|
||||
Date: February 2022
|
||||
Contact: linux-block@vger.kernel.org
|
||||
Description:
|
||||
[RO] This file shows the number of keyslots the device has for
|
||||
use with inline encryption.
|
||||
|
||||
|
||||
What: /sys/block/<disk>/queue/dax
|
||||
Date: June 2016
|
||||
Contact: linux-block@vger.kernel.org
|
||||
|
||||
@@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus
|
||||
Description: internal kernel map of CPUs within the same die.
|
||||
Values: hexadecimal bitmask.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/topology/ppin
|
||||
Description: per-socket protected processor inventory number
|
||||
Values: hexadecimal.
|
||||
|
||||
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
|
||||
Description: human-readable list of CPUs within the same die.
|
||||
The format is like 0-3, 8-11, 14,17.
|
||||
|
||||
@@ -113,3 +113,144 @@ Description:
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||
Description:
|
||||
This sysfs interface allows user to configure features at
|
||||
runtime. The user can enable or disable features running at
|
||||
firmware as well as the user can configure the parameters of
|
||||
the features at runtime. The supported features are over
|
||||
temperature and external watchdog. Here, the external watchdog
|
||||
is completely different than the /dev/watchdog as the external
|
||||
watchdog is running on the firmware and it is used to monitor
|
||||
the health of firmware not APU(Linux). Also, the external
|
||||
watchdog is interfaced outside of the zynqmp soc.
|
||||
|
||||
The supported config ids for the feature configuration are:
|
||||
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||
disable the over temperature feature.
|
||||
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||
over temperature limit in Degree Celsius.
|
||||
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||
the external watchdog feature.
|
||||
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||
external watchdog feature.
|
||||
|
||||
Usage:
|
||||
|
||||
Select over temperature config ID to enable/disable feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check over temperature config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 1.
|
||||
|
||||
Select over temperature config ID to configure OT limit
|
||||
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check over temperature config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 2.
|
||||
|
||||
Select external watchdog config ID to enable/disable feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check external watchdog config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 3.
|
||||
|
||||
Select external watchdog config ID to configure time interval
|
||||
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
|
||||
Check external watchdog config ID is selected or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
The expected result is 4.
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||
Description:
|
||||
This sysfs interface allows the user to configure features at runtime.
|
||||
The user can enable or disable features running at firmware.
|
||||
Also, the user can configure the parameters of the features
|
||||
at runtime. The supported features are over temperature and
|
||||
external watchdog. Here, the external watchdog is completely
|
||||
different than the /dev/watchdog as the external watchdog is
|
||||
running on the firmware and it is used to monitor the health
|
||||
of firmware not APU(Linux). Also, the external watchdog is
|
||||
interfaced outside of the zynqmp soc.
|
||||
|
||||
By default the features are disabled in the firmware. The user
|
||||
can enable features by querying appropriate config id of the
|
||||
features.
|
||||
|
||||
The default limit for the over temperature is 90 Degree Celsius.
|
||||
The default timer interval for the external watchdog is 570ms.
|
||||
|
||||
The supported config ids for the feature configuration are:
|
||||
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||
disable the over temperature feature.
|
||||
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||
over temperature limit in Degree Celsius.
|
||||
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||
the external watchdog feature.
|
||||
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||
external watchdog feature.
|
||||
|
||||
Usage:
|
||||
|
||||
Enable over temperature feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature feature is enabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 1.
|
||||
|
||||
Disable over temperature feature
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature feature is disabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 0.
|
||||
|
||||
Configure over temperature limit to 50 Degree Celsius
|
||||
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 50 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the over temperature limit is configured or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 50.
|
||||
|
||||
Enable external watchdog feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog feature is enabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 1.
|
||||
|
||||
Disable external watchdog feature
|
||||
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog feature is disabled or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 0.
|
||||
|
||||
Configure external watchdog timer interval to 500ms
|
||||
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||
# echo 500 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
|
||||
Check whether the external watchdog timer interval is configured or not
|
||||
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||
The expected result is 500.
|
||||
|
||||
Users: Xilinx
|
||||
|
||||
@@ -6,7 +6,7 @@ Description:
|
||||
|
||||
===================== =======================================
|
||||
c_chmask capture channel mask
|
||||
c_srate capture sampling rate
|
||||
c_srate list of capture sampling rates (comma-separated)
|
||||
c_ssize capture sample size (bytes)
|
||||
c_mute_present capture mute control enable
|
||||
c_volume_present capture volume control enable
|
||||
@@ -17,7 +17,7 @@ Description:
|
||||
c_volume_res capture volume control resolution
|
||||
(in 1/256 dB)
|
||||
p_chmask playback channel mask
|
||||
p_srate playback sampling rate
|
||||
p_srate list of playback sampling rates (comma-separated)
|
||||
p_ssize playback sample size (bytes)
|
||||
p_mute_present playback mute control enable
|
||||
p_volume_present playback volume control enable
|
||||
@@ -29,4 +29,5 @@ Description:
|
||||
(in 1/256 dB)
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
===================== =======================================
|
||||
|
||||
@@ -6,8 +6,9 @@ Description:
|
||||
|
||||
===================== =======================================
|
||||
c_chmask capture channel mask
|
||||
c_srate capture sampling rate
|
||||
c_srate list of capture sampling rates (comma-separated)
|
||||
c_ssize capture sample size (bytes)
|
||||
c_hs_bint capture bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||
c_sync capture synchronization type
|
||||
(async/adaptive)
|
||||
c_mute_present capture mute control enable
|
||||
@@ -20,8 +21,9 @@ Description:
|
||||
(in 1/256 dB)
|
||||
fb_max maximum extra bandwidth in async mode
|
||||
p_chmask playback channel mask
|
||||
p_srate playback sampling rate
|
||||
p_srate list of playback sampling rates (comma-separated)
|
||||
p_ssize playback sample size (bytes)
|
||||
p_hs_bint playback bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||
p_mute_present playback mute control enable
|
||||
p_volume_present playback volume control enable
|
||||
p_volume_min playback volume control min value
|
||||
@@ -32,4 +34,5 @@ Description:
|
||||
(in 1/256 dB)
|
||||
req_number the number of pre-allocated requests
|
||||
for both capture and playback
|
||||
function_name name of the interface
|
||||
===================== =======================================
|
||||
|
||||
@@ -12,24 +12,7 @@ What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
|
||||
Date: May 2020
|
||||
KernelVersion: 5.8
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Allow the root user to disable/enable in runtime the clock
|
||||
gating mechanism in Gaudi. Due to how Gaudi is built, the
|
||||
clock gating needs to be disabled in order to access the
|
||||
registers of the TPC and MME engines. This is sometimes needed
|
||||
during debug by the user and hence the user needs this option.
|
||||
The user can supply a bitmask value, each bit represents
|
||||
a different engine to disable/enable its clock gating feature.
|
||||
The bitmask is composed of 20 bits:
|
||||
|
||||
======= ============
|
||||
0 - 7 DMA channels
|
||||
8 - 11 MME engines
|
||||
12 - 19 TPC engines
|
||||
======= ============
|
||||
|
||||
The bit's location of a specific engine can be determined
|
||||
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
|
||||
are defined in uapi habanalabs.h file in enum gaudi_engine_id
|
||||
Description: This setting is now deprecated as clock gating is handled solely by the f/w
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
||||
Date: Jan 2019
|
||||
@@ -239,6 +222,7 @@ KernelVersion: 5.6
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Sets the stop-on-error option for the device engines. Value of
|
||||
"0" is for disable, otherwise enable.
|
||||
Relevant only for GOYA and GAUDI.
|
||||
|
||||
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
||||
Date: Sep 2021
|
||||
|
||||
@@ -27,6 +27,16 @@ Description: One HPRE controller has one PF and multiple VFs, each function
|
||||
has a QM. Select the QM which below qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
HPRE driver supports to configure each function's QoS, the driver
|
||||
supports to write <bdf> value to alg_qos in the host. Such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reads alg_qos to
|
||||
get related QoS in the host and VM, such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
||||
Date: Sep 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
|
||||
@@ -14,6 +14,16 @@ Description: One SEC controller has one PF and multiple VFs, each function
|
||||
qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
SEC driver supports to configure each function's QoS, the driver
|
||||
supports to write <bdf> value to alg_qos in the host. Such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reads alg_qos to
|
||||
get related QoS in the host and VM, such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
|
||||
Date: Oct 2019
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
|
||||
@@ -26,6 +26,16 @@ Description: One ZIP controller has one PF and multiple VFs, each function
|
||||
has a QM. Select the QM which below qm refers to.
|
||||
Only available for PF.
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos
|
||||
Date: Jun 2021
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
Description: The <bdf> is related to the function for PF and VF.
|
||||
ZIP driver supports to configure each function's QoS, the driver
|
||||
supports to write <bdf> value to alg_qos in the host. Such as
|
||||
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||
1/1000~1000/1000 of total QoS. The driver reads alg_qos to
|
||||
get related QoS in the host and VM, such as "cat alg_qos".
|
||||
|
||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
|
||||
Date: Nov 2018
|
||||
Contact: linux-crypto@vger.kernel.org
|
||||
|
||||
@@ -1,3 +1,12 @@
|
||||
What: /sys/bus/cxl/flush
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(WO) If userspace manually unbinds a port the kernel schedules
|
||||
all descendant memdevs for unbind. Writing '1' to this attribute
|
||||
flushes that work.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/firmware_version
|
||||
Date: December, 2020
|
||||
KernelVersion: v5.12
|
||||
@@ -25,6 +34,24 @@ Description:
|
||||
identically named field in the Identify Memory Device Output
|
||||
Payload in the CXL-2.0 specification.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/serial
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(RO) 64-bit serial number per the PCIe Device Serial Number
|
||||
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
|
||||
Memory Device PCIe Capabilities and Extended Capabilities.
|
||||
|
||||
What: /sys/bus/cxl/devices/memX/numa_node
|
||||
Date: January, 2022
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
(RO) If NUMA is enabled and the platform has affinitized the
|
||||
host PCI device for this memory device, emit the CPU node
|
||||
affinity for this device.
|
||||
|
||||
What: /sys/bus/cxl/devices/*/devtype
|
||||
Date: June, 2021
|
||||
KernelVersion: v5.14
|
||||
@@ -34,6 +61,15 @@ Description:
|
||||
the same value communicated in the DEVTYPE environment variable
|
||||
for uevents for devices on the "cxl" bus.
|
||||
|
||||
What: /sys/bus/cxl/devices/*/modalias
|
||||
Date: December, 2021
|
||||
KernelVersion: v5.18
|
||||
Contact: linux-cxl@vger.kernel.org
|
||||
Description:
|
||||
CXL device objects export the modalias attribute which mirrors
|
||||
the same value communicated in the MODALIAS environment variable
|
||||
for uevents for devices on the "cxl" bus.
|
||||
|
||||
What: /sys/bus/cxl/devices/portX/uport
|
||||
Date: June, 2021
|
||||
KernelVersion: v5.14
|
||||
|
||||
@@ -476,6 +476,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||
@@ -1213,6 +1214,32 @@ Description:
|
||||
number or direction is not specified, applies to all channels of
|
||||
this type.
|
||||
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_en
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Similar to in_accel_mag[_y][_rising|_falling]_en, but the event
|
||||
value is relative to a reference magnitude. The reference magnitude
|
||||
includes the gravitational acceleration.
|
||||
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_value
|
||||
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_value
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
The value to which the reference magnitude of the channel is
|
||||
compared. If the axis is not specified, it applies to all channels
|
||||
of this type.
|
||||
|
||||
What: /sys/.../events/in_steps_change_en
|
||||
KernelVersion: 4.0
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
@@ -1252,6 +1279,10 @@ Description:
|
||||
Actually start the buffer capture up. Will start trigger
|
||||
if first device and appropriate.
|
||||
|
||||
Note that it might be impossible to configure other attributes,
|
||||
(e.g.: events, scale, sampling rate) if they impact the currently
|
||||
active buffer capture session.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/bufferY
|
||||
KernelVersion: 5.11
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
|
||||
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
@@ -0,0 +1,13 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_en
|
||||
KernelVersion: 5.14
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Used to enable an output for balancing cells for time
|
||||
controlled via in_voltageY-voltageZ_balance_switch_timer.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_timer
|
||||
KernelVersion: 5.14
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Time in seconds for which balance switch will be turned on.
|
||||
Multiple of 71.5 seconds.
|
||||
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
@@ -0,0 +1,86 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Dither enable. Write 1 to enable dither or 0 to disable it. This is useful
|
||||
for changing the dither parameters. The way it should be done is:
|
||||
|
||||
- disable dither operation;
|
||||
- change dither parameters (eg: frequency, phase...);
|
||||
- enable dither operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
This raw, unscaled value refers to the dither signal amplitude.
|
||||
The same scale as in out_voltageY_raw applies. However, the
|
||||
offset might be different as it's always 0 for this attribute.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Available range for dither raw amplitude values.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_offset
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Offset applied to out_voltageY_dither_raw. Read only attribute
|
||||
always set to 0.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sets the dither signal frequency. Units are in Hz.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither frequency.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Sets the dither signal phase. Units are in Radians.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase_available
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Returns the available values for the dither phase.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
|
||||
useful when one wants to change the DAC output codes. The way it should
|
||||
be done is:
|
||||
|
||||
- disable toggle operation;
|
||||
- change out_voltageY_raw0 and out_voltageY_raw1;
|
||||
- enable toggle operation.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
It has the same meaning as out_voltageY_raw. This attribute is
|
||||
specific to toggle enabled channels and refers to the DAC output
|
||||
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
|
||||
as in out_voltageY_raw applies.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Performs a SW toggle. This attribute is specific to toggle
|
||||
enabled channels and allows toggling between out_voltageY_raw0
|
||||
and out_voltageY_raw1 through software. Writing 0 will select
|
||||
out_voltageY_raw0 while 1 selects out_voltageY_raw1.
|
||||
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
@@ -0,0 +1,23 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_coarse
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_I) with coarse steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_coarse
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_Q) with coarse steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_fine
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_I) with fine steps.
|
||||
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_fine
|
||||
KernelVersion: 5.18
|
||||
Contact: linux-iio@vger.kernel.org
|
||||
Description:
|
||||
Read/write value for the digital attenuator gain (IF_Q) with fine steps.
|
||||
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
@@ -0,0 +1,28 @@
|
||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity<id>_setup
|
||||
Date: November 2021
|
||||
KernelVersion: 5.17
|
||||
Contact: Gwendal Grignou <gwendal@chromium.org>
|
||||
Description:
|
||||
SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
|
||||
defines if the input is
|
||||
+ not connected (HZ),
|
||||
+ grounded (GD),
|
||||
+ connected to an antenna where it can act as a base
|
||||
(DS - data shield), or measured input (MI).
|
||||
|
||||
The sensor rotates measurement across 4 phases
|
||||
(PH0, PH1, PH2, PH3), where the inputs are configured
|
||||
and then measured.
|
||||
|
||||
By default, during the first phase, [PH0], CS0 is measured,
|
||||
while CS1 and CS2 are used as shields.
|
||||
`cat in_proximity0_setup` returns "MI,DS,DS".
|
||||
[PH1], CS1 is measured, CS0 and CS2 are shield:
|
||||
`cat in_proximity1_setup` returns "DS,MI,DS".
|
||||
[PH2], CS2 is measured, CS0 and CS1 are shield:
|
||||
`cat in_proximity2_setup` returns "DS,DS,MI".
|
||||
[PH3], CS1 and CS2 are measured (combo mode):
|
||||
`cat in_proximity3_setup` returns "DS,MI,MI".
|
||||
|
||||
Note, these are the chip defaults. Hardware layout will most
|
||||
likely dictate different output. The entry is read-only.
|
||||
@@ -6,3 +6,38 @@ Description:
|
||||
|
||||
The libnvdimm sub-system implements a common sysfs interface for
|
||||
platform nvdimm resources. See Documentation/driver-api/nvdimm/.
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/format
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) Attribute group to describe the magic bits
|
||||
that go into perf_event_attr.config for a particular pmu.
|
||||
(See ABI/testing/sysfs-bus-event_source-devices-format).
|
||||
|
||||
Each attribute under this group defines a bit range of the
|
||||
perf_event_attr.config. Supported attribute is listed
|
||||
below::
|
||||
event = "config:0-4" - event ID
|
||||
|
||||
For example::
|
||||
ctl_res_cnt = "event=0x1"
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/events
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) Attribute group to describe performance monitoring events
|
||||
for the nvdimm memory device. Each attribute in this group
|
||||
describes a single performance monitoring event supported by
|
||||
this nvdimm pmu. The name of the file is the name of the event.
|
||||
(See ABI/testing/sysfs-bus-event_source-devices-events). A
|
||||
listing of the events supported by a given nvdimm provider type
|
||||
can be found in Documentation/driver-api/nvdimm/$provider.
|
||||
|
||||
What: /sys/bus/event_source/devices/nmemX/cpumask
|
||||
Date: February 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||
Description: (RO) This sysfs file exposes the cpumask which is designated to
|
||||
retrieve nvdimm pmu event counter data.
|
||||
|
||||
@@ -61,3 +61,15 @@ Description:
|
||||
* "CchRHCnt" : Cache Read Hit Count
|
||||
* "CchWHCnt" : Cache Write Hit Count
|
||||
* "FastWCnt" : Fast Write Count
|
||||
|
||||
What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject
|
||||
Date: Jan, 2022
|
||||
KernelVersion: v5.17
|
||||
Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev
|
||||
Description:
|
||||
(RO) Reports the health bitmap inject bitmap that is applied to
|
||||
bitmap received from PowerVM via the H_SCM_HEALTH. This is used
|
||||
to forcibly set specific bits returned from Hcall. These are then
|
||||
used to simulate various health or shutdown states for an nvdimm
|
||||
and are set by user-space tools like ndctl by issuing a PAPR DSM.
|
||||
|
||||
|
||||
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
@@ -0,0 +1,16 @@
|
||||
What: /sys/bus/peci/rescan
|
||||
Date: July 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||
Description:
|
||||
Writing a non-zero value to this attribute will
|
||||
initiate scan for PECI devices on all PECI controllers
|
||||
in the system.
|
||||
|
||||
What: /sys/bus/peci/devices/<controller_id>-<device_addr>/remove
|
||||
Date: July 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||
Description:
|
||||
Writing a non-zero value to this attribute will
|
||||
remove the PECI device and any of its children.
|
||||
@@ -116,7 +116,7 @@ Description:
|
||||
<value>[ForceIf:<attribute>=<value>]
|
||||
<value>[ForceIfNot:<attribute>=<value>]
|
||||
|
||||
For example:
|
||||
For example::
|
||||
|
||||
LegacyOrom/dell_value_modifier has value:
|
||||
Disabled[ForceIf:SecureBoot=Enabled]
|
||||
@@ -212,7 +212,7 @@ Description:
|
||||
the next boot.
|
||||
|
||||
Lenovo specific class extensions
|
||||
------------------------------
|
||||
--------------------------------
|
||||
|
||||
On Lenovo systems the following additional settings are available:
|
||||
|
||||
@@ -246,6 +246,55 @@ Description:
|
||||
that is being referenced (e.g hdd0, hdd1 etc)
|
||||
This attribute defaults to device 0.
|
||||
|
||||
certificate, signature, save_signature:
|
||||
These attributes are used for certificate based authentication. This is
|
||||
used in conjunction with a signing server as an alternative to password
|
||||
based authentication.
|
||||
The user writes to the attribute(s) with a BASE64 encoded string obtained
|
||||
from the signing server.
|
||||
The attributes can be displayed to check the stored value.
|
||||
|
||||
Some usage examples:
|
||||
|
||||
Installing a certificate to enable feature::
|
||||
|
||||
echo "supervisor password" > authentication/Admin/current_password
|
||||
echo "signed certificate" > authentication/Admin/certificate
|
||||
|
||||
Updating the installed certificate::
|
||||
|
||||
echo "signature" > authentication/Admin/signature
|
||||
echo "signed certificate" > authentication/Admin/certificate
|
||||
|
||||
Removing the installed certificate::
|
||||
|
||||
echo "signature" > authentication/Admin/signature
|
||||
echo "" > authentication/Admin/certificate
|
||||
|
||||
Changing a BIOS setting::
|
||||
|
||||
echo "signature" > authentication/Admin/signature
|
||||
echo "save signature" > authentication/Admin/save_signature
|
||||
echo Enable > attribute/PasswordBeep/current_value
|
||||
|
||||
You cannot enable certificate authentication if a supervisor password
|
||||
has not been set.
|
||||
Clearing the certificate results in no bios-admin authentication method
|
||||
being configured allowing anyone to make changes.
|
||||
After any of these operations the system must reboot for the changes to
|
||||
take effect.
|
||||
|
||||
certificate_thumbprint:
|
||||
Read only attribute used to display the MD5, SHA1 and SHA256 thumbprints
|
||||
for the certificate installed in the BIOS.
|
||||
|
||||
certificate_to_password:
|
||||
Write only attribute used to switch from certificate based authentication
|
||||
back to password based.
|
||||
Usage::
|
||||
|
||||
echo "signature" > authentication/Admin/signature
|
||||
echo "password" > authentication/Admin/certificate_to_password
|
||||
|
||||
|
||||
What: /sys/class/firmware-attributes/*/attributes/pending_reboot
|
||||
@@ -300,7 +349,7 @@ Description:
|
||||
|
||||
# echo "factory" > /sys/class/firmware-attributes/*/device/attributes/reset_bios
|
||||
# cat /sys/class/firmware-attributes/*/device/attributes/reset_bios
|
||||
# builtinsafe lastknowngood [factory] custom
|
||||
builtinsafe lastknowngood [factory] custom
|
||||
|
||||
Note that any changes to this attribute requires a reboot
|
||||
for changes to take effect.
|
||||
|
||||
@@ -9,6 +9,14 @@ Description:
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/hwmon/hwmonX/label
|
||||
Description:
|
||||
A descriptive label that makes it possible to uniquely identify a
|
||||
device within the system.
|
||||
The contents of the label are free-form.
|
||||
|
||||
RO
|
||||
|
||||
What: /sys/class/hwmon/hwmonX/update_interval
|
||||
Description:
|
||||
The interval at which the chip will update readings.
|
||||
|
||||
@@ -380,13 +380,17 @@ Description:
|
||||
algorithm to adjust the charge rate dynamically, without
|
||||
any user configuration required. "Custom" means that the charger
|
||||
uses the charge_control_* properties as configuration for some
|
||||
different algorithm.
|
||||
different algorithm. "Long Life" means the charger reduces its
|
||||
charging rate in order to prolong the battery health. "Bypass"
|
||||
means the charger bypasses the charging path around the
|
||||
integrated converter allowing for a "smart" wall adaptor to
|
||||
perform the power conversion externally.
|
||||
|
||||
Access: Read, Write
|
||||
|
||||
Valid values:
|
||||
"Unknown", "N/A", "Trickle", "Fast", "Standard",
|
||||
"Adaptive", "Custom"
|
||||
"Adaptive", "Custom", "Long Life", "Bypass"
|
||||
|
||||
What: /sys/class/power_supply/<supply_name>/charge_term_current
|
||||
Date: July 2014
|
||||
|
||||
@@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id
|
||||
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
|
||||
/sys/devices/system/cpu/cpuX/topology/ppin
|
||||
Date: December 2008
|
||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||
Description: CPU topology files that describe a logical CPU's relationship
|
||||
@@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship
|
||||
thread_siblings_list: human-readable list of cpuX's hardware
|
||||
threads within the same core as cpuX
|
||||
|
||||
ppin: human-readable Protected Processor Identification
|
||||
Number of the socket the cpu# belongs to. There should be
|
||||
one per physical_package_id. File is readable only to
|
||||
admin.
|
||||
|
||||
See Documentation/admin-guide/cputopology.rst for more information.
|
||||
|
||||
|
||||
@@ -662,6 +668,7 @@ Description: Preferred MTE tag checking mode
|
||||
|
||||
================ ==============================================
|
||||
"sync" Prefer synchronous mode
|
||||
"asymm" Prefer asymmetric mode
|
||||
"async" Prefer asynchronous mode
|
||||
================ ==============================================
|
||||
|
||||
|
||||
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
@@ -0,0 +1,9 @@
|
||||
What: /sys/bus/platform/drivers/eud/.../enable
|
||||
Date: February 2022
|
||||
Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
|
||||
Description:
|
||||
The Enable/Disable sysfs interface for Embedded
|
||||
USB Debugger(EUD). This enables and disables the
|
||||
EUD based on a 1 or a 0 value. By enabling EUD,
|
||||
the user is able to activate the mini-usb hub of
|
||||
EUD for debug and trace capabilities.
|
||||
@@ -69,6 +69,12 @@ KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Displays the device's version from the eFuse
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/fw_os_ver
|
||||
Date: Dec 2021
|
||||
KernelVersion: 5.18
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the firmware OS running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/hard_reset
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
@@ -115,7 +121,7 @@ What: /sys/class/habanalabs/hl<n>/infineon_ver
|
||||
Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's power supply F/W code
|
||||
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/max_power
|
||||
Date: Jan 2019
|
||||
@@ -221,3 +227,9 @@ Date: Jan 2019
|
||||
KernelVersion: 5.1
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the u-boot running on the device's CPU
|
||||
|
||||
What: /sys/class/habanalabs/hl<n>/vrm_ver
|
||||
Date: Jan 2022
|
||||
KernelVersion: not yet upstreamed
|
||||
Contact: ogabbay@kernel.org
|
||||
Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
|
||||
|
||||
79
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
79
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
@@ -0,0 +1,79 @@
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
This directory contains interface files for accessing Intel
|
||||
Software Defined Silicon (SDSi) features on a CPU. X
|
||||
represents the socket instance (though not the socket ID).
|
||||
The socket ID is determined by reading the registers file
|
||||
and decoding it per the specification.
|
||||
|
||||
Some files communicate with SDSi hardware through a mailbox.
|
||||
Should the operation fail, one of the following error codes
|
||||
may be returned:
|
||||
|
||||
========== =====
|
||||
Error Code Cause
|
||||
========== =====
|
||||
EIO General mailbox failure. Log may indicate cause.
|
||||
EBUSY Mailbox is owned by another agent.
|
||||
EPERM SDSI capability is not enabled in hardware.
|
||||
EPROTO Failure in mailbox protocol detected by driver.
|
||||
See log for details.
|
||||
EOVERFLOW For provision commands, the size of the data
|
||||
exceeds what may be written.
|
||||
ESPIPE Seeking is not allowed.
|
||||
ETIMEDOUT Failure to complete mailbox transaction in time.
|
||||
========== =====
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) The GUID for the registers file. The GUID identifies
|
||||
the layout of the registers file in this directory.
|
||||
Information about the register layouts for a particular GUID
|
||||
is available at http://github.com/intel/intel-sdsi
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/registers
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) Contains information needed by applications to provision
|
||||
a CPU and monitor status information. The layout of this file
|
||||
is determined by the GUID in this directory. Information about
|
||||
the layout for a particular GUID is available at
|
||||
http://github.com/intel/intel-sdsi
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_akc
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(WO) Used to write an Authentication Key Certificate (AKC) to
|
||||
the SDSi NVRAM for the CPU. The AKC is used to authenticate a
|
||||
Capability Activation Payload. Mailbox command.
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_cap
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(WO) Used to write a Capability Activation Payload (CAP) to the
|
||||
SDSi NVRAM for the CPU. CAPs are used to activate a given CPU
|
||||
feature. A CAP is validated by SDSi hardware using a previously
|
||||
provisioned AKC file. Upon successful authentication, the CPU
|
||||
configuration is updated. A cold reboot is required to fully
|
||||
activate the feature. Mailbox command.
|
||||
|
||||
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/state_certificate
|
||||
Date: Feb 2022
|
||||
KernelVersion: 5.18
|
||||
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||
Description:
|
||||
(RO) Used to read back the current State Certificate for the CPU
|
||||
from SDSi hardware. The State Certificate contains information
|
||||
about the current licenses on the CPU. Mailbox command.
|
||||
@@ -0,0 +1,29 @@
|
||||
What: /sys/firmware/papr/energy_scale_info
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Directory hosting a set of platform attributes like
|
||||
energy/frequency on Linux running as a PAPR guest.
|
||||
|
||||
Each file in a directory contains a platform
|
||||
attribute hierarchy pertaining to performance/
|
||||
energy-savings mode and processor frequency.
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Energy, frequency attributes directory for POWERVM servers
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/desc
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: String description of the energy attribute of <id>
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/value
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: Numeric value of the energy attribute of <id>
|
||||
|
||||
What: /sys/firmware/papr/energy_scale_info/<id>/value_desc
|
||||
Date: February 2022
|
||||
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||
Description: String value of the energy attribute of <id>
|
||||
@@ -9,8 +9,9 @@ Description: Shows all enabled kernel features.
|
||||
What: /sys/fs/erofs/<disk>/sync_decompress
|
||||
Date: November 2021
|
||||
Contact: "Huang Jianan" <huangjianan@oppo.com>
|
||||
Description: Control strategy of sync decompression
|
||||
Description: Control strategy of sync decompression:
|
||||
|
||||
- 0 (default, auto): enable for readpage, and enable for
|
||||
readahead on atomic contexts only,
|
||||
readahead on atomic contexts only.
|
||||
- 1 (force on): enable for readpage and readahead.
|
||||
- 2 (force off): disable for all situations.
|
||||
|
||||
@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
|
||||
0x04 F2FS_IPU_UTIL
|
||||
0x08 F2FS_IPU_SSR_UTIL
|
||||
0x10 F2FS_IPU_FSYNC
|
||||
0x20 F2FS_IPU_ASYNC,
|
||||
0x20 F2FS_IPU_ASYNC
|
||||
0x40 F2FS_IPU_NOCACHE
|
||||
0x80 F2FS_IPU_HONOR_OPU_WRITE
|
||||
==== =================
|
||||
|
||||
Refer segment.h for details.
|
||||
@@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small
|
||||
checkpoint is triggered, and issued during the checkpoint.
|
||||
By default, it is disabled with 0.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_discard_request
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the number of discards a thread will issue at a time.
|
||||
Higher number will allow the discard thread to finish its work
|
||||
faster, at the cost of higher latency for incoming I/O.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/min_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait between
|
||||
issuing discard requests when there are discards to be issued and
|
||||
no I/O aware interruptions occur.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait between
|
||||
issuing discard requests when there are discards to be issued and
|
||||
an I/O aware interruption occurs.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_discard_issue_time
|
||||
Date: December 2021
|
||||
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||
Description: Controls the interval the discard thread will wait when there are
|
||||
no discard operations to be issued.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/discard_granularity
|
||||
Date: July 2017
|
||||
Contact: "Chao Yu" <yuchao0@huawei.com>
|
||||
@@ -269,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
|
||||
What: /sys/fs/f2fs/<disk>/gc_urgent
|
||||
Date: August 2017
|
||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||
Description: Do background GC aggressively when set. When gc_urgent = 1,
|
||||
background thread starts to do GC by given gc_urgent_sleep_time
|
||||
interval. When gc_urgent = 2, F2FS will lower the bar of
|
||||
checking idle in order to process outstanding discard commands
|
||||
and GC a little bit aggressively. It is set to 0 by default.
|
||||
Description: Do background GC aggressively when set. Set to 0 by default.
|
||||
gc urgent high(1): does GC forcibly in a period of given
|
||||
gc_urgent_sleep_time and ignores I/O idling check. uses greedy
|
||||
GC approach and turns SSR mode on.
|
||||
gc urgent low(2): lowers the bar of checking I/O idling in
|
||||
order to process outstanding discard commands and GC a
|
||||
little bit aggressively. uses cost benefit GC approach.
|
||||
gc urgent mid(3): does GC forcibly in a period of given
|
||||
gc_urgent_sleep_time and executes a mid level of I/O idling check.
|
||||
uses cost benefit GC approach.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
|
||||
Date: August 2017
|
||||
@@ -430,6 +463,7 @@ Description: Show status of f2fs superblock in real time.
|
||||
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
|
||||
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
|
||||
0x2000 SBI_IS_RESIZEFS resizefs is in process
|
||||
0x4000 SBI_IS_FREEZING freezefs is in process
|
||||
====== ===================== =================================
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
|
||||
@@ -503,7 +537,7 @@ Date: July 2021
|
||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: Show how many segments have been reclaimed by GC during a specific
|
||||
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
|
||||
3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
|
||||
3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
|
||||
You can re-initialize this value to "0".
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/gc_segment_mode
|
||||
@@ -540,3 +574,9 @@ Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||
Description: You can set the trial count limit for GC urgent high mode with this value.
|
||||
If GC thread gets to the limit, the mode will turn back to GC normal mode.
|
||||
By default, the value is zero, which means there is no limit like before.
|
||||
|
||||
What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
|
||||
Date: January 2022
|
||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||
Description: Controls max # of node block writes to be used for roll forward
|
||||
recovery. This can limit the roll forward recovery time.
|
||||
|
||||
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
@@ -0,0 +1,274 @@
|
||||
What: /sys/kernel/mm/damon/
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Interface for Data Access MONitoring (DAMON). Contains files
|
||||
for controlling DAMON. For more details on DAMON itself,
|
||||
please refer to Documentation/admin-guide/mm/damon/index.rst.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Interface for privileged users of DAMON. Contains files for
|
||||
controlling DAMON that are intended to be used by privileged users.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON worker thread (kdamond)
|
||||
named '0' to 'N-1' under the kdamonds/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/state
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing 'on' or 'off' to this file makes the kdamond start or
|
||||
stop, respectively. Reading the file returns the keywords
|
||||
based on the current status. Writing 'update_schemes_stats' to
|
||||
the file updates contents of schemes stats files of the
|
||||
kdamond.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the pid of the kdamond if it is
|
||||
running.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON context named '0' to
|
||||
'N-1' under the contexts/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a keyword for a monitoring operations set ('vaddr' for
|
||||
virtual address spaces monitoring, and 'paddr' for the physical
|
||||
address space monitoring) to this file makes the context use
|
||||
the operations set. Reading the file returns the keyword for
|
||||
the operations set the context is set to use.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the sampling interval of the
|
||||
DAMON context in microseconds as the value. Reading this file
|
||||
returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the aggregation interval of
|
||||
the DAMON context in microseconds as the value. Reading this
|
||||
file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the update interval of the
|
||||
DAMON context in microseconds as the value. Reading this file
|
||||
returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
|
||||
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the minimum number of
|
||||
monitoring regions of the DAMON context as the value. Reading
|
||||
this file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a value to this file sets the maximum number of
|
||||
monitoring regions of the DAMON context as the value. Reading
|
||||
this file returns the value.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON target of the context
|
||||
named '0' to 'N-1' under the targets/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the pid of
|
||||
the target process if the context is for virtual address spaces
|
||||
monitoring, respectively.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for setting each DAMON target memory region of the
|
||||
context named '0' to 'N-1' under the regions/ directory. In
|
||||
case of the virtual address space monitoring, DAMON
|
||||
automatically sets the target memory region based on the target
|
||||
processes' mappings.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the start
|
||||
address of the monitoring region.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the end
|
||||
address of the monitoring region.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing a number 'N' to this file creates the number of
|
||||
directories for controlling each DAMON-based operation scheme
|
||||
of the context named '0' to 'N-1' under the schemes/ directory.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the action
|
||||
of the scheme.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
size of the scheme's target regions in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
size of the scheme's target regions in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
'nr_accesses' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
'nr_accesses' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the minimum
|
||||
'age' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the maximum
|
||||
'age' of the scheme's target regions.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the time
|
||||
quota of the scheme in milliseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the size
|
||||
quota of the scheme in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the quotas
|
||||
charge reset interval of the scheme in milliseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for 'size' in
|
||||
permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for
|
||||
'nr_accesses' in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the
|
||||
under-quota limit regions prioritization weight for 'age' in
|
||||
permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the metric
|
||||
of the watermarks for the scheme. The writable/readable
|
||||
keywords for this file are 'none' for disabling the watermarks
|
||||
feature, or 'free_mem_rate' for the system's global free memory
|
||||
rate in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the metric
|
||||
check interval of the watermarks for the scheme in
|
||||
microseconds.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the high
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the mid
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Writing to and reading from this file sets and gets the low
|
||||
watermark of the scheme in permil.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of regions that the action
|
||||
of the scheme has tried to be applied.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of regions that the
|
||||
action of the scheme has tried to be applied in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of regions that the action
|
||||
of the scheme has successfully applied.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the total size of regions that the
|
||||
action of the scheme has successfully applied in bytes.
|
||||
|
||||
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
|
||||
Date: Mar 2022
|
||||
Contact: SeongJae Park <sj@kernel.org>
|
||||
Description: Reading this file returns the number of the exceed events of
|
||||
the scheme's quotas.
|
||||
@@ -53,38 +53,6 @@ Description:
|
||||
(but some corrected errors might be still reported
|
||||
in other ways)
|
||||
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||
Contact: Andi Kleen <ak@linux.intel.com>
|
||||
Date: Feb, 2007
|
||||
Description:
|
||||
The entries appear for each CPU, but they are truly shared
|
||||
between all CPUs.
|
||||
|
||||
Tolerance level. When a machine check exception occurs for a
|
||||
non corrected machine check the kernel can take different
|
||||
actions.
|
||||
|
||||
Since machine check exceptions can happen any time it is
|
||||
sometimes risky for the kernel to kill a process because it
|
||||
defies normal kernel locking rules. The tolerance level
|
||||
configures how hard the kernel tries to recover even at some
|
||||
risk of deadlock. Higher tolerant values trade potentially
|
||||
better uptime with the risk of a crash or even corruption
|
||||
(for tolerant >= 3).
|
||||
|
||||
== ===========================================================
|
||||
0 always panic on uncorrected errors, log corrected errors
|
||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||
3 never panic or SIGBUS, log all errors (for testing only)
|
||||
== ===========================================================
|
||||
|
||||
Default: 1
|
||||
|
||||
Note this only makes a difference if the CPU allows recovery
|
||||
from a machine check exception. Current x86 CPUs generally
|
||||
do not.
|
||||
|
||||
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
||||
Contact: Andi Kleen <ak@linux.intel.com>
|
||||
Date: Feb, 2007
|
||||
|
||||
@@ -17,6 +17,7 @@ Date: October 2018
|
||||
KernelVersion: 4.20
|
||||
Contact: Matan Ziv-Av <matan@svgalib.org>
|
||||
Description:
|
||||
Deprecated use /sys/class/power_supply/CMB0/charge_control_end_threshold
|
||||
Maximal battery charge level. Accepted values are 80 or 100.
|
||||
|
||||
What: /sys/devices/platform/lg-laptop/fan_mode
|
||||
|
||||
@@ -37,8 +37,15 @@ Description: (RO) Set of available destinations (sinks) for a SMA
|
||||
PPS2 signal is sent to the PPS2 selector
|
||||
TS1 signal is sent to timestamper 1
|
||||
TS2 signal is sent to timestamper 2
|
||||
TS3 signal is sent to timestamper 3
|
||||
TS4 signal is sent to timestamper 4
|
||||
IRIG signal is sent to the IRIG-B module
|
||||
DCF signal is sent to the DCF module
|
||||
FREQ1 signal is sent to frequency counter 1
|
||||
FREQ2 signal is sent to frequency counter 2
|
||||
FREQ3 signal is sent to frequency counter 3
|
||||
FREQ4 signal is sent to frequency counter 4
|
||||
None signal input is disabled
|
||||
===== ================================================
|
||||
|
||||
What: /sys/class/timecard/ocpN/available_sma_outputs
|
||||
@@ -50,10 +57,16 @@ Description: (RO) Set of available sources for a SMA output signal.
|
||||
10Mhz output is from the 10Mhz reference clock
|
||||
PHC output PPS is from the PHC clock
|
||||
MAC output PPS is from the Miniature Atomic Clock
|
||||
GNSS output PPS is from the GNSS module
|
||||
GNSS1 output PPS is from the first GNSS module
|
||||
GNSS2 output PPS is from the second GNSS module
|
||||
IRIG output is from the PHC, in IRIG-B format
|
||||
DCF output is from the PHC, in DCF format
|
||||
GEN1 output is from frequency generator 1
|
||||
GEN2 output is from frequency generator 2
|
||||
GEN3 output is from frequency generator 3
|
||||
GEN4 output is from frequency generator 4
|
||||
GND output is GND
|
||||
VCC output is VCC
|
||||
===== ================================================
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_source
|
||||
@@ -63,6 +76,97 @@ Description: (RW) Contains the current synchronization source used by
|
||||
the PHC. May be changed by writing one of the listed
|
||||
values from the available_clock_sources attribute set.
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_status_drift
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the current drift value used by the firmware
|
||||
for internal disciplining of the atomic clock.
|
||||
|
||||
What: /sys/class/timecard/ocpN/clock_status_offset
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the current offset value used by the firmware
|
||||
for internal disciplining of the atomic clock.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Optional directory containing the sysfs nodes for
|
||||
frequency counter <X>.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX/frequency
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Contains the measured frequency over the specified
|
||||
measurement period.
|
||||
|
||||
What: /sys/class/timecard/ocpN/freqX/seconds
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) Specifies the number of seconds from 0-255 that the
|
||||
frequency should be measured over. Write 0 to disable.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Optional directory containing the sysfs nodes for
|
||||
frequency generator <X>.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/duty
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal duty cycle as a percentage from 1-99.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/period
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal period in nanoseconds.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/phase
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal phase offset in nanoseconds.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/polarity
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Specifies the signal polarity, either 1 or 0.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/running
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Either 0 or 1, showing if the signal generator is running.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/start
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RO) Shows the time in <sec>.<nsec> that the signal generator
|
||||
started running.
|
||||
|
||||
What: /sys/class/timecard/ocpN/genX/signal
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) Used to start the signal generator, and summarize
|
||||
the current status.
|
||||
|
||||
The signal generator may be started by writing the signal
|
||||
period, followed by the optional signal values. If the
|
||||
optional values are not provided, they default to the current
|
||||
settings, which may be obtained from the other sysfs nodes.
|
||||
|
||||
period [duty [phase [polarity]]]
|
||||
|
||||
echo 500000000 > signal # 1/2 second period
|
||||
echo 1000000 40 100 > signal
|
||||
echo 0 > signal # turn off generator
|
||||
|
||||
Period and phase are specified in nanoseconds. Duty cycle is
|
||||
a percentage from 1-99. Polarity is 1 or 0.
|
||||
|
||||
Reading this node will return:
|
||||
|
||||
period duty phase polarity start_time
|
||||
|
||||
What: /sys/class/timecard/ocpN/gnss_sync
|
||||
Date: September 2021
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
@@ -126,6 +230,16 @@ Description: (RW) These attributes specify the direction of the signal
|
||||
The 10Mhz reference clock input is currently only valid
|
||||
on SMA1 and may not be combined with other destination sinks.
|
||||
|
||||
What: /sys/class/timecard/ocpN/tod_correction
|
||||
Date: March 2022
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
Description: (RW) The incoming GNSS signal is in UTC time, and the NMEA
|
||||
format messages do not provide a TAI offset. This sets the
|
||||
correction value for the incoming time.
|
||||
|
||||
If UBX_LS is enabled, this should be 0, and the offset is
|
||||
taken from the UBX-NAV-TIMELS message.
|
||||
|
||||
What: /sys/class/timecard/ocpN/ts_window_adjust
|
||||
Date: September 2021
|
||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||
|
||||
@@ -26,7 +26,7 @@ SPHINX_CONF = conf.py
|
||||
PAPER =
|
||||
BUILDDIR = $(obj)/output
|
||||
PDFLATEX = xelatex
|
||||
LATEXOPTS = -interaction=batchmode
|
||||
LATEXOPTS = -interaction=batchmode -no-shell-escape
|
||||
|
||||
ifeq ($(KBUILD_VERBOSE),0)
|
||||
SPHINXOPTS += "-q"
|
||||
|
||||
@@ -278,20 +278,20 @@ appropriate parameters. In general this allows more efficient DMA
|
||||
on systems where System RAM exists above 4G _physical_ address.
|
||||
|
||||
Drivers for all PCI-X and PCIe compliant devices must call
|
||||
pci_set_dma_mask() as they are 64-bit DMA devices.
|
||||
dma_set_mask() as they are 64-bit DMA devices.
|
||||
|
||||
Similarly, drivers must also "register" this capability if the device
|
||||
can directly address "consistent memory" in System RAM above 4G physical
|
||||
address by calling pci_set_consistent_dma_mask().
|
||||
can directly address "coherent memory" in System RAM above 4G physical
|
||||
address by calling dma_set_coherent_mask().
|
||||
Again, this includes drivers for all PCI-X and PCIe compliant devices.
|
||||
Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
|
||||
64-bit DMA capable for payload ("streaming") data but not control
|
||||
("consistent") data.
|
||||
("coherent") data.
|
||||
|
||||
|
||||
Setup shared control data
|
||||
-------------------------
|
||||
Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
|
||||
Once the DMA masks are set, the driver can allocate "coherent" (a.k.a. shared)
|
||||
memory. See Documentation/core-api/dma-api.rst for a full description of
|
||||
the DMA APIs. This section is just a reminder that it needs to be done
|
||||
before enabling DMA on the device.
|
||||
@@ -367,7 +367,7 @@ steps need to be performed:
|
||||
- Disable the device from generating IRQs
|
||||
- Release the IRQ (free_irq())
|
||||
- Stop all DMA activity
|
||||
- Release DMA buffers (both streaming and consistent)
|
||||
- Release DMA buffers (both streaming and coherent)
|
||||
- Unregister from other subsystems (e.g. scsi or netdev)
|
||||
- Disable device from responding to MMIO/IO Port addresses
|
||||
- Release MMIO/IO Port resource(s)
|
||||
@@ -420,7 +420,7 @@ Once DMA is stopped, clean up streaming DMA first.
|
||||
I.e. unmap data buffers and return buffers to "upstream"
|
||||
owners if there is one.
|
||||
|
||||
Then clean up "consistent" buffers which contain the control data.
|
||||
Then clean up "coherent" buffers which contain the control data.
|
||||
|
||||
See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
|
||||
|
||||
|
||||
@@ -60,3 +60,31 @@ For example::
|
||||
|
||||
When a given field is not populated or its value provided by the platform
|
||||
firmware is invalid, the "not-defined" string is shown instead of the value.
|
||||
|
||||
ACPI Fan Fine Grain Control
|
||||
=============================
|
||||
|
||||
When _FIF object specifies support for fine grain control, then fan speed
|
||||
can be set from 0 to 100% with the recommended minimum "step size" via
|
||||
_FSL object. User can adjust fan speed using thermal sysfs cooling device.
|
||||
|
||||
Here the user can look at fan performance states for a reference speed (speed_rpm)
|
||||
and set it by changing cooling device cur_state. If the fine grain control
|
||||
is supported then user can also adjust to some other speeds which are
|
||||
not defined in the performance states.
|
||||
|
||||
The support of fine grain control is presented via sysfs attribute
|
||||
"fine_grain_control". If fine grain control is present, this attribute
|
||||
will show "1" otherwise "0".
|
||||
|
||||
This sysfs attribute is presented in the same directory as performance states.
|
||||
|
||||
ACPI Fan Performance Feedback
|
||||
=============================
|
||||
|
||||
The optional _FST object provides status information for the fan device.
|
||||
This includes field to provide current fan speed in revolutions per minute
|
||||
at which the fan is rotating.
|
||||
|
||||
This speed is presented in the sysfs using the attribute "fan_speed_rpm",
|
||||
in the same directory as performance states.
|
||||
|
||||
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
|
||||
|
||||
echo /dev/sda5 > /sys/block/zramX/backing_dev
|
||||
|
||||
before disksize setting. It supports only partition at this moment.
|
||||
If admin wants to use incompressible page writeback, they could do via::
|
||||
before disksize setting. It supports only partitions at this moment.
|
||||
If admin wants to use incompressible page writeback, they could do it via::
|
||||
|
||||
echo huge > /sys/block/zramX/writeback
|
||||
|
||||
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
|
||||
|
||||
echo idle > /sys/block/zramX/writeback
|
||||
|
||||
With the command, zram writeback idle pages from memory to the storage.
|
||||
With the command, zram will writeback idle pages from memory to the storage.
|
||||
|
||||
If admin want to write a specific page in zram device to backing device,
|
||||
If an admin wants to write a specific page in zram device to the backing device,
|
||||
they could write a page index into the interface.
|
||||
|
||||
echo "page_index=1251" > /sys/block/zramX/writeback
|
||||
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
|
||||
|
||||
To overcome the concern, zram supports "writeback_limit" feature.
|
||||
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
|
||||
any writeback. IOW, if admin wants to apply writeback budget, he should
|
||||
any writeback. IOW, if admin wants to apply writeback budget, they should
|
||||
enable writeback_limit_enable via::
|
||||
|
||||
$ echo 1 > /sys/block/zramX/writeback_limit_enable
|
||||
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
|
||||
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
|
||||
assigned via /sys/block/zramX/writeback_limit is meaningless.)
|
||||
|
||||
If admin want to limit writeback as per-day 400M, he could do it
|
||||
If admin wants to limit writeback as per-day 400M, they could do it
|
||||
like below::
|
||||
|
||||
$ MB_SHIFT=20
|
||||
@@ -375,16 +375,16 @@ like below::
|
||||
$ echo 1 > /sys/block/zram0/writeback_limit_enable
|
||||
|
||||
If admins want to allow further write again once the budget is exhausted,
|
||||
he could do it like below::
|
||||
they could do it like below::
|
||||
|
||||
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
||||
/sys/block/zram0/writeback_limit
|
||||
|
||||
If admin wants to see remaining writeback budget since last set::
|
||||
If an admin wants to see the remaining writeback budget since last set::
|
||||
|
||||
$ cat /sys/block/zramX/writeback_limit
|
||||
|
||||
If admin want to disable writeback limit, he could do::
|
||||
If an admin wants to disable writeback limit, they could do::
|
||||
|
||||
$ echo 0 > /sys/block/zramX/writeback_limit_enable
|
||||
|
||||
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
|
||||
writeback happened until you reset the zram to allocate extra writeback
|
||||
budget in next setting is user's job.
|
||||
|
||||
If admin wants to measure writeback count in a certain period, he could
|
||||
If admin wants to measure writeback count in a certain period, they could
|
||||
know it via /sys/block/zram0/bd_stat's 3rd column.
|
||||
|
||||
memory tracking
|
||||
|
||||
@@ -64,6 +64,7 @@ Brief summary of control files.
|
||||
threads
|
||||
cgroup.procs show list of processes
|
||||
cgroup.event_control an interface for event_fd()
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
memory.usage_in_bytes show current usage for memory
|
||||
(See 5.5 for details)
|
||||
memory.memsw.usage_in_bytes show current usage for memory+Swap
|
||||
@@ -75,6 +76,7 @@ Brief summary of control files.
|
||||
memory.max_usage_in_bytes show max memory usage recorded
|
||||
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
||||
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
||||
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||
memory.stat show various statistics
|
||||
memory.use_hierarchy set/show hierarchical account enabled
|
||||
This knob is deprecated and shouldn't be
|
||||
|
||||
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
|
||||
Amount of memory used to cache filesystem data,
|
||||
including tmpfs and shared memory.
|
||||
|
||||
kernel (npn)
|
||||
Amount of total kernel memory, including
|
||||
(kernel_stack, pagetables, percpu, vmalloc, slab) in
|
||||
addition to other kernel memory use cases.
|
||||
|
||||
kernel_stack
|
||||
Amount of memory allocated to kernel stacks.
|
||||
|
||||
|
||||
@@ -35,6 +35,7 @@ problems and bugs in particular.
|
||||
:maxdepth: 1
|
||||
|
||||
reporting-issues
|
||||
reporting-regressions
|
||||
security-bugs
|
||||
bug-hunting
|
||||
bug-bisect
|
||||
|
||||
@@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long)
|
||||
|
||||
Field 4 -- # of milliseconds spent reading (unsigned int)
|
||||
This is the total number of milliseconds spent by all reads (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 5 -- # of writes completed (unsigned long)
|
||||
This is the total number of writes completed successfully.
|
||||
@@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long)
|
||||
|
||||
Field 8 -- # of milliseconds spent writing (unsigned int)
|
||||
This is the total number of milliseconds spent by all writes (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 9 -- # of I/Os currently in progress (unsigned int)
|
||||
The only field that should go to zero. Incremented as requests are
|
||||
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
|
||||
|
||||
Field 15 -- # of milliseconds spent discarding (unsigned int)
|
||||
This is the total number of milliseconds spent by all discards (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||
|
||||
Field 16 -- # of flush requests completed
|
||||
This is the total number of flush requests completed successfully.
|
||||
|
||||
@@ -146,9 +146,9 @@ System kernel config options
|
||||
CONFIG_SYSFS=y
|
||||
|
||||
Note that "sysfs file system support" might not appear in the "Pseudo
|
||||
filesystems" menu if "Configure standard kernel features (for small
|
||||
systems)" is not enabled in "General Setup." In this case, check the
|
||||
.config file itself to ensure that sysfs is turned on, as follows::
|
||||
filesystems" menu if "Configure standard kernel features (expert users)"
|
||||
is not enabled in "General Setup." In this case, check the .config file
|
||||
itself to ensure that sysfs is turned on, as follows::
|
||||
|
||||
grep 'CONFIG_SYSFS' .config
|
||||
|
||||
@@ -533,6 +533,10 @@ the following command::
|
||||
|
||||
cp /proc/vmcore <dump-file>
|
||||
|
||||
or use scp to write out the dump file between hosts on a network, e.g::
|
||||
|
||||
scp /proc/vmcore remote_username@remote_ip:<dump-file>
|
||||
|
||||
You can also use makedumpfile utility to write out the dump file
|
||||
with specified options to filter out unwanted contents, e.g::
|
||||
|
||||
|
||||
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
|
||||
addresses in the higher VA range (refer to ARMv8 ARM document for
|
||||
more details).
|
||||
|
||||
MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
|
||||
-----------------------------------------------------------------------------
|
||||
|
||||
Used to get the correct ranges:
|
||||
MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
|
||||
VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
|
||||
VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
|
||||
|
||||
arm
|
||||
===
|
||||
|
||||
|
||||
@@ -724,6 +724,12 @@
|
||||
hvc<n> Use the hypervisor console device <n>. This is for
|
||||
both Xen and PowerPC hypervisors.
|
||||
|
||||
{ null | "" }
|
||||
Use to disable console output, i.e., to have kernel
|
||||
console messages discarded.
|
||||
This must be the only console= parameter used on the
|
||||
kernel command line.
|
||||
|
||||
If the device connected to the port is not a TTY but a braille
|
||||
device, prepend "brl," before the device type, for instance
|
||||
console=brl,ttyS0
|
||||
@@ -944,6 +950,30 @@
|
||||
dump out devices still on the deferred probe list after
|
||||
retrying.
|
||||
|
||||
dell_smm_hwmon.ignore_dmi=
|
||||
[HW] Continue probing hardware even if DMI data
|
||||
indicates that the driver is running on unsupported
|
||||
hardware.
|
||||
|
||||
dell_smm_hwmon.force=
|
||||
[HW] Activate driver even if SMM BIOS signature does
|
||||
not match list of supported models and enable otherwise
|
||||
blacklisted features.
|
||||
|
||||
dell_smm_hwmon.power_status=
|
||||
[HW] Report power status in /proc/i8k
|
||||
(disabled by default).
|
||||
|
||||
dell_smm_hwmon.restricted=
|
||||
[HW] Allow controlling fans only if SYS_ADMIN
|
||||
capability is set.
|
||||
|
||||
dell_smm_hwmon.fan_mult=
|
||||
[HW] Factor to multiply fan speed with.
|
||||
|
||||
dell_smm_hwmon.fan_max=
|
||||
[HW] Maximum configurable fan speed.
|
||||
|
||||
dfltcc= [HW,S390]
|
||||
Format: { on | off | def_only | inf_only | always }
|
||||
on: s390 zlib hardware support for compression on
|
||||
@@ -1435,6 +1465,14 @@
|
||||
as early as possible in order to facilitate early
|
||||
boot debugging.
|
||||
|
||||
ftrace_boot_snapshot
|
||||
[FTRACE] On boot up, a snapshot will be taken of the
|
||||
ftrace ring buffer that can be read at:
|
||||
/sys/kernel/tracing/snapshot.
|
||||
This is useful if you need tracing information from kernel
|
||||
boot up that is likely to be overridden by user space
|
||||
start up functionality.
|
||||
|
||||
ftrace_dump_on_oops[=orig_cpu]
|
||||
[FTRACE] will dump the trace buffers on oops.
|
||||
If no parameter is passed, ftrace will dump
|
||||
@@ -1625,7 +1663,7 @@
|
||||
[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
|
||||
enabled.
|
||||
Allows heavy hugetlb users to free up some more
|
||||
memory (6 * PAGE_SIZE for each 2MB hugetlb page).
|
||||
memory (7 * PAGE_SIZE for each 2MB hugetlb page).
|
||||
Format: { on | off (default) }
|
||||
|
||||
on: enable the feature
|
||||
@@ -1703,17 +1741,6 @@
|
||||
|
||||
i810= [HW,DRM]
|
||||
|
||||
i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
|
||||
indicates that the driver is running on unsupported
|
||||
hardware.
|
||||
i8k.force [HW] Activate i8k driver even if SMM BIOS signature
|
||||
does not match list of supported models.
|
||||
i8k.power_status
|
||||
[HW] Report power status in /proc/i8k
|
||||
(disabled by default)
|
||||
i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
|
||||
capability is set.
|
||||
|
||||
i915.invert_brightness=
|
||||
[DRM] Invert the sense of the variable that is used to
|
||||
set the brightness of the panel backlight. Normally a
|
||||
@@ -2339,13 +2366,35 @@
|
||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||
Default is 0 (don't ignore, but inject #GP)
|
||||
|
||||
kvm.eager_page_split=
|
||||
[KVM,X86] Controls whether or not KVM will try to
|
||||
proactively split all huge pages during dirty logging.
|
||||
Eager page splitting reduces interruptions to vCPU
|
||||
execution by eliminating the write-protection faults
|
||||
and MMU lock contention that would otherwise be
|
||||
required to split huge pages lazily.
|
||||
|
||||
VM workloads that rarely perform writes or that write
|
||||
only to a small region of VM memory may benefit from
|
||||
disabling eager page splitting to allow huge pages to
|
||||
still be used for reads.
|
||||
|
||||
The behavior of eager page splitting depends on whether
|
||||
KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
|
||||
disabled, all huge pages in a memslot will be eagerly
|
||||
split when dirty logging is enabled on that memslot. If
|
||||
enabled, eager page splitting will be performed during
|
||||
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
||||
cleared.
|
||||
|
||||
Eager page splitting currently only supports splitting
|
||||
huge pages mapped by the TDP MMU.
|
||||
|
||||
Default is Y (on).
|
||||
|
||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||
Default is false (don't support).
|
||||
|
||||
kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
|
||||
KVM MMU at runtime.
|
||||
Default is 0 (off)
|
||||
|
||||
kvm.nx_huge_pages=
|
||||
[KVM] Controls the software workaround for the
|
||||
X86_BUG_ITLB_MULTIHIT bug.
|
||||
@@ -2827,6 +2876,9 @@
|
||||
|
||||
For details see: Documentation/admin-guide/hw-vuln/mds.rst
|
||||
|
||||
mem=nn[KMG] [HEXAGON] Set the memory size.
|
||||
Must be specified, otherwise memory size will be 0.
|
||||
|
||||
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
|
||||
Amount of memory to be used in cases as follows:
|
||||
|
||||
@@ -2834,6 +2886,13 @@
|
||||
2 when the kernel is not able to see the whole system memory;
|
||||
3 memory that lies after 'mem=' boundary is excluded from
|
||||
the hypervisor, then assigned to KVM guests.
|
||||
4 to limit the memory available for kdump kernel.
|
||||
|
||||
[ARC,MICROBLAZE] - the limit applies only to low memory,
|
||||
high memory is not affected.
|
||||
|
||||
[ARM64] - only limits memory covered by the linear
|
||||
mapping. The NOMAP regions are not affected.
|
||||
|
||||
[X86] Work as limiting max address. Use together
|
||||
with memmap= to avoid physical address space collisions.
|
||||
@@ -2844,6 +2903,14 @@
|
||||
in above case 3, memory may need be hot added after boot
|
||||
if system memory of hypervisor is not sufficient.
|
||||
|
||||
mem=nn[KMG]@ss[KMG]
|
||||
[ARM,MIPS] - override the memory layout reported by
|
||||
firmware.
|
||||
Define a memory region of size nn[KMG] starting at
|
||||
ss[KMG].
|
||||
Multiple different regions can be specified with
|
||||
multiple mem= parameters on the command line.
|
||||
|
||||
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
||||
memory.
|
||||
|
||||
@@ -3485,8 +3552,7 @@
|
||||
difficult since unequal pointers can no longer be
|
||||
compared. However, if this command-line option is
|
||||
specified, then all normal pointers will have their true
|
||||
value printed. Pointers printed via %pK may still be
|
||||
hashed. This option should only be specified when
|
||||
value printed. This option should only be specified when
|
||||
debugging the kernel. Please do not use on production
|
||||
kernels.
|
||||
|
||||
@@ -3726,6 +3792,11 @@
|
||||
bit 3: print locks info if CONFIG_LOCKDEP is on
|
||||
bit 4: print ftrace buffer
|
||||
bit 5: print all printk messages in buffer
|
||||
bit 6: print all CPUs backtrace (if available in the arch)
|
||||
*Be aware* that this option may print a _lot_ of lines,
|
||||
so there are risks of losing older messages in the log.
|
||||
Use this option carefully, maybe worth to setup a
|
||||
bigger log buffer with "log_buf_len" along with this.
|
||||
|
||||
panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
|
||||
Format: <hex>[,nousertaint]
|
||||
@@ -4356,6 +4427,12 @@
|
||||
fully seed the kernel's CRNG. Default is controlled
|
||||
by CONFIG_RANDOM_TRUST_CPU.
|
||||
|
||||
random.trust_bootloader={on,off}
|
||||
[KNL] Enable or disable trusting the use of a
|
||||
seed passed by the bootloader (if available) to
|
||||
fully seed the kernel's CRNG. Default is controlled
|
||||
by CONFIG_RANDOM_TRUST_BOOTLOADER.
|
||||
|
||||
randomize_kstack_offset=
|
||||
[KNL] Enable or disable kernel stack offset
|
||||
randomization, which provides roughly 5 bits of
|
||||
@@ -4504,6 +4581,8 @@
|
||||
(the least-favored priority). Otherwise, when
|
||||
RCU_BOOST is not set, valid values are 0-99 and
|
||||
the default is zero (non-realtime operation).
|
||||
When RCU_NOCB_CPU is set, also adjust the
|
||||
priority of NOCB callback kthreads.
|
||||
|
||||
rcutree.rcu_nocb_gp_stride= [KNL]
|
||||
Set the number of NOCB callback kthreads in
|
||||
|
||||
@@ -38,7 +38,7 @@ FN lock.
|
||||
Battery care limit
|
||||
------------------
|
||||
|
||||
Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
|
||||
Writing 80/100 to /sys/class/power_supply/CMB0/charge_control_end_threshold
|
||||
sets the maximum capacity to charge the battery. Limiting the charge
|
||||
reduces battery capacity loss over time.
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ data from LCD controller (FIMD) through the SoC internal writeback data
|
||||
path. There are multiple FIMC instances in the SoCs (up to 4), having
|
||||
slightly different capabilities, like pixel alignment constraints, rotator
|
||||
availability, LCD writeback support, etc. The driver is located at
|
||||
drivers/media/platform/exynos4-is directory.
|
||||
drivers/media/platform/samsung/exynos4-is directory.
|
||||
|
||||
Supported SoCs
|
||||
--------------
|
||||
|
||||
@@ -284,7 +284,7 @@ tda9887 TDA 9885/6/7 analog IF demodulator
|
||||
tea5761 TEA 5761 radio tuner
|
||||
tea5767 TEA 5767 radio tuner
|
||||
tua9001 Infineon TUA9001 silicon tuner
|
||||
tuner-xc2028 XCeive xc2028/xc3028 tuners
|
||||
xc2028 XCeive xc2028/xc3028 tuners
|
||||
xc4000 Xceive XC4000 silicon tuner
|
||||
xc5000 Xceive XC5000 silicon tuner
|
||||
============ ==================================================
|
||||
|
||||
@@ -33,7 +33,7 @@ reference manual [#f1]_.
|
||||
Entities
|
||||
--------
|
||||
|
||||
imx7-mipi-csi2
|
||||
imx-mipi-csi2
|
||||
--------------
|
||||
|
||||
This is the MIPI CSI-2 receiver entity. It has one sink pad to receive the pixel
|
||||
|
||||
@@ -17,7 +17,7 @@ Introduction
|
||||
------------
|
||||
|
||||
This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
|
||||
driver located under drivers/media/platform/omap3isp. The original driver was
|
||||
driver located under drivers/media/platform/ti/omap3isp. The original driver was
|
||||
written by Texas Instruments but since that it has been rewritten (twice) at
|
||||
Nokia.
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ As of Revision AB, the ISS is described in detail in section 8.
|
||||
This driver is supporting **only** the CSI2-A/B interfaces for now.
|
||||
|
||||
It makes use of the Media Controller framework [#f2]_, and inherited most of the
|
||||
code from OMAP3 ISP driver (found under drivers/media/platform/omap3isp/\*),
|
||||
code from OMAP3 ISP driver (found under drivers/media/platform/ti/omap3isp/\*),
|
||||
except that it doesn't need an IOMMU now for ISS buffers memory mapping.
|
||||
|
||||
Supports usage of MMAP buffers only (for now).
|
||||
|
||||
@@ -76,3 +76,16 @@ vimc-capture:
|
||||
|
||||
* 1 Pad sink
|
||||
* 1 Pad source
|
||||
|
||||
Module options
|
||||
--------------
|
||||
|
||||
Vimc has a module parameter to configure the driver.
|
||||
|
||||
* ``allocator=<unsigned int>``
|
||||
|
||||
memory allocator selection, default is 0. It specifies the way buffers
|
||||
will be allocated.
|
||||
|
||||
- 0: vmalloc
|
||||
- 1: dma-contig
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
Detailed Usages
|
||||
===============
|
||||
|
||||
DAMON provides below three interfaces for different users.
|
||||
DAMON provides below interfaces for different users.
|
||||
|
||||
- *DAMON user space tool.*
|
||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||
@@ -14,17 +14,21 @@ DAMON provides below three interfaces for different users.
|
||||
virtual and physical address spaces monitoring. For more detail, please
|
||||
refer to its `usage document
|
||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||
- *debugfs interface.*
|
||||
:ref:`This <debugfs_interface>` is for privileged user space programmers who
|
||||
- *sysfs interface.*
|
||||
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||
features by reading from and writing to special debugfs files. Therefore,
|
||||
you can write and use your personalized DAMON debugfs wrapper programs that
|
||||
reads/writes the debugfs files instead of you. The `DAMON user space tool
|
||||
features by reading from and writing to special sysfs files. Therefore,
|
||||
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
||||
supports both virtual and physical address spaces monitoring. Note that this
|
||||
interface provides only simple :ref:`statistics <damos_stats>` for the
|
||||
monitoring results. For detailed monitoring results, DAMON provides a
|
||||
:ref:`tracepoint <tracepoint>`.
|
||||
- *debugfs interface.*
|
||||
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
|
||||
<sysfs_interface>`. This will be removed after next LTS kernel is released,
|
||||
so users should move to the :ref:`sysfs interface <sysfs_interface>`.
|
||||
- *Kernel Space Programming Interface.*
|
||||
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||
@@ -32,6 +36,340 @@ DAMON provides below three interfaces for different users.
|
||||
DAMON for various address spaces. For detail, please refer to the interface
|
||||
:doc:`document </vm/damon/api>`.
|
||||
|
||||
.. _sysfs_interface:
|
||||
|
||||
sysfs Interface
|
||||
===============
|
||||
|
||||
DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined. It
|
||||
creates multiple directories and files under its sysfs directory,
|
||||
``<sysfs>/kernel/mm/damon/``. You can control DAMON by writing to and reading
|
||||
from the files under the directory.
|
||||
|
||||
For a short example, users can monitor the virtual address space of a given
|
||||
workload as below. ::
|
||||
|
||||
# cd /sys/kernel/mm/damon/admin/
|
||||
# echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
|
||||
# echo vaddr > kdamonds/0/contexts/0/operations
|
||||
# echo 1 > kdamonds/0/contexts/0/targets/nr_targets
|
||||
# echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
|
||||
# echo on > kdamonds/0/state
|
||||
|
||||
Files Hierarchy
|
||||
---------------
|
||||
|
||||
The files hierarchy of DAMON sysfs interface is shown below. In the below
|
||||
figure, parents-children relations are represented with indentations, each
|
||||
directory is having ``/`` suffix, and files in each directory are separated by
|
||||
comma (","). ::
|
||||
|
||||
/sys/kernel/mm/damon/admin
|
||||
│ kdamonds/nr_kdamonds
|
||||
│ │ 0/state,pid
|
||||
│ │ │ contexts/nr_contexts
|
||||
│ │ │ │ 0/operations
|
||||
│ │ │ │ │ monitoring_attrs/
|
||||
│ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
|
||||
│ │ │ │ │ │ nr_regions/min,max
|
||||
│ │ │ │ │ targets/nr_targets
|
||||
│ │ │ │ │ │ 0/pid_target
|
||||
│ │ │ │ │ │ │ regions/nr_regions
|
||||
│ │ │ │ │ │ │ │ 0/start,end
|
||||
│ │ │ │ │ │ │ │ ...
|
||||
│ │ │ │ │ │ ...
|
||||
│ │ │ │ │ schemes/nr_schemes
|
||||
│ │ │ │ │ │ 0/action
|
||||
│ │ │ │ │ │ │ access_pattern/
|
||||
│ │ │ │ │ │ │ │ sz/min,max
|
||||
│ │ │ │ │ │ │ │ nr_accesses/min,max
|
||||
│ │ │ │ │ │ │ │ age/min,max
|
||||
│ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
|
||||
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
||||
│ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
|
||||
│ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
|
||||
│ │ │ │ │ │ ...
|
||||
│ │ │ │ ...
|
||||
│ │ ...
|
||||
|
||||
Root
|
||||
----
|
||||
|
||||
The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
|
||||
has one directory named ``admin``. The directory contains the files for
|
||||
privileged user space programs' control of DAMON. User space tools or daemons
|
||||
having the root permission could use this directory.
|
||||
|
||||
kdamonds/
|
||||
---------
|
||||
|
||||
The monitoring-related information including request specifications and results
|
||||
is called DAMON context. DAMON executes each context with a kernel thread
|
||||
called kdamond, and multiple kdamonds could run in parallel.
|
||||
|
||||
Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
|
||||
controlling the kdamonds exists. In the beginning, this directory has only one
|
||||
file, ``nr_kdamonds``. Writing a number (``N``) to the file creates the number
|
||||
of child directories named ``0`` to ``N-1``. Each directory represents each
|
||||
kdamond.
|
||||
|
||||
kdamonds/<N>/
|
||||
-------------
|
||||
|
||||
In each kdamond directory, two files (``state`` and ``pid``) and one directory
|
||||
(``contexts``) exist.
|
||||
|
||||
Reading ``state`` returns ``on`` if the kdamond is currently running, or
|
||||
``off`` if it is not running. Writing ``on`` or ``off`` makes the kdamond be
|
||||
in the state. Writing ``update_schemes_stats`` to ``state`` file updates the
|
||||
contents of stats files for each DAMON-based operation scheme of the kdamond.
|
||||
For details of the stats, please refer to :ref:`stats section
|
||||
<sysfs_schemes_stats>`.
|
||||
|
||||
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
|
||||
|
||||
``contexts`` directory contains files for controlling the monitoring contexts
|
||||
that this kdamond will execute.
|
||||
|
||||
kdamonds/<N>/contexts/
|
||||
----------------------
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_contexts``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named as
|
||||
``0`` to ``N-1``. Each directory represents each monitoring context. At the
|
||||
moment, only one context per kdamond is supported, so only ``0`` or ``1`` can
|
||||
be written to the file.
|
||||
|
||||
contexts/<N>/
|
||||
-------------
|
||||
|
||||
In each context directory, one file (``operations``) and three directories
|
||||
(``monitoring_attrs``, ``targets``, and ``schemes``) exist.
|
||||
|
||||
DAMON supports multiple types of monitoring operations, including those for
|
||||
virtual address space and the physical address space. You can set and get what
|
||||
type of monitoring operations DAMON will use for the context by writing one of
|
||||
below keywords to, and reading from the file.
|
||||
|
||||
- vaddr: Monitor virtual address spaces of specific processes
|
||||
- paddr: Monitor the physical address space of the system
|
||||
|
||||
contexts/<N>/monitoring_attrs/
|
||||
------------------------------
|
||||
|
||||
Files for specifying attributes of the monitoring including required quality
|
||||
and efficiency of the monitoring are in ``monitoring_attrs`` directory.
|
||||
Specifically, two directories, ``intervals`` and ``nr_regions`` exist in this
|
||||
directory.
|
||||
|
||||
Under ``intervals`` directory, three files for DAMON's sampling interval
|
||||
(``sample_us``), aggregation interval (``aggr_us``), and update interval
|
||||
(``update_us``) exist. You can set and get the values in micro-seconds by
|
||||
writing to and reading from the files.
|
||||
|
||||
Under ``nr_regions`` directory, two files for the lower-bound and upper-bound
|
||||
of DAMON's monitoring regions (``min`` and ``max``, respectively), which
|
||||
controls the monitoring overhead, exist. You can set and get the values by
|
||||
writing to and reading from the files.
|
||||
|
||||
For more details about the intervals and monitoring regions range, please refer
|
||||
to the Design document (:doc:`/vm/damon/design`).
|
||||
|
||||
contexts/<N>/targets/
|
||||
---------------------
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_targets``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each monitoring target.
|
||||
|
||||
targets/<N>/
|
||||
------------
|
||||
|
||||
In each target directory, one file (``pid_target``) and one directory
|
||||
(``regions``) exist.
|
||||
|
||||
If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
|
||||
be a process. You can specify the process to DAMON by writing the pid of the
|
||||
process to the ``pid_target`` file.
|
||||
|
||||
targets/<N>/regions
|
||||
-------------------
|
||||
|
||||
When ``vaddr`` monitoring operations set is being used (``vaddr`` is written to
|
||||
the ``contexts/<N>/operations`` file), DAMON automatically sets and updates the
|
||||
monitoring target regions so that entire memory mappings of target processes
|
||||
can be covered. However, users could want to set the initial monitoring region
|
||||
to specific address ranges.
|
||||
|
||||
In contrast, DAMON does not automatically set and update the monitoring target
|
||||
regions when ``paddr`` monitoring operations set is being used (``paddr`` is
|
||||
written to the ``contexts/<N>/operations``). Therefore, users should set the
|
||||
monitoring target regions by themselves in the case.
|
||||
|
||||
For such cases, users can explicitly set the initial monitoring target regions
|
||||
as they want, by writing proper values to the files under this directory.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_regions``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each initial monitoring target region.
|
||||
|
||||
regions/<N>/
|
||||
------------
|
||||
|
||||
In each region directory, you will find two files (``start`` and ``end``). You
|
||||
can set and get the start and end addresses of the initial monitoring target
|
||||
region by writing to and reading from the files, respectively.
|
||||
|
||||
contexts/<N>/schemes/
|
||||
---------------------
|
||||
|
||||
For usual DAMON-based data access aware memory management optimizations, users
|
||||
would normally want the system to apply a memory management action to a memory
|
||||
region of a specific access pattern. DAMON receives such formalized operation
|
||||
schemes from the user and applies those to the target memory regions. Users
|
||||
can get and set the schemes by reading from and writing to files under this
|
||||
directory.
|
||||
|
||||
In the beginning, this directory has only one file, ``nr_schemes``. Writing a
|
||||
number (``N``) to the file creates the number of child directories named ``0``
|
||||
to ``N-1``. Each directory represents each DAMON-based operation scheme.
|
||||
|
||||
schemes/<N>/
|
||||
------------
|
||||
|
||||
In each scheme directory, four directories (``access_pattern``, ``quotas``,
|
||||
``watermarks``, and ``stats``) and one file (``action``) exist.
|
||||
|
||||
The ``action`` file is for setting and getting what action you want to apply to
|
||||
memory regions having specific access pattern of the interest. The keywords
|
||||
that can be written to and read from the file and their meaning are as below.
|
||||
|
||||
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``
|
||||
- ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``
|
||||
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
|
||||
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
|
||||
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
||||
- ``stat``: Do nothing but count the statistics
|
||||
|
||||
schemes/<N>/access_pattern/
|
||||
---------------------------
|
||||
|
||||
The target access pattern of each DAMON-based operation scheme is constructed
|
||||
with three ranges including the size of the region in bytes, number of
|
||||
monitored accesses per aggregate interval, and number of aggregated intervals
|
||||
for the age of the region.
|
||||
|
||||
Under the ``access_pattern`` directory, three directories (``sz``,
|
||||
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
|
||||
exist. You can set and get the access pattern for the given scheme by writing
|
||||
to and reading from the ``min`` and ``max`` files under ``sz``,
|
||||
``nr_accesses``, and ``age`` directories, respectively.
|
||||
|
||||
schemes/<N>/quotas/
|
||||
-------------------
|
||||
|
||||
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||
size quota for each scheme. In detail, users can ask DAMON to try to use only
|
||||
up to specific time (``time quota``) for applying the action, and to apply the
|
||||
action to only up to specific amount (``size quota``) of memory regions having
|
||||
the target access pattern within a given time interval (``reset interval``).
|
||||
|
||||
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||
regions of the ``target access pattern`` based on their size, access frequency,
|
||||
and age. For personalized prioritization, users can set the weights for the
|
||||
three properties.
|
||||
|
||||
Under ``quotas`` directory, three files (``ms``, ``bytes``,
|
||||
``reset_interval_ms``) and one directory (``weights``) having three files
|
||||
(``sz_permil``, ``nr_accesses_permil``, and ``age_permil``) in it exist.
|
||||
|
||||
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
||||
``reset interval`` in milliseconds by writing the values to the three files,
|
||||
respectively. You can also set the prioritization weights for size, access
|
||||
frequency, and age in per-thousand unit by writing the values to the three
|
||||
files under the ``weights`` directory.
|
||||
|
||||
schemes/<N>/watermarks/
|
||||
-----------------------
|
||||
|
||||
To allow easy activation and deactivation of each scheme based on system
|
||||
status, DAMON provides a feature called watermarks. The feature receives five
|
||||
values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
|
||||
``metric`` is the system metric such as free memory ratio that can be measured.
|
||||
If the metric value of the system is higher than the value in ``high`` or lower
|
||||
than ``low`` at the moment, the scheme is deactivated. If the value is lower
|
||||
than ``mid``, the scheme is activated.
|
||||
|
||||
Under the watermarks directory, five files (``metric``, ``interval_us``,
|
||||
``high``, ``mid``, and ``low``) for setting each value exist. You can set and
|
||||
get the five values by writing to and reading from the files, respectively.
|
||||
|
||||
Keywords and meanings of those that can be written to the ``metric`` file are
|
||||
as below.
|
||||
|
||||
- none: Ignore the watermarks
|
||||
- free_mem_rate: System's free memory rate (per thousand)
|
||||
|
||||
The ``interval`` should be written in microseconds unit.
|
||||
|
||||
.. _sysfs_schemes_stats:
|
||||
|
||||
schemes/<N>/stats/
|
||||
------------------
|
||||
|
||||
DAMON counts the total number and bytes of regions that each scheme is tried to
|
||||
be applied, the two numbers for the regions that each scheme is successfully
|
||||
applied, and the total number of the quota limit exceeds. These statistics can
|
||||
be used for online analysis or tuning of the schemes.
|
||||
|
||||
The statistics can be retrieved by reading the files under ``stats`` directory
|
||||
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
|
||||
``qt_exceeds``), respectively. The files are not updated in real time, so you
|
||||
should ask DAMON sysfs interface to update the content of the files for the
|
||||
stats by writing a special keyword, ``update_schemes_stats`` to the relevant
|
||||
``kdamonds/<N>/state`` file.
|
||||
|
||||
Example
|
||||
~~~~~~~
|
||||
|
||||
Below commands apply a scheme saying "If a memory region of size in [4KiB,
|
||||
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
||||
interval in [10, 20], page out the region. For the paging out, use only up to
|
||||
10ms per second, and also don't page out more than 1GiB per second. Under the
|
||||
limitation, page out memory regions having longer age first. Also, check the
|
||||
free memory rate of the system every 5 seconds, start the monitoring and paging
|
||||
out when the free memory rate becomes lower than 50%, but stop it if the free
|
||||
memory rate becomes larger than 60%, or lower than 30%". ::
|
||||
|
||||
# cd <sysfs>/kernel/mm/damon/admin
|
||||
# # populate directories
|
||||
# echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
|
||||
# echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
|
||||
# cd kdamonds/0/contexts/0/schemes/0
|
||||
# # set the basic access pattern and the action
|
||||
# echo 4096 > access_pattern/sz/min
|
||||
# echo 8192 > access_pattern/sz/max
|
||||
# echo 0 > access_pattern/nr_accesses/min
|
||||
# echo 5 > access_pattern/nr_accesses/max
|
||||
# echo 10 > access_pattern/age/min
|
||||
# echo 20 > access_pattern/age/max
|
||||
# echo pageout > action
|
||||
# # set quotas
|
||||
# echo 10 > quotas/ms
|
||||
# echo $((1024*1024*1024)) > quotas/bytes
|
||||
# echo 1000 > quotas/reset_interval_ms
|
||||
# # set watermark
|
||||
# echo free_mem_rate > watermarks/metric
|
||||
# echo 5000000 > watermarks/interval_us
|
||||
# echo 600 > watermarks/high
|
||||
# echo 500 > watermarks/mid
|
||||
# echo 300 > watermarks/low
|
||||
|
||||
Please note that it's highly recommended to use user space tools like `damo
|
||||
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
|
||||
the files as above. Above is only for an example.
|
||||
|
||||
.. _debugfs_interface:
|
||||
|
||||
@@ -47,7 +385,7 @@ Attributes
|
||||
----------
|
||||
|
||||
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
||||
``regions update interval``, and min/max number of monitoring target regions by
|
||||
``update interval``, and min/max number of monitoring target regions by
|
||||
reading from and writing to the ``attrs`` file. To know about the monitoring
|
||||
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
|
||||
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
||||
@@ -108,24 +446,28 @@ In such cases, users can explicitly set the initial monitoring target regions
|
||||
as they want, by writing proper values to the ``init_regions`` file. Each line
|
||||
of the input should represent one region in below form.::
|
||||
|
||||
<target id> <start address> <end address>
|
||||
<target idx> <start address> <end address>
|
||||
|
||||
The ``target id`` should already in ``target_ids`` file, and the regions should
|
||||
be passed in address order. For example, below commands will set a couple of
|
||||
address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
|
||||
region of process 42, and another couple of address ranges, ``20-40`` and
|
||||
``50-100`` as that of process 4242.::
|
||||
The ``target idx`` should be the index of the target in ``target_ids`` file,
|
||||
starting from ``0``, and the regions should be passed in address order. For
|
||||
example, below commands will set a couple of address ranges, ``1-100`` and
|
||||
``100-200`` as the initial monitoring target region of pid 42, which is the
|
||||
first one (index ``0``) in ``target_ids``, and another couple of address
|
||||
ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
|
||||
(index ``1``) in ``target_ids``.::
|
||||
|
||||
# cd <debugfs>/damon
|
||||
# echo "42 1 100
|
||||
42 100 200
|
||||
4242 20 40
|
||||
4242 50 100" > init_regions
|
||||
# cat target_ids
|
||||
42 4242
|
||||
# echo "0 1 100
|
||||
0 100 200
|
||||
1 20 40
|
||||
1 50 100" > init_regions
|
||||
|
||||
Note that this sets the initial monitoring target regions only. In case of
|
||||
virtual memory monitoring, DAMON will automatically update the boundary of the
|
||||
regions after one ``regions update interval``. Therefore, users should set the
|
||||
``regions update interval`` large enough in this case, if they don't want the
|
||||
regions after one ``update interval``. Therefore, users should set the
|
||||
``update interval`` large enough in this case, if they don't want the
|
||||
update.
|
||||
|
||||
|
||||
|
||||
@@ -130,9 +130,25 @@ attribute, e.g.::
|
||||
echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
|
||||
|
||||
When zswap same-filled page identification is disabled at runtime, it will stop
|
||||
checking for the same-value filled pages during store operation. However, the
|
||||
existing pages which are marked as same-value filled pages remain stored
|
||||
unchanged in zswap until they are either loaded or invalidated.
|
||||
checking for the same-value filled pages during store operation.
|
||||
In other words, every page will be then considered non-same-value filled.
|
||||
However, the existing pages which are marked as same-value filled pages remain
|
||||
stored unchanged in zswap until they are either loaded or invalidated.
|
||||
|
||||
In some circumstances it might be advantageous to make use of just the zswap
|
||||
ability to efficiently store same-filled pages without enabling the whole
|
||||
compressed page storage.
|
||||
In this case the handling of non-same-value pages by zswap (enabled by default)
|
||||
can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
|
||||
to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
|
||||
It can also be enabled and disabled at runtime using the sysfs
|
||||
``non_same_filled_pages_enabled`` attribute, e.g.::
|
||||
|
||||
echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
|
||||
|
||||
Disabling both ``zswap.same_filled_pages_enabled`` and
|
||||
``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
|
||||
pages by zswap.
|
||||
|
||||
To prevent zswap from shrinking pool when zswap is full and there's a high
|
||||
pressure on swap (this will result in flipping pages in and out zswap pool
|
||||
|
||||
@@ -8,6 +8,7 @@ Performance monitor support
|
||||
:maxdepth: 1
|
||||
|
||||
hisi-pmu
|
||||
hisi-pcie-pmu
|
||||
imx-ddr
|
||||
qcom_l2_pmu
|
||||
qcom_l3_pmu
|
||||
|
||||
@@ -19,7 +19,7 @@ Linux kernel. The new mechanism is based on Collaborative Processor
|
||||
Performance Control (CPPC) which provides finer grain frequency management
|
||||
than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
|
||||
the ACPI P-states driver to manage CPU frequency and clocks with switching
|
||||
only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a
|
||||
only in 3 P-states. CPPC replaces the ACPI P-states controls and allows a
|
||||
flexible, low-latency interface for the Linux kernel to directly
|
||||
communicate the performance hints to hardware.
|
||||
|
||||
@@ -27,7 +27,7 @@ communicate the performance hints to hardware.
|
||||
``ondemand``, etc. to manage the performance hints which are provided by
|
||||
CPPC hardware functionality that internally follows the hardware
|
||||
specification (for details refer to AMD64 Architecture Programmer's Manual
|
||||
Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic
|
||||
Volume 2: System Programming [1]_). Currently, ``amd-pstate`` supports basic
|
||||
frequency control function according to kernel governors on some of the
|
||||
Zen2 and Zen3 processors, and we will implement more AMD specific functions
|
||||
in future after we verify them on the hardware and SBIOS.
|
||||
@@ -41,9 +41,9 @@ continuous, abstract, and unit-less performance value in a scale that is
|
||||
not tied to a specific performance state / frequency. This is an ACPI
|
||||
standard [2]_ which software can specify application performance goals and
|
||||
hints as a relative target to the infrastructure limits. AMD processors
|
||||
provides the low latency register model (MSR) instead of AML code
|
||||
provide the low latency register model (MSR) instead of an AML code
|
||||
interpreter for performance adjustments. ``amd-pstate`` will initialize a
|
||||
``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks
|
||||
``struct cpufreq_driver`` instance, ``amd_pstate_driver``, with the callbacks
|
||||
to manage each performance update behavior. ::
|
||||
|
||||
Highest Perf ------>+-----------------------+ +-----------------------+
|
||||
@@ -91,26 +91,26 @@ AMD CPPC Performance Capability
|
||||
Highest Performance (RO)
|
||||
.........................
|
||||
|
||||
It is the absolute maximum performance an individual processor may reach,
|
||||
This is the absolute maximum performance an individual processor may reach,
|
||||
assuming ideal conditions. This performance level may not be sustainable
|
||||
for long durations and may only be achievable if other platform components
|
||||
are in a specific state; for example, it may require other processors be in
|
||||
are in a specific state; for example, it may require other processors to be in
|
||||
an idle state. This would be equivalent to the highest frequencies
|
||||
supported by the processor.
|
||||
|
||||
Nominal (Guaranteed) Performance (RO)
|
||||
......................................
|
||||
|
||||
It is the maximum sustained performance level of the processor, assuming
|
||||
ideal operating conditions. In absence of an external constraint (power,
|
||||
thermal, etc.) this is the performance level the processor is expected to
|
||||
This is the maximum sustained performance level of the processor, assuming
|
||||
ideal operating conditions. In the absence of an external constraint (power,
|
||||
thermal, etc.), this is the performance level the processor is expected to
|
||||
be able to maintain continuously. All cores/processors are expected to be
|
||||
able to sustain their nominal performance state simultaneously.
|
||||
|
||||
Lowest non-linear Performance (RO)
|
||||
...................................
|
||||
|
||||
It is the lowest performance level at which nonlinear power savings are
|
||||
This is the lowest performance level at which nonlinear power savings are
|
||||
achieved, for example, due to the combined effects of voltage and frequency
|
||||
scaling. Above this threshold, lower performance levels should be generally
|
||||
more energy efficient than higher performance levels. This register
|
||||
@@ -119,7 +119,7 @@ effectively conveys the most efficient performance level to ``amd-pstate``.
|
||||
Lowest Performance (RO)
|
||||
........................
|
||||
|
||||
It is the absolute lowest performance level of the processor. Selecting a
|
||||
This is the absolute lowest performance level of the processor. Selecting a
|
||||
performance level lower than the lowest nonlinear performance level may
|
||||
cause an efficiency penalty but should reduce the instantaneous power
|
||||
consumption of the processor.
|
||||
@@ -149,14 +149,14 @@ a relative number. This can be expressed as percentage of nominal
|
||||
performance (infrastructure max). Below the nominal sustained performance
|
||||
level, desired performance expresses the average performance level of the
|
||||
processor subject to hardware. Above the nominal performance level,
|
||||
processor must provide at least nominal performance requested and go higher
|
||||
the processor must provide at least nominal performance requested and go higher
|
||||
if current operating conditions allow.
|
||||
|
||||
Energy Performance Preference (EPP) (RW)
|
||||
.........................................
|
||||
|
||||
Provides a hint to the hardware if software wants to bias toward performance
|
||||
(0x0) or energy efficiency (0xff).
|
||||
This attribute provides a hint to the hardware if software wants to bias
|
||||
toward performance (0x0) or energy efficiency (0xff).
|
||||
|
||||
|
||||
Key Governors Support
|
||||
@@ -173,35 +173,34 @@ operating frequencies supported by the hardware. Users can check the
|
||||
``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
|
||||
frequency control. It is to fine tune the processor configuration on
|
||||
``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
|
||||
registers adjust_perf callback to implement the CPPC similar performance
|
||||
update behavior. It is initialized by ``sugov_start`` and then populate the
|
||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as
|
||||
the utilization update callback function in CPU scheduler. CPU scheduler
|
||||
will call ``cpufreq_update_util`` and assign the target performance
|
||||
according to the ``struct sugov_cpu`` that utilization update belongs to.
|
||||
Then ``amd-pstate`` updates the desired performance according to the CPU
|
||||
registers the adjust_perf callback to implement performance update behavior
|
||||
similar to CPPC. It is initialized by ``sugov_start`` and then populates the
|
||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as the
|
||||
utilization update callback function in the CPU scheduler. The CPU scheduler
|
||||
will call ``cpufreq_update_util`` and assign the target performance according
|
||||
to the ``struct sugov_cpu`` that the utilization update belongs to.
|
||||
Then, ``amd-pstate`` updates the desired performance according to the CPU
|
||||
scheduler assigned.
|
||||
|
||||
|
||||
Processor Support
|
||||
=======================
|
||||
|
||||
The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS is
|
||||
not existed at the detected processor, and it uses ``acpi_cpc_valid`` to
|
||||
check the _CPC existence. All Zen based processors support legacy ACPI
|
||||
hardware P-States function, so while the ``amd-pstate`` fails to be
|
||||
initialized, the kernel will fall back to initialize ``acpi-cpufreq``
|
||||
driver.
|
||||
The ``amd-pstate`` initialization will fail if the ``_CPC`` entry in the ACPI
|
||||
SBIOS does not exist in the detected processor. It uses ``acpi_cpc_valid``
|
||||
to check the existence of ``_CPC``. All Zen based processors support the legacy
|
||||
ACPI hardware P-States function, so when ``amd-pstate`` fails initialization,
|
||||
the kernel will fall back to initialize the ``acpi-cpufreq`` driver.
|
||||
|
||||
There are two types of hardware implementations for ``amd-pstate``: one is
|
||||
`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
|
||||
<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_CPPC` feature flag (for
|
||||
details refer to Processor Programming Reference (PPR) for AMD Family
|
||||
19h Model 51h, Revision A1 Processors [3]_) to indicate the different
|
||||
types. ``amd-pstate`` is to register different ``static_call`` instances
|
||||
for different hardware implementations.
|
||||
<perf_cap_>`_. It can use the :c:macro:`X86_FEATURE_CPPC` feature flag to
|
||||
indicate the different types. (For details, refer to the Processor Programming
|
||||
Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors [3]_.)
|
||||
``amd-pstate`` is to register different ``static_call`` instances for different
|
||||
hardware implementations.
|
||||
|
||||
Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||
Currently, some of the Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||
future, it will be supported on more and more AMD processors.
|
||||
|
||||
Full MSR Support
|
||||
@@ -210,18 +209,18 @@ Full MSR Support
|
||||
Some new Zen3 processors such as Cezanne provide the MSR registers directly
|
||||
while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
|
||||
``amd-pstate`` can handle the MSR register to implement the fast switch
|
||||
function in ``CPUFreq`` that can shrink latency of frequency control on the
|
||||
interrupt context. The functions with ``pstate_xxx`` prefix represent the
|
||||
operations of MSR registers.
|
||||
function in ``CPUFreq`` that can reduce the latency of frequency control in
|
||||
interrupt context. The functions with a ``pstate_xxx`` prefix represent the
|
||||
operations on MSR registers.
|
||||
|
||||
Shared Memory Support
|
||||
----------------------
|
||||
|
||||
If :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, that means the
|
||||
processor supports shared memory solution. In this case, ``amd-pstate``
|
||||
If the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, the
|
||||
processor supports the shared memory solution. In this case, ``amd-pstate``
|
||||
uses the ``cppc_acpi`` helper methods to implement the callback functions
|
||||
that defined on ``static_call``. The functions with ``cppc_xxx`` prefix
|
||||
represent the operations of acpi cppc helpers for shared memory solution.
|
||||
that are defined on ``static_call``. The functions with the ``cppc_xxx`` prefix
|
||||
represent the operations of ACPI CPPC helpers for the shared memory solution.
|
||||
|
||||
|
||||
AMD P-States and ACPI hardware P-States always can be supported in one
|
||||
@@ -234,7 +233,7 @@ User Space Interface in ``sysfs``
|
||||
==================================
|
||||
|
||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||
control its functionality at the system level. They located in the
|
||||
control its functionality at the system level. They are located in the
|
||||
``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
|
||||
|
||||
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
|
||||
@@ -246,38 +245,38 @@ control its functionality at the system level. They located in the
|
||||
``amd_pstate_highest_perf / amd_pstate_max_freq``
|
||||
|
||||
Maximum CPPC performance and CPU frequency that the driver is allowed to
|
||||
set in percent of the maximum supported CPPC performance level (the highest
|
||||
set, in percent of the maximum supported CPPC performance level (the highest
|
||||
performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||
In some of ASICs, the highest CPPC performance is not the one in the _CPC
|
||||
table, so we need to expose it to sysfs. If boost is not active but
|
||||
supported, this maximum frequency will be larger than the one in
|
||||
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||
table, so we need to expose it to sysfs. If boost is not active, but
|
||||
still supported, this maximum frequency will be larger than the one in
|
||||
``cpuinfo``.
|
||||
This attribute is read-only.
|
||||
|
||||
``amd_pstate_lowest_nonlinear_freq``
|
||||
|
||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set
|
||||
in percent of the maximum supported CPPC performance level (Please see the
|
||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set,
|
||||
in percent of the maximum supported CPPC performance level. (Please see the
|
||||
lowest non-linear performance in `AMD CPPC Performance Capability
|
||||
<perf_cap_>`_).
|
||||
<perf_cap_>`_.)
|
||||
This attribute is read-only.
|
||||
|
||||
For other performance and frequency values, we can read them back from
|
||||
Other performance and frequency values can be read back from
|
||||
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
||||
|
||||
|
||||
``amd-pstate`` vs ``acpi-cpufreq``
|
||||
======================================
|
||||
|
||||
On majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||
provided by the platform firmware used for CPU performance scaling, but
|
||||
only provides 3 P-states on AMD processors.
|
||||
However, on modern AMD APU and CPU series, it provides the collaborative
|
||||
processor performance control according to ACPI protocol and customize this
|
||||
for AMD platforms. That is fine-grain and continuous frequency range
|
||||
On the majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||
provided by the platform firmware are used for CPU performance scaling, but
|
||||
only provide 3 P-states on AMD processors.
|
||||
However, on modern AMD APU and CPU series, hardware provides the Collaborative
|
||||
Processor Performance Control according to the ACPI protocol and customizes this
|
||||
for AMD platforms. That is, fine-grained and continuous frequency ranges
|
||||
instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
|
||||
module which supports the new AMD P-States mechanism on most of future AMD
|
||||
platforms. The AMD P-States mechanism will be the more performance and energy
|
||||
module which supports the new AMD P-States mechanism on most of the future AMD
|
||||
platforms. The AMD P-States mechanism is the more performance and energy
|
||||
efficient frequency management method on AMD processors.
|
||||
|
||||
Kernel Module Options for ``amd-pstate``
|
||||
@@ -287,25 +286,25 @@ Kernel Module Options for ``amd-pstate``
|
||||
Use a module param (shared_mem) to enable related processors manually with
|
||||
**amd_pstate.shared_mem=1**.
|
||||
Due to the performance issue on the processors with `Shared Memory Support
|
||||
<perf_cap_>`_, so we disable it for the moment and will enable this by default
|
||||
once we address performance issue on this solution.
|
||||
<perf_cap_>`_, we disable it presently and will re-enable this by default
|
||||
once we address the performance issue with this solution.
|
||||
|
||||
The way to check whether current processor is `Full MSR Support <perf_cap_>`_
|
||||
To check whether the current processor is using `Full MSR Support <perf_cap_>`_
|
||||
or `Shared Memory Support <perf_cap_>`_ : ::
|
||||
|
||||
ray@hr-test1:~$ lscpu | grep cppc
|
||||
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
|
||||
|
||||
If CPU Flags have cppc, then this processor supports `Full MSR Support
|
||||
<perf_cap_>`_. Otherwise it supports `Shared Memory Support <perf_cap_>`_.
|
||||
If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
|
||||
<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
|
||||
|
||||
|
||||
``cpupower`` tool support for ``amd-pstate``
|
||||
===============================================
|
||||
|
||||
``amd-pstate`` is supported on ``cpupower`` tool that can be used to dump the frequency
|
||||
information. And it is in progress to support more and more operations for new
|
||||
``amd-pstate`` module with this tool. ::
|
||||
``amd-pstate`` is supported by the ``cpupower`` tool, which can be used to dump
|
||||
frequency information. Development is in progress to support more and more
|
||||
operations for the new ``amd-pstate`` module with this tool. ::
|
||||
|
||||
root@hr-test1:/home/ray# cpupower frequency-info
|
||||
analyzing CPU 0:
|
||||
@@ -336,10 +335,10 @@ Trace Events
|
||||
--------------
|
||||
|
||||
There are two static trace events that can be used for ``amd-pstate``
|
||||
diagnostics. One of them is the cpu_frequency trace event generally used
|
||||
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
|
||||
by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
|
||||
specific to ``amd-pstate``. The following sequence of shell commands can
|
||||
be used to enable them and see their output (if the kernel is generally
|
||||
be used to enable them and see their output (if the kernel is
|
||||
configured to support event tracing). ::
|
||||
|
||||
root@hr-test1:/home/ray# cd /sys/kernel/tracing/
|
||||
@@ -364,11 +363,37 @@ configured to support event tracing). ::
|
||||
<idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
|
||||
<idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
|
||||
|
||||
The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling
|
||||
The ``cpu_frequency`` trace event will be triggered either by the ``schedutil`` scaling
|
||||
governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
|
||||
policies with other scaling governors).
|
||||
|
||||
|
||||
Tracer Tool
|
||||
-------------
|
||||
|
||||
``amd_pstate_trace.py`` can record and parse the ``amd-pstate`` trace log, then
|
||||
generate performance plots. This utility can be used to debug and tune the
|
||||
performance of the ``amd-pstate`` driver. The tracer tool needs to import the intel
|
||||
pstate tracer.
|
||||
|
||||
The tracer tool is located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
|
||||
used in two ways. If a trace file is available, then directly parse the file
|
||||
with the command ::
|
||||
|
||||
./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
|
||||
|
||||
Or generate a trace file with root privileges, then parse and plot it with the command ::
|
||||
|
||||
sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
|
||||
|
||||
The test result can be found in ``results/test_name``. The following is an example
|
||||
of part of the output. ::
|
||||
|
||||
common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf apef tsc load duration_ms sample_num elapsed_time common_comm
|
||||
CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
|
||||
CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
|
||||
|
||||
|
||||
Reference
|
||||
===========
|
||||
|
||||
|
||||
@@ -0,0 +1,60 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
==============================
|
||||
Intel Uncore Frequency Scaling
|
||||
==============================
|
||||
|
||||
:Copyright: |copy| 2022 Intel Corporation
|
||||
|
||||
:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||
|
||||
Introduction
|
||||
------------
|
||||
|
||||
The uncore can consume a significant amount of power in Intel's Xeon servers based
|
||||
on the workload characteristics. To optimize the total power and improve overall
|
||||
performance, SoCs have internal algorithms for scaling uncore frequency. These
|
||||
algorithms monitor workload usage of uncore and set a desirable frequency.
|
||||
|
||||
It is possible that users have different expectations of uncore performance and
|
||||
want to have control over it. The objective is similar to allowing users to set
|
||||
the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
|
||||
Users may have some latency sensitive workloads where they do not want any
|
||||
change to uncore frequency. Also, users may have workloads which require
|
||||
different core and uncore performance at distinct phases and they may want to
|
||||
use both cpufreq and the uncore scaling interface to distribute power and
|
||||
improve overall performance.
|
||||
|
||||
Sysfs Interface
|
||||
---------------
|
||||
|
||||
To control uncore frequency, a sysfs interface is provided in the directory:
|
||||
``/sys/devices/system/cpu/intel_uncore_frequency/``.
|
||||
|
||||
There is one directory for each package and die combination as the scope of
|
||||
uncore scaling control is per die in multiple die/package SoCs or per
|
||||
package for single die per package SoCs. The name represents the
|
||||
scope of control. For example: 'package_00_die_00' is for package id 0 and
|
||||
die 0.
|
||||
|
||||
Each package_*_die_* contains the following attributes:
|
||||
|
||||
``initial_max_freq_khz``
|
||||
Out of reset, this attribute represents the maximum possible frequency.
|
||||
This is a read-only attribute. If users adjust max_freq_khz,
|
||||
they can always go back to maximum using the value from this attribute.
|
||||
|
||||
``initial_min_freq_khz``
|
||||
Out of reset, this attribute represents the minimum possible frequency.
|
||||
This is a read-only attribute. If users adjust min_freq_khz,
|
||||
they can always go back to minimum using the value from this attribute.
|
||||
|
||||
``max_freq_khz``
|
||||
This attribute is used to set the maximum uncore frequency.
|
||||
|
||||
``min_freq_khz``
|
||||
This attribute is used to set the minimum uncore frequency.
|
||||
|
||||
``current_freq_khz``
|
||||
This attribute is used to get the current uncore frequency.
|
||||
@@ -15,3 +15,4 @@ Working-State Power Management
|
||||
cpufreq_drivers
|
||||
intel_epb
|
||||
intel-speed-select
|
||||
intel_uncore_frequency_scaling
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
..
|
||||
If you want to distribute this text under CC-BY-4.0 only, please use 'The
|
||||
Linux kernel developers' for author attribution and link this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
||||
.. See the bottom of this file for additional redistribution information.
|
||||
|
||||
Reporting issues
|
||||
++++++++++++++++
|
||||
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
|
||||
handled slightly differently in the reporting process. Three types of cases
|
||||
qualify: regressions, security issues, and really severe problems.
|
||||
|
||||
You deal with a 'regression' if something that worked with an older version of
|
||||
the Linux kernel does not work with a newer one or somehow works worse with it.
|
||||
It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
|
||||
somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
|
||||
an application shows erratic behavior with a newer kernel, which might happen
|
||||
due to incompatible changes in the interface between the kernel and the
|
||||
userland (like procfs and sysfs). Significantly reduced performance or
|
||||
increased power consumption also qualify as regression. But keep in mind: the
|
||||
new kernel needs to be built with a configuration that is similar to the one
|
||||
from the old kernel (see below how to achieve that). That's because the kernel
|
||||
developers sometimes can not avoid incompatibilities when implementing new
|
||||
features; but to avoid regressions such features have to be enabled explicitly
|
||||
during build time configuration.
|
||||
You deal with a regression if some application or practical use case running
|
||||
fine with one Linux kernel works worse or not at all with a newer version
|
||||
compiled using a similar configuration. The document
|
||||
Documentation/admin-guide/reporting-regressions.rst explains this in more
|
||||
detail. It also provides a good deal of other information about regressions you
|
||||
might want to be aware of; it for example explains how to add your issue to the
|
||||
list of tracked regressions, to ensure it won't fall through the cracks.
|
||||
|
||||
What qualifies as a security issue is left to your judgment. Consider reading
|
||||
'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
|
||||
Documentation/admin-guide/security-bugs.rst before proceeding, as it
|
||||
provides additional details on how to best handle security issues.
|
||||
|
||||
An issue is a 'really severe problem' when something totally unacceptably bad
|
||||
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
|
||||
not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
|
||||
followed by a few spaces and some letters.
|
||||
|
||||
If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
|
||||
If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
|
||||
to find out why. Try to eliminate the reason. Often it's caused by one of these
|
||||
three things:
|
||||
|
||||
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
|
||||
reproduce it themselves.
|
||||
|
||||
To find the change there is a process called 'bisection' which the document
|
||||
'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
|
||||
Documentation/admin-guide/bug-bisect.rst describes in detail. That process
|
||||
will often require you to build about ten to twenty kernel images, trying to
|
||||
reproduce the issue with each of them before building the next. Yes, that takes
|
||||
some time, but don't worry, it works a lot quicker than most people assume.
|
||||
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
|
||||
the kernel and not by something else, as outlined above already.
|
||||
|
||||
In the whole process keep in mind: an issue only qualifies as regression if the
|
||||
older and the newer kernel got built with a similar configuration. The best way
|
||||
to archive this: copy the configuration file (``.config``) from the old working
|
||||
kernel freshly to each newer kernel version you try. Afterwards run ``make
|
||||
olddefconfig`` to adjust it for the needs of the new version.
|
||||
older and the newer kernel got built with a similar configuration. This can be
|
||||
achieved by using ``make olddefconfig``, as explained in more detail by
|
||||
Documentation/admin-guide/reporting-regressions.rst; that document also
|
||||
provides a good deal of other information about regressions you might want to be
|
||||
aware of.
|
||||
|
||||
|
||||
Write and send the report
|
||||
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
|
||||
the report's text to these addresses; but on top of it put a small note where
|
||||
you mention that you filed it with a link to the ticket.
|
||||
|
||||
See 'Documentation/admin-guide/security-bugs.rst' for more information.
|
||||
See Documentation/admin-guide/security-bugs.rst for more information.
|
||||
|
||||
|
||||
Duties after the report went out
|
||||
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
|
||||
pinpoint the exact change that causes the issue (which then can easily get
|
||||
reverted to fix the issue quickly). Hence consider to do a proper bisection
|
||||
right away if time permits. See the section 'Special care for regressions' and
|
||||
the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
|
||||
the document Documentation/admin-guide/bug-bisect.rst for details how to
|
||||
perform one. In case of a successful bisection add the author of the culprit to
|
||||
the recipients; also CC everyone in the signed-off-by chain, which you find at
|
||||
the end of its commit message.
|
||||
@@ -1594,7 +1580,7 @@ Some fixes are too complex
|
||||
Even small and seemingly obvious code-changes sometimes introduce new and
|
||||
totally unexpected problems. The maintainers of the stable and longterm kernels
|
||||
are very aware of that and thus only apply changes to these kernels that are
|
||||
within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
|
||||
within rules outlined in Documentation/process/stable-kernel-rules.rst.
|
||||
|
||||
Complex or risky changes for example do not qualify and thus only get applied
|
||||
to mainline. Other fixes are easy to get backported to the newest stable and
|
||||
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
|
||||
|
||||
|
||||
..
|
||||
This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you
|
||||
spot a typo or small mistake, feel free to let him know directly and he'll
|
||||
fix it. You are free to do the same in a mostly informal way if you want
|
||||
to contribute changes to the text, but for copyright reasons please CC
|
||||
end-of-content
|
||||
..
|
||||
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
|
||||
you spot a typo or small mistake, feel free to let him know directly and
|
||||
he'll fix it. You are free to do the same in a mostly informal way if you
|
||||
want to contribute changes to the text, but for copyright reasons please CC
|
||||
linux-doc@vger.kernel.org and "sign-off" your contribution as
|
||||
Documentation/process/submitting-patches.rst outlines in the section "Sign
|
||||
your work - the Developer's Certificate of Origin".
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use "The Linux kernel developers" for author attribution and link
|
||||
this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
|
||||
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
@@ -0,0 +1,451 @@
|
||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||
.. [see the bottom of this file for redistribution information]
|
||||
|
||||
Reporting regressions
|
||||
+++++++++++++++++++++
|
||||
|
||||
"*We don't cause regressions*" is the first rule of Linux kernel development;
|
||||
Linux founder and lead developer Linus Torvalds established it himself and
|
||||
ensures it's obeyed.
|
||||
|
||||
This document describes what the rule means for users and how the Linux kernel's
|
||||
development model ensures all reported regressions are addressed; aspects relevant
|
||||
for kernel developers are left to Documentation/process/handling-regressions.rst.
|
||||
|
||||
|
||||
The important bits (aka "TL;DR")
|
||||
================================
|
||||
|
||||
#. It's a regression if something running fine with one Linux kernel works worse
|
||||
or not at all with a newer version. Note, the newer kernel has to be compiled
|
||||
using a similar configuration; the detailed explanations below describe this
|
||||
and other fine print in more detail.
|
||||
|
||||
#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
|
||||
it already covers all aspects important for regressions, which are repeated
|
||||
below for convenience. Two of them are important: start your report's subject
|
||||
with "[REGRESSION]" and CC or forward it to `the regression mailing list
|
||||
<https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
|
||||
|
||||
#. Optional, but recommended: when sending or forwarding your report, make the
|
||||
Linux kernel regression tracking bot "regzbot" track the issue by specifying
|
||||
when the regression started like this::
|
||||
|
||||
#regzbot introduced: v5.13..v5.14-rc1
|
||||
|
||||
|
||||
All the details on Linux kernel regressions relevant for users
|
||||
==============================================================
|
||||
|
||||
|
||||
The important basics
|
||||
--------------------
|
||||
|
||||
|
||||
What is a "regression" and what is the "no regressions rule"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It's a regression if some application or practical use case running fine with
|
||||
one Linux kernel works worse or not at all with a newer version compiled using a
|
||||
similar configuration. The "no regressions rule" forbids this to take place; if
|
||||
it happens by accident, developers that caused it are expected to quickly fix
|
||||
the issue.
|
||||
|
||||
It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
|
||||
5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
|
||||
It's also a regression if a perfectly working application suddenly shows erratic
|
||||
behavior with a newer kernel version; such issues can be caused by changes in
|
||||
procfs, sysfs, or one of the many other interfaces Linux provides to userland
|
||||
software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
|
||||
be built from a configuration similar to the one from 5.13. This can be achieved
|
||||
using ``make olddefconfig``, as explained in more detail below.
|
||||
|
||||
Note the "practical use case" in the first sentence of this section: developers
|
||||
despite the "no regressions" rule are free to change any aspect of the kernel
|
||||
and even APIs or ABIs to userland, as long as no existing application or use
|
||||
case breaks.
|
||||
|
||||
Also be aware the "no regressions" rule covers only interfaces the kernel
|
||||
provides to the userland. It thus does not apply to kernel-internal interfaces
|
||||
like the module API, which some externally developed drivers use to hook into
|
||||
the kernel.
|
||||
|
||||
How do I report a regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Just report the issue as outlined in
|
||||
Documentation/admin-guide/reporting-issues.rst, it already describes the
|
||||
important points. The following aspects outlined there are especially relevant
|
||||
for regressions:
|
||||
|
||||
* When checking for existing reports to join, also search the `archives of the
|
||||
Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
|
||||
`regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||
|
||||
* Start your report's subject with "[REGRESSION]".
|
||||
|
||||
* In your report, clearly mention the last kernel version that worked fine and
|
||||
the first broken one. Ideally try to find the exact change causing the
|
||||
regression using a bisection, as explained below in more detail.
|
||||
|
||||
* Remember to let the Linux regressions mailing list
|
||||
(regressions@lists.linux.dev) know about your report:
|
||||
|
||||
* If you report the regression by mail, CC the regressions list.
|
||||
|
||||
* If you report your regression to some bug tracker, forward the submitted
|
||||
report by mail to the regressions list while CCing the maintainer and the
|
||||
mailing list for the subsystem in question.
|
||||
|
||||
If it's a regression within a stable or longterm series (e.g.
|
||||
v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
|
||||
<https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
|
||||
|
||||
In case you performed a successful bisection, add everyone to the CC the
|
||||
culprit's commit message mentions in lines starting with "Signed-off-by:".
|
||||
|
||||
When CCing or forwarding your report to the list, consider directly telling the
|
||||
aforementioned Linux kernel regression tracking bot about your report. To do
|
||||
that, include a paragraph like this in your mail::
|
||||
|
||||
#regzbot introduced: v5.13..v5.14-rc1
|
||||
|
||||
Regzbot will then consider your mail a report for a regression introduced in the
|
||||
specified version range. In the above case Linux v5.13 still worked fine and Linux
|
||||
v5.14-rc1 was the first version where you encountered the issue. If you
|
||||
performed a bisection to find the commit that caused the regression, specify the
|
||||
culprit's commit-id instead::
|
||||
|
||||
#regzbot introduced: 1f2e3d4c5d
|
||||
|
||||
Placing such a "regzbot command" is in your interest, as it will ensure the
|
||||
report won't fall through the cracks unnoticed. If you omit this, the Linux
|
||||
kernel's regressions tracker will take care of telling regzbot about your
|
||||
regression, as long as you send a copy to the regressions mailing list. But the
|
||||
regression tracker is just one human who sometimes has to rest or occasionally
|
||||
might even enjoy some time away from computers (as crazy as that might sound).
|
||||
Relying on this person thus will result in an unnecessary delay before the
|
||||
regression becomes mentioned `on the list of tracked and unresolved Linux
|
||||
kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
|
||||
weekly regression reports sent by regzbot. Such delays can result in Linus
|
||||
Torvalds being unaware of important regressions when deciding between "continue
|
||||
development or call this finished and release the final?".
|
||||
|
||||
Are really all regressions fixed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Nearly all of them are, as long as the change causing the regression (the
|
||||
"culprit commit") is reliably identified. Some regressions can be fixed without
|
||||
this, but often it's required.
|
||||
|
||||
Who needs to find the root cause of a regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Developers of the affected code area should try to locate the culprit on their
|
||||
own. But for them that's often impossible to do with reasonable effort, as quite
|
||||
a lot of issues only occur in a particular environment outside the developer's
|
||||
reach -- for example, a specific hardware platform, firmware, Linux distro,
|
||||
system's configuration, or application. That's why in the end it's often up to
|
||||
the reporter to locate the culprit commit; sometimes users might even need to
|
||||
run additional tests afterwards to pinpoint the exact root cause. Developers
|
||||
should offer advice and reasonably help where they can, to make this process
|
||||
relatively easy and achievable for typical users.
|
||||
|
||||
How can I find the culprit?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Perform a bisection, as roughly outlined in
|
||||
Documentation/admin-guide/reporting-issues.rst and described in more detail by
|
||||
Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
|
||||
in many cases finds the culprit relatively quickly. If it's hard or
|
||||
time-consuming to reliably reproduce the issue, consider teaming up with other
|
||||
affected users to narrow down the search range together.
|
||||
|
||||
Who can I ask for advice when it comes to regressions?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
|
||||
CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
|
||||
issue might better be dealt with in private, feel free to omit the list.
|
||||
|
||||
|
||||
Additional details about regressions
|
||||
------------------------------------
|
||||
|
||||
|
||||
What is the goal of the "no regressions rule"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Users should feel safe when updating kernel versions and not have to worry
|
||||
something might break. This is in the interest of the kernel developers to make
|
||||
updating attractive: they don't want users to stay on stable or longterm Linux
|
||||
series that are either abandoned or more than one and a half years old. That's
|
||||
in everybody's interest, as `those series might have known bugs, security
|
||||
issues, or other problematic aspects already fixed in later versions
|
||||
<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
|
||||
Additionally, the kernel developers want to make it simple and appealing for
|
||||
users to test the latest pre-release or regular release. That's also in
|
||||
everybody's interest, as it's a lot easier to track down and fix problems, if
|
||||
they are reported shortly after being introduced.
|
||||
|
||||
Is the "no regressions" rule really adhered to in practice?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It's taken really seriously, as can be seen by many mailing list posts from
|
||||
Linux creator and lead developer Linus Torvalds, some of which are quoted in
|
||||
Documentation/process/handling-regressions.rst.
|
||||
|
||||
Exceptions to this rule are extremely rare; in the past developers almost always
|
||||
turned out to be wrong when they assumed a particular situation was warranting
|
||||
an exception.
|
||||
|
||||
Who ensures the "no regressions" rule is actually followed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The subsystem maintainers should take care of that, which are watched and
|
||||
supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
|
||||
Greg Kroah-Hartman et al. for various stable/longterm series.
|
||||
|
||||
All of them are helped by people trying to ensure no regression report falls
|
||||
through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
|
||||
the Linux kernel's "regressions tracker"; to facilitate this work he relies on
|
||||
regzbot, the Linux kernel regression tracking bot. That's why you want to bring
|
||||
your report on the radar of these people by CCing or forwarding each report to
|
||||
the regressions mailing list, ideally with a "regzbot command" in your mail to
|
||||
get it tracked immediately.
|
||||
|
||||
How quickly are regressions normally fixed?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Developers should fix any reported regression as quickly as possible, to provide
|
||||
affected users with a solution in a timely manner and prevent more users from
|
||||
running into the issue; nevertheless developers need to take enough time and
|
||||
care to ensure regression fixes do not cause additional damage.
|
||||
|
||||
The answer thus depends on various factors like the impact of a regression, its
|
||||
age, or the Linux series in which it occurs. In the end though, most regressions
|
||||
should be fixed within two weeks.
|
||||
|
||||
Is it a regression, if the issue can be avoided by updating some software?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Almost always: yes. If a developer tells you otherwise, ask the regression
|
||||
tracker for advice as outlined above.
|
||||
|
||||
Is it a regression, if a newer kernel works slower or consumes more energy?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Yes, but the difference has to be significant. A five percent slow-down in a
|
||||
micro-benchmark thus is unlikely to qualify as regression, unless it also
|
||||
influences the results of a broad benchmark by more than one percent. If in
|
||||
doubt, ask for advice.
|
||||
|
||||
Is it a regression, if an external kernel module breaks when updating Linux?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
No, as the "no regression" rule is about interfaces and services the Linux
|
||||
kernel provides to the userland. It thus does not cover building or running
|
||||
externally developed kernel modules, as they run in kernel-space and hook into
|
||||
the kernel using internal interfaces occasionally changed.
|
||||
|
||||
How are regressions handled that are caused by security fixes?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
In extremely rare situations security issues can't be fixed without causing
|
||||
regressions; those fixes are given way, as they are the lesser evil in the end.
|
||||
Luckily this middling almost always can be avoided, as key developers for the
|
||||
affected area and often Linus Torvalds himself try very hard to fix security
|
||||
issues without causing regressions.
|
||||
|
||||
If you nevertheless face such a case, check the mailing list archives if people
|
||||
tried their best to avoid the regression. If not, report it; if in doubt, ask
|
||||
for advice as outlined above.
|
||||
|
||||
What happens if fixing a regression is impossible without causing another?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Sadly these things happen, but luckily not very often; if they occur, expert
|
||||
developers of the affected code area should look into the issue to find a fix
|
||||
that avoids regressions or at least their impact. If you run into such a
|
||||
situation, do what was outlined already for regressions caused by security
|
||||
fixes: check earlier discussions if people already tried their best and ask for
|
||||
advice if in doubt.
|
||||
|
||||
A quick note while at it: these situations could be avoided, if people would
|
||||
regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
|
||||
development cycle a test run. This is best explained by imagining a change
|
||||
integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
|
||||
the same time is a hard requirement for some other improvement applied for
|
||||
5.15-rc1. All these changes often can simply be reverted and the regression thus
|
||||
solved, if someone finds and reports it before 5.15 is released. A few days or
|
||||
weeks later this solution can become impossible, as some software might have
|
||||
started to rely on aspects introduced by one of the follow-up changes: reverting
|
||||
all changes would then cause a regression for users of said software and thus is
|
||||
out of the question.
|
||||
|
||||
Is it a regression, if some feature I relied on was removed months ago?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It is, but often it's hard to fix such regressions due to the aspects outlined
|
||||
in the previous section. It hence needs to be dealt with on a case-by-case
|
||||
basis. This is another reason why it's in everybody's interest to regularly test
|
||||
mainline pre-releases.
|
||||
|
||||
Does the "no regression" rule apply if I seem to be the only affected person?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
It does, but only for practical usage: the Linux developers want to be free to
|
||||
remove support for hardware only to be found in attics and museums anymore.
|
||||
|
||||
Note, sometimes regressions can't be avoided to make progress -- and the latter
|
||||
is needed to prevent Linux from stagnation. Hence, if only very few users seem
|
||||
to be affected by a regression, it for the greater good might be in their and
|
||||
everyone else's interest to let things pass. Especially if there is an
|
||||
easy way to circumvent the regression somehow, for example by updating some
|
||||
software or using a kernel parameter created just for this purpose.
|
||||
|
||||
Does the regression rule apply for code in the staging tree as well?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Not according to the `help text for the configuration option covering all
|
||||
staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
|
||||
which since its early days states::
|
||||
|
||||
Please note that these drivers are under heavy development, may or
|
||||
may not work, and may contain userspace interfaces that most likely
|
||||
will be changed in the near future.
|
||||
|
||||
The staging developers nevertheless often adhere to the "no regressions" rule,
|
||||
but sometimes bend it to make progress. That's for example why some users had to
|
||||
deal with (often negligible) regressions when a WiFi driver from the staging
|
||||
tree was replaced by a totally different one written from scratch.
|
||||
|
||||
Why do later versions have to be "compiled with a similar configuration"?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Because the Linux kernel developers sometimes integrate changes known to cause
|
||||
regressions, but make them optional and disable them in the kernel's default
|
||||
configuration. This trick allows progress, as the "no regressions" rule
|
||||
otherwise would lead to stagnation.
|
||||
|
||||
Consider for example a new security feature blocking access to some kernel
|
||||
interfaces often abused by malware, which at the same time are required to run a
|
||||
few rarely used applications. The outlined approach makes both camps happy:
|
||||
people using these applications can leave the new security feature off, while
|
||||
everyone else can enable it without running into trouble.
|
||||
|
||||
How to create a configuration similar to the one of an older kernel?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Start your machine with a known-good kernel and configure the newer Linux
|
||||
version with ``make olddefconfig``. This makes the kernel's build scripts pick
|
||||
up the configuration file (the ".config" file) from the running kernel as base
|
||||
for the new one you are about to compile; afterwards they set all new
|
||||
configuration options to their default value, which should disable new features
|
||||
that might cause regressions.
|
||||
|
||||
Can I report a regression I found with pre-compiled vanilla kernels?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
You need to ensure the newer kernel was compiled with a similar configuration
|
||||
file as the older one (see above), as those that built them might have enabled
|
||||
some known-to-be incompatible feature for the newer kernel. If in doubt, report
|
||||
the matter to the kernel's provider and ask for advice.
|
||||
|
||||
|
||||
More about regression tracking with "regzbot"
|
||||
---------------------------------------------
|
||||
|
||||
What is regression tracking and why should I care about it?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Rules like "no regressions" need someone to ensure they are followed, otherwise
|
||||
they are broken either accidentally or on purpose. History has shown this to be
|
||||
true for Linux kernel development as well. That's why Thorsten Leemhuis, the
|
||||
Linux Kernel's regression tracker, and some people try to ensure all regressions
|
||||
are fixed by keeping an eye on them until they are resolved. Neither of them are
|
||||
paid for this, that's why the work is done on a best effort basis.
|
||||
|
||||
Why and how are Linux kernel regressions tracked using a bot?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Tracking regressions completely manually has proven to be quite hard due to the
|
||||
distributed and loosely structured nature of the Linux kernel development process.
|
||||
That's why the Linux kernel's regression tracker developed regzbot to facilitate
|
||||
the work, with the long term goal to automate regression tracking as much as
|
||||
possible for everyone involved.
|
||||
|
||||
Regzbot works by watching for replies to reports of tracked regressions.
|
||||
Additionally, it's looking out for posted or committed patches referencing such
|
||||
reports with "Link:" tags; replies to such patch postings are tracked as well.
|
||||
Combined this data provides good insights into the current state of the fixing
|
||||
process.
|
||||
|
||||
How to see which regressions regzbot tracks currently?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||
|
||||
What kind of issues are supposed to be tracked by regzbot?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The bot is meant to track regressions, hence please don't involve regzbot for
|
||||
regular issues. But it's okay for the Linux kernel's regression tracker if you
|
||||
involve regzbot to track severe issues, like reports about hangs, corrupted
|
||||
data, or internal errors (Panic, Oops, BUG(), warning, ...).
|
||||
|
||||
How to change aspects of a tracked regression?
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
By using a 'regzbot command' in a direct or indirect reply to the mail with the
|
||||
report. The easiest way to do that: find the report in your "Sent" folder or the
|
||||
mailing list archive and reply to it using your mailer's "Reply-all" function.
|
||||
In that mail, use one of the following commands in a stand-alone paragraph (IOW:
|
||||
use blank lines to separate one or multiple of these commands from the rest of
|
||||
the mail's text).
|
||||
|
||||
* Update when the regression started to happen, for example after performing a
|
||||
bisection::
|
||||
|
||||
#regzbot introduced: 1f2e3d4c5d
|
||||
|
||||
* Set or update the title::
|
||||
|
||||
#regzbot title: foo
|
||||
|
||||
* Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of
|
||||
the issue or a fix are discussed::
|
||||
|
||||
#regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
|
||||
#regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||
|
||||
* Point to a place with further details of interest, like a mailing list post
|
||||
or a ticket in a bug tracker that are slightly related, but about a different
|
||||
topic::
|
||||
|
||||
#regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||
|
||||
* Mark a regression as invalid::
|
||||
|
||||
#regzbot invalid: wasn't a regression, problem has always existed
|
||||
|
||||
Regzbot supports a few other commands primarily used by developers or people
|
||||
tracking regressions. They and more details about the aforementioned regzbot
|
||||
commands can be found in the `getting started guide
|
||||
<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
|
||||
the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
|
||||
for regzbot.
|
||||
|
||||
..
|
||||
end-of-content
|
||||
..
|
||||
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||
please use "The Linux kernel developers" for author attribution and link
|
||||
this as source:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
|
||||
..
|
||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||
is available under CC-BY-4.0, as versions of this text that were processed
|
||||
(for example by the kernel's build system) might contain content taken from
|
||||
files which use a more restrictive license.
|
||||
@@ -595,65 +595,33 @@ Documentation/admin-guide/kernel-parameters.rst).
|
||||
numa_balancing
|
||||
==============
|
||||
|
||||
Enables/disables automatic page fault based NUMA memory
|
||||
balancing. Memory is moved automatically to nodes
|
||||
that access it often.
|
||||
Enables/disables and configures automatic page fault based NUMA memory
|
||||
balancing. Memory is moved automatically to nodes that access it often.
|
||||
The value to set can be the result of ORing the following:
|
||||
|
||||
Enables/disables automatic NUMA memory balancing. On NUMA machines, there
|
||||
is a performance penalty if remote memory is accessed by a CPU. When this
|
||||
feature is enabled the kernel samples what task thread is accessing memory
|
||||
by periodically unmapping pages and later trapping a page fault. At the
|
||||
time of the page fault, it is determined if the data being accessed should
|
||||
be migrated to a local memory node.
|
||||
= =================================
|
||||
0 NUMA_BALANCING_DISABLED
|
||||
1 NUMA_BALANCING_NORMAL
|
||||
2 NUMA_BALANCING_MEMORY_TIERING
|
||||
= =================================
|
||||
|
||||
Or NUMA_BALANCING_NORMAL to optimize page placement among different
|
||||
NUMA nodes to reduce remote accessing. On NUMA machines, there is a
|
||||
performance penalty if remote memory is accessed by a CPU. When this
|
||||
feature is enabled the kernel samples what task thread is accessing
|
||||
memory by periodically unmapping pages and later trapping a page
|
||||
fault. At the time of the page fault, it is determined if the data
|
||||
being accessed should be migrated to a local memory node.
|
||||
|
||||
The unmapping of pages and trapping faults incur additional overhead that
|
||||
ideally is offset by improved memory locality but there is no universal
|
||||
guarantee. If the target workload is already bound to NUMA nodes then this
|
||||
feature should be disabled. Otherwise, if the system overhead from the
|
||||
feature is too high then the rate the kernel samples for NUMA hinting
|
||||
faults may be controlled by the `numa_balancing_scan_period_min_ms,
|
||||
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
|
||||
numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
|
||||
|
||||
|
||||
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
|
||||
===============================================================================================================================
|
||||
|
||||
|
||||
Automatic NUMA balancing scans tasks address space and unmaps pages to
|
||||
detect if pages are properly placed or if the data should be migrated to a
|
||||
memory node local to where the task is running. Every "scan delay" the task
|
||||
scans the next "scan size" number of pages in its address space. When the
|
||||
end of the address space is reached the scanner restarts from the beginning.
|
||||
|
||||
In combination, the "scan delay" and "scan size" determine the scan rate.
|
||||
When "scan delay" decreases, the scan rate increases. The scan delay and
|
||||
hence the scan rate of every task is adaptive and depends on historical
|
||||
behaviour. If pages are properly placed then the scan delay increases,
|
||||
otherwise the scan delay decreases. The "scan size" is not adaptive but
|
||||
the higher the "scan size", the higher the scan rate.
|
||||
|
||||
Higher scan rates incur higher system overhead as page faults must be
|
||||
trapped and potentially data must be migrated. However, the higher the scan
|
||||
rate, the more quickly a tasks memory is migrated to a local node if the
|
||||
workload pattern changes and minimises performance impact due to remote
|
||||
memory accesses. These sysctls control the thresholds for scan delays and
|
||||
the number of pages scanned.
|
||||
|
||||
``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
|
||||
scan a tasks virtual memory. It effectively controls the maximum scanning
|
||||
rate for each task.
|
||||
|
||||
``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
|
||||
when it initially forks.
|
||||
|
||||
``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
|
||||
scan a tasks virtual memory. It effectively controls the minimum scanning
|
||||
rate for each task.
|
||||
|
||||
``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
|
||||
scanned for a given scan.
|
||||
feature should be disabled.
|
||||
|
||||
Or NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
|
||||
different types of memory (represented as different NUMA nodes) to
|
||||
place the hot pages in the fast memory. This is implemented based on
|
||||
unmapping and page fault too.
|
||||
|
||||
oops_all_cpu_backtrace
|
||||
======================
|
||||
@@ -795,6 +763,8 @@ bit 1 print system memory info
|
||||
bit 2 print timer info
|
||||
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
|
||||
bit 4 print ftrace buffer
|
||||
bit 5 print all printk messages in buffer
|
||||
bit 6 print all CPUs backtrace (if available in the arch)
|
||||
===== ============================================
|
||||
|
||||
So for example to print tasks and memory info on panic, user can::
|
||||
@@ -1029,23 +999,17 @@ This is a directory, with the following entries:
|
||||
* ``poolsize``: the entropy pool size, in bits;
|
||||
|
||||
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
|
||||
number of seconds between urandom pool reseeding).
|
||||
number of seconds between urandom pool reseeding). This file is
|
||||
writable for compatibility purposes, but writing to it has no effect
|
||||
on any RNG behavior.
|
||||
|
||||
* ``uuid``: a UUID generated every time this is retrieved (this can
|
||||
thus be used to generate UUIDs at will);
|
||||
|
||||
* ``write_wakeup_threshold``: when the entropy count drops below this
|
||||
(as a number of bits), processes waiting to write to ``/dev/random``
|
||||
are woken up.
|
||||
|
||||
If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
|
||||
defined, these additional entries are present:
|
||||
|
||||
* ``add_interrupt_avg_cycles``: the average number of cycles between
|
||||
interrupts used to feed the pool;
|
||||
|
||||
* ``add_interrupt_avg_deviation``: the standard deviation seen on the
|
||||
number of cycles between interrupts used to feed the pool.
|
||||
are woken up. This file is writable for compatibility purposes, but
|
||||
writing to it has no effect on any RNG behavior.
|
||||
|
||||
|
||||
randomize_va_space
|
||||
|
||||
@@ -365,6 +365,15 @@ new netns has been created.
|
||||
|
||||
Default : 0 (for compatibility reasons)
|
||||
|
||||
txrehash
|
||||
--------
|
||||
|
||||
Controls default hash rethink behaviour on listening socket when SO_TXREHASH
|
||||
option is set to SOCK_TXREHASH_DEFAULT (i.e. not overridden by setsockopt).
|
||||
|
||||
If set to 1 (default), hash rethink is performed on listening socket.
|
||||
If set to 0, hash rethink is not performed.
|
||||
|
||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||
----------------------------------------------------------
|
||||
|
||||
|
||||
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
|
||||
is relevant to all public releases of the AArch64 Linux kernel.
|
||||
|
||||
The AArch64 exception model is made up of a number of exception levels
|
||||
(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
|
||||
counterpart. EL2 is the hypervisor level and exists only in non-secure
|
||||
mode. EL3 is the highest priority level and exists only in secure mode.
|
||||
(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
|
||||
counterpart. EL2 is the hypervisor level, EL3 is the highest priority
|
||||
level and exists only in secure mode. Both are architecturally optional.
|
||||
|
||||
For the purposes of this document, we will use the term `boot loader`
|
||||
simply to define all software that executes on the CPU(s) before control
|
||||
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
|
||||
|
||||
All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
|
||||
IRQ and FIQ).
|
||||
The CPU must be in either EL2 (RECOMMENDED in order to have access to
|
||||
the virtualisation extensions) or non-secure EL1.
|
||||
The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
|
||||
to have access to the virtualisation extensions), or in EL1.
|
||||
|
||||
- Caches, MMUs
|
||||
|
||||
|
||||
@@ -259,6 +259,11 @@ HWCAP2_RPRES
|
||||
|
||||
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
||||
|
||||
HWCAP2_MTE3
|
||||
|
||||
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
||||
by Documentation/arm64/memory-tagging-extension.rst.
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
|
||||
@@ -76,6 +76,9 @@ configurable behaviours:
|
||||
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
|
||||
address is unknown).
|
||||
|
||||
- *Asymmetric* - Reads are handled as for synchronous mode while writes
|
||||
are handled as for asynchronous mode.
|
||||
|
||||
The user can select the above modes, per thread, using the
|
||||
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
|
||||
contains any number of the following values in the ``PR_MTE_TCF_MASK``
|
||||
@@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple
|
||||
modes are specified, the mode is selected as described in the "Per-CPU
|
||||
preferred tag checking modes" section below.
|
||||
|
||||
The current tag check fault mode can be read using the
|
||||
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
|
||||
The current tag check fault configuration can be read using the
|
||||
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
|
||||
multiple modes were requested then all will be reported.
|
||||
|
||||
Tag checking can also be disabled for a user thread by setting the
|
||||
``PSTATE.TCO`` bit with ``MSR TCO, #1``.
|
||||
@@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode.
|
||||
|
||||
The preferred tag checking mode for each CPU is controlled by
|
||||
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
|
||||
privileged user may write the value ``async`` or ``sync``. The default
|
||||
preferred mode for each CPU is ``async``.
|
||||
privileged user may write the value ``async``, ``sync`` or ``asymm``. The
|
||||
default preferred mode for each CPU is ``async``.
|
||||
|
||||
To allow a program to potentially run in the CPU's preferred tag
|
||||
checking mode, the user program may set multiple tag check fault mode
|
||||
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
|
||||
flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
|
||||
mode is in the task's set of provided tag checking modes (this will
|
||||
always be the case at present because the kernel only supports two
|
||||
tag checking modes, but future kernels may support more modes), that
|
||||
mode will be selected. Otherwise, one of the modes in the task's mode
|
||||
set will be selected in a currently unspecified manner.
|
||||
flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
|
||||
modes are requested then asymmetric mode may also be selected by the
|
||||
kernel. If the CPU's preferred tag checking mode is in the task's set
|
||||
of provided tag checking modes, that mode will be selected. Otherwise,
|
||||
one of the modes will be selected by the kernel
|
||||
from the task's mode set using the preference order:
|
||||
|
||||
1. Asynchronous
|
||||
2. Asymmetric
|
||||
3. Synchronous
|
||||
|
||||
Note that there is no way for userspace to request multiple modes and
|
||||
also disable asymmetric mode.
|
||||
|
||||
Initial process state
|
||||
---------------------
|
||||
@@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the
|
||||
Documentation/arm64/tagged-address-abi.rst and above. The corresponding
|
||||
``regset`` is 1 element of 8 bytes (``sizeof(long)``).
|
||||
|
||||
Core dump support
|
||||
-----------------
|
||||
|
||||
The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
|
||||
in the core file as additional ``PT_AARCH64_MEMTAG_MTE`` segments. The
|
||||
program header for such a segment is defined as:
|
||||
|
||||
:``p_type``: ``PT_AARCH64_MEMTAG_MTE``
|
||||
:``p_flags``: 0
|
||||
:``p_offset``: segment file offset
|
||||
:``p_vaddr``: segment virtual address, same as the corresponding
|
||||
``PT_LOAD`` segment
|
||||
:``p_paddr``: 0
|
||||
:``p_filesz``: segment size in file, calculated as ``p_memsz / 32``
|
||||
(two 4-bit tags cover 32 bytes of memory)
|
||||
:``p_memsz``: segment size in memory, same as the corresponding
|
||||
``PT_LOAD`` segment
|
||||
:``p_align``: 0
|
||||
|
||||
The tags are stored in the core file at ``p_offset`` as two 4-bit tags
|
||||
in a byte. With the tag granule of 16 bytes, a 4K page requires 128
|
||||
bytes in the core file.
|
||||
|
||||
Example of correct usage
|
||||
========================
|
||||
|
||||
|
||||
@@ -136,7 +136,7 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
|
||||
| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Cavium | ThunderX GICv3 | #38539 | N/A |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
@@ -189,6 +189,9 @@ stable kernels.
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Qualcomm Tech. | Kryo4xx Silver | N/A | ARM64_ERRATUM_1024718 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Qualcomm Tech. | Kryo4xx Gold | N/A | ARM64_ERRATUM_1286807 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
| Fujitsu | A64FX | E#010001 | FUJITSU_ERRATUM_010001 |
|
||||
+----------------+-----------------+-----------------+-----------------------------+
|
||||
|
||||
@@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
|
||||
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
|
||||
and ``ENDPROC`` macros.
|
||||
|
||||
* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
|
||||
who decided to have two or more names for one function. The typical use is::
|
||||
* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
|
||||
be used to define multiple names for a function. The typical use is::
|
||||
|
||||
SYM_FUNC_START_ALIAS(__memset)
|
||||
SYM_FUNC_START(memset)
|
||||
SYM_FUNC_START(__memset)
|
||||
... asm insns ...
|
||||
SYM_FUNC_END(memset)
|
||||
SYM_FUNC_END_ALIAS(__memset)
|
||||
SYM_FUNC_END(__memset)
|
||||
SYM_FUNC_ALIAS(memset, __memset)
|
||||
|
||||
In this example, one can call ``__memset`` or ``memset`` with the same
|
||||
result, except the debug information for the instructions is generated to
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
|
||||
``capability`` is a bitfield, printed in hexadecimal, indicating which
|
||||
capabilities a specific block device supports:
|
||||
|
||||
.. kernel-doc:: include/linux/genhd.h
|
||||
.. kernel-doc:: include/linux/blkdev.h
|
||||
|
||||
@@ -8,7 +8,6 @@ Block
|
||||
:maxdepth: 1
|
||||
|
||||
bfq-iosched
|
||||
biodoc
|
||||
biovecs
|
||||
blk-mq
|
||||
capability
|
||||
|
||||
@@ -658,7 +658,7 @@ when:
|
||||
|
||||
.. Links
|
||||
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
|
||||
.. _netdev-FAQ: ../networking/netdev-FAQ.rst
|
||||
.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
|
||||
.. _selftests:
|
||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
|
||||
.. _Documentation/dev-tools/kselftest.rst:
|
||||
|
||||
117
Documentation/bpf/bpf_prog_run.rst
Normal file
117
Documentation/bpf/bpf_prog_run.rst
Normal file
@@ -0,0 +1,117 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===================================
|
||||
Running BPF programs from userspace
|
||||
===================================
|
||||
|
||||
This document describes the ``BPF_PROG_RUN`` facility for running BPF programs
|
||||
from userspace.
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
:depth: 2
|
||||
|
||||
|
||||
Overview
|
||||
--------
|
||||
|
||||
The ``BPF_PROG_RUN`` command can be used through the ``bpf()`` syscall to
|
||||
execute a BPF program in the kernel and return the results to userspace. This
|
||||
can be used to unit test BPF programs against user-supplied context objects, and
|
||||
as a way to explicitly execute programs in the kernel for their side effects. The
|
||||
command was previously named ``BPF_PROG_TEST_RUN``, and both constants continue
|
||||
to be defined in the UAPI header, aliased to the same value.
|
||||
|
||||
The ``BPF_PROG_RUN`` command can be used to execute BPF programs of the
|
||||
following types:
|
||||
|
||||
- ``BPF_PROG_TYPE_SOCKET_FILTER``
|
||||
- ``BPF_PROG_TYPE_SCHED_CLS``
|
||||
- ``BPF_PROG_TYPE_SCHED_ACT``
|
||||
- ``BPF_PROG_TYPE_XDP``
|
||||
- ``BPF_PROG_TYPE_SK_LOOKUP``
|
||||
- ``BPF_PROG_TYPE_CGROUP_SKB``
|
||||
- ``BPF_PROG_TYPE_LWT_IN``
|
||||
- ``BPF_PROG_TYPE_LWT_OUT``
|
||||
- ``BPF_PROG_TYPE_LWT_XMIT``
|
||||
- ``BPF_PROG_TYPE_LWT_SEG6LOCAL``
|
||||
- ``BPF_PROG_TYPE_FLOW_DISSECTOR``
|
||||
- ``BPF_PROG_TYPE_STRUCT_OPS``
|
||||
- ``BPF_PROG_TYPE_RAW_TRACEPOINT``
|
||||
- ``BPF_PROG_TYPE_SYSCALL``
|
||||
|
||||
When using the ``BPF_PROG_RUN`` command, userspace supplies an input context
|
||||
object and (for program types operating on network packets) a buffer containing
|
||||
the packet data that the BPF program will operate on. The kernel will then
|
||||
execute the program and return the results to userspace. Note that programs will
|
||||
not have any side effects while being run in this mode; in particular, packets
|
||||
will not actually be redirected or dropped, the program return code will just be
|
||||
returned to userspace. A separate mode for live execution of XDP programs is
|
||||
provided, documented separately below.
|
||||
|
||||
Running XDP programs in "live frame mode"
|
||||
-----------------------------------------
|
||||
|
||||
The ``BPF_PROG_RUN`` command has a separate mode for running live XDP programs,
|
||||
which can be used to execute XDP programs in a way where packets will actually
|
||||
be processed by the kernel after the execution of the XDP program as if they
|
||||
arrived on a physical interface. This mode is activated by setting the
|
||||
``BPF_F_TEST_XDP_LIVE_FRAMES`` flag when supplying an XDP program to
|
||||
``BPF_PROG_RUN``.
|
||||
|
||||
The live packet mode is optimised for high performance execution of the supplied
|
||||
XDP program many times (suitable for, e.g., running as a traffic generator),
|
||||
which means the semantics are not quite as straight-forward as the regular test
|
||||
run mode. Specifically:
|
||||
|
||||
- When executing an XDP program in live frame mode, the result of the execution
|
||||
will not be returned to userspace; instead, the kernel will perform the
|
||||
operation indicated by the program's return code (drop the packet, redirect
|
||||
it, etc). For this reason, setting the ``data_out`` or ``ctx_out`` attributes
|
||||
in the syscall parameters when running in this mode will be rejected. In
|
||||
addition, not all failures will be reported back to userspace directly;
|
||||
specifically, only fatal errors in setup or during execution (like memory
|
||||
allocation errors) will halt execution and return an error. If an error occurs
|
||||
in packet processing, like a failure to redirect to a given interface,
|
||||
execution will continue with the next repetition; these errors can be detected
|
||||
via the same trace points as for regular XDP programs.
|
||||
|
||||
- Userspace can supply an ifindex as part of the context object, just like in
|
||||
the regular (non-live) mode. The XDP program will be executed as though the
|
||||
packet arrived on this interface; i.e., the ``ingress_ifindex`` of the context
|
||||
object will point to that interface. Furthermore, if the XDP program returns
|
||||
``XDP_PASS``, the packet will be injected into the kernel networking stack as
|
||||
though it arrived on that ifindex, and if it returns ``XDP_TX``, the packet
|
||||
will be transmitted *out* of that same interface. Do note, though, that
|
||||
because the program execution is not happening in driver context, an
|
||||
``XDP_TX`` is actually turned into the same action as an ``XDP_REDIRECT`` to
|
||||
that same interface (i.e., it will only work if the driver has support for the
|
||||
``ndo_xdp_xmit`` driver op).
|
||||
|
||||
- When running the program with multiple repetitions, the execution will happen
|
||||
in batches. The batch size defaults to 64 packets (which is the same as the
|
||||
maximum NAPI receive batch size), but can be specified by userspace through
|
||||
the ``batch_size`` parameter, up to a maximum of 256 packets. For each batch,
|
||||
the kernel executes the XDP program repeatedly, each invocation getting a
|
||||
separate copy of the packet data. For each repetition, if the program drops
|
||||
the packet, the data page is immediately recycled (see below). Otherwise, the
|
||||
packet is buffered until the end of the batch, at which point all packets
|
||||
buffered this way during the batch are transmitted at once.
|
||||
|
||||
- When setting up the test run, the kernel will initialise a pool of memory
|
||||
pages of the same size as the batch size. Each memory page will be initialised
|
||||
with the initial packet data supplied by userspace at ``BPF_PROG_RUN``
|
||||
invocation. When possible, the pages will be recycled on future program
|
||||
invocations, to improve performance. Pages will generally be recycled a full
|
||||
batch at a time, except when a packet is dropped (by return code or because
|
||||
of, say, a redirection error), in which case that page will be recycled
|
||||
immediately. If a packet ends up being passed to the regular networking stack
|
||||
(because the XDP program returns ``XDP_PASS``, or because it ends up being
|
||||
redirected to an interface that injects it into the stack), the page will be
|
||||
released and a new one will be allocated when the pool is empty.
|
||||
|
||||
When recycling, the page content is not rewritten; only the packet boundary
|
||||
pointers (``data``, ``data_end`` and ``data_meta``) in the context object will
|
||||
be reset to the original values. This means that if a program rewrites the
|
||||
packet contents, it has to be prepared to see either the original content or
|
||||
the modified version on subsequent invocations.
|
||||
@@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument.
|
||||
* ``info.vlen``: 0
|
||||
* ``type``: the type with ``btf_type_tag`` attribute
|
||||
|
||||
Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types.
|
||||
It has the following btf type chain:
|
||||
::
|
||||
|
||||
ptr -> [type_tag]*
|
||||
-> [const | volatile | restrict | typedef]*
|
||||
-> base_type
|
||||
|
||||
Basically, a pointer type points to zero or more
|
||||
type_tag, then zero or more const/volatile/restrict/typedef
|
||||
and finally the base type. The base type is one of
|
||||
int, ptr, array, struct, union, enum, func_proto and float types.
|
||||
|
||||
3. BTF Kernel API
|
||||
=================
|
||||
|
||||
@@ -565,18 +578,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
|
||||
In libbpf, the map can be defined with extra annotation like below:
|
||||
::
|
||||
|
||||
struct bpf_map_def SEC("maps") btf_map = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(int),
|
||||
.value_size = sizeof(struct ipv_counts),
|
||||
.max_entries = 4,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct ipv_counts);
|
||||
__uint(max_entries, 4);
|
||||
} btf_map SEC(".maps");
|
||||
|
||||
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
|
||||
value types for the map. During ELF parsing, libbpf is able to extract
|
||||
key/value type_id's and assign them to BPF_MAP_CREATE attributes
|
||||
automatically.
|
||||
During ELF parsing, libbpf is able to extract key/value type_id's and assign
|
||||
them to BPF_MAP_CREATE attributes automatically.
|
||||
|
||||
.. _BPF_Prog_Load:
|
||||
|
||||
@@ -824,13 +834,12 @@ structure has bitfields. For example, for the following map,::
|
||||
___A b1:4;
|
||||
enum A b2:4;
|
||||
};
|
||||
struct bpf_map_def SEC("maps") tmpmap = {
|
||||
.type = BPF_MAP_TYPE_ARRAY,
|
||||
.key_size = sizeof(__u32),
|
||||
.value_size = sizeof(struct tmp_t),
|
||||
.max_entries = 1,
|
||||
};
|
||||
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
|
||||
struct {
|
||||
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||
__type(key, int);
|
||||
__type(value, struct tmp_t);
|
||||
__uint(max_entries, 1);
|
||||
} tmpmap SEC(".maps");
|
||||
|
||||
bpftool is able to pretty print like below:
|
||||
::
|
||||
|
||||
@@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture.
|
||||
helpers
|
||||
programs
|
||||
maps
|
||||
bpf_prog_run
|
||||
classic_vs_extended.rst
|
||||
bpf_licensing
|
||||
test_debug
|
||||
|
||||
@@ -22,7 +22,13 @@ necessary across calls.
|
||||
Instruction encoding
|
||||
====================
|
||||
|
||||
eBPF uses 64-bit instructions with the following encoding:
|
||||
eBPF has two instruction encodings:
|
||||
|
||||
* the basic instruction encoding, which uses 64 bits to encode an instruction
|
||||
* the wide instruction encoding, which appends a second 64-bit immediate value
|
||||
(imm64) after the basic instruction for a total of 128 bits.
|
||||
|
||||
The basic instruction encoding looks as follows:
|
||||
|
||||
============= ======= =============== ==================== ============
|
||||
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
|
||||
@@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
|
||||
otherwise identical operations.
|
||||
The code field encodes the operation as below:
|
||||
|
||||
======== ===== ==========================
|
||||
======== ===== =================================================
|
||||
code value description
|
||||
======== ===== ==========================
|
||||
======== ===== =================================================
|
||||
BPF_ADD 0x00 dst += src
|
||||
BPF_SUB 0x10 dst -= src
|
||||
BPF_MUL 0x20 dst \*= src
|
||||
@@ -98,8 +104,8 @@ The code field encodes the operation as below:
|
||||
BPF_XOR 0xa0 dst ^= src
|
||||
BPF_MOV 0xb0 dst = src
|
||||
BPF_ARSH 0xc0 sign extending shift right
|
||||
BPF_END 0xd0 endianness conversion
|
||||
======== ===== ==========================
|
||||
BPF_END 0xd0 byte swap operations (see separate section below)
|
||||
======== ===== =================================================
|
||||
|
||||
BPF_ADD | BPF_X | BPF_ALU means::
|
||||
|
||||
@@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
|
||||
src_reg = src_reg ^ imm32
|
||||
|
||||
|
||||
Byte swap instructions
|
||||
----------------------
|
||||
|
||||
The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
|
||||
code field of ``BPF_END``.
|
||||
|
||||
The byte swap instructions operate on the destination register
|
||||
only and do not use a separate source register or immediate value.
|
||||
|
||||
The 1-bit source operand field in the opcode is used to select what byte
|
||||
order the operation converts from or to:
|
||||
|
||||
========= ===== =================================================
|
||||
source value description
|
||||
========= ===== =================================================
|
||||
BPF_TO_LE 0x00 convert between host byte order and little endian
|
||||
BPF_TO_BE 0x08 convert between host byte order and big endian
|
||||
========= ===== =================================================
|
||||
|
||||
The imm field encodes the width of the swap operations. The following widths
|
||||
are supported: 16, 32 and 64.
|
||||
|
||||
Examples:
|
||||
|
||||
``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means::
|
||||
|
||||
dst_reg = htole16(dst_reg)
|
||||
|
||||
``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means::
|
||||
|
||||
dst_reg = htobe64(dst_reg)
|
||||
|
||||
``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and
|
||||
``BPF_TO_BE`` respectively.
|
||||
|
||||
|
||||
Jump instructions
|
||||
-----------------
|
||||
|
||||
@@ -176,63 +218,96 @@ The mode modifier is one of:
|
||||
============= ===== ====================================
|
||||
mode modifier value description
|
||||
============= ===== ====================================
|
||||
BPF_IMM 0x00 used for 64-bit mov
|
||||
BPF_ABS 0x20 legacy BPF packet access
|
||||
BPF_IND 0x40 legacy BPF packet access
|
||||
BPF_MEM 0x60 all normal load and store operations
|
||||
BPF_IMM 0x00 64-bit immediate instructions
|
||||
BPF_ABS 0x20 legacy BPF packet access (absolute)
|
||||
BPF_IND 0x40 legacy BPF packet access (indirect)
|
||||
BPF_MEM 0x60 regular load and store operations
|
||||
BPF_ATOMIC 0xc0 atomic operations
|
||||
============= ===== ====================================
|
||||
|
||||
BPF_MEM | <size> | BPF_STX means::
|
||||
|
||||
Regular load and store operations
|
||||
---------------------------------
|
||||
|
||||
The ``BPF_MEM`` mode modifier is used to encode regular load and store
|
||||
instructions that transfer data between a register and memory.
|
||||
|
||||
``BPF_MEM | <size> | BPF_STX`` means::
|
||||
|
||||
*(size *) (dst_reg + off) = src_reg
|
||||
|
||||
BPF_MEM | <size> | BPF_ST means::
|
||||
``BPF_MEM | <size> | BPF_ST`` means::
|
||||
|
||||
*(size *) (dst_reg + off) = imm32
|
||||
|
||||
BPF_MEM | <size> | BPF_LDX means::
|
||||
``BPF_MEM | <size> | BPF_LDX`` means::
|
||||
|
||||
dst_reg = *(size *) (src_reg + off)
|
||||
|
||||
Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
|
||||
Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``.
|
||||
|
||||
Atomic operations
|
||||
-----------------
|
||||
|
||||
eBPF includes atomic operations, which use the immediate field for extra
|
||||
encoding::
|
||||
Atomic operations are operations that operate on memory and can not be
|
||||
interrupted or corrupted by other access to the same memory region
|
||||
by other eBPF programs or means outside of this specification.
|
||||
|
||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
|
||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
|
||||
All atomic operations supported by eBPF are encoded as store operations
|
||||
that use the ``BPF_ATOMIC`` mode modifier as follows:
|
||||
|
||||
The basic atomic operations supported are::
|
||||
* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
|
||||
* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
|
||||
* 8-bit and 16-bit wide atomic operations are not supported.
|
||||
|
||||
BPF_ADD
|
||||
BPF_AND
|
||||
BPF_OR
|
||||
BPF_XOR
|
||||
The imm field is used to encode the actual atomic operation.
|
||||
Simple atomic operations use a subset of the values defined to encode
|
||||
arithmetic operations in the imm field to encode the atomic operation:
|
||||
|
||||
Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
|
||||
memory location addressed by ``dst_reg + off`` is atomically modified, with
|
||||
``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
|
||||
immediate, then these operations also overwrite ``src_reg`` with the
|
||||
value that was in memory before it was modified.
|
||||
======== ===== ===========
|
||||
imm value description
|
||||
======== ===== ===========
|
||||
BPF_ADD 0x00 atomic add
|
||||
BPF_OR 0x40 atomic or
|
||||
BPF_AND 0x50 atomic and
|
||||
BPF_XOR 0xa0 atomic xor
|
||||
======== ===== ===========
|
||||
|
||||
The more special operations are::
|
||||
|
||||
BPF_XCHG
|
||||
``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means::
|
||||
|
||||
This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
|
||||
off``. ::
|
||||
*(u32 *)(dst_reg + off16) += src_reg
|
||||
|
||||
BPF_CMPXCHG
|
||||
``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF_ADD means::
|
||||
|
||||
This atomically compares the value addressed by ``dst_reg + off`` with
|
||||
``R0``. If they match it is replaced with ``src_reg``. In either case, the
|
||||
value that was there before is zero-extended and loaded back to ``R0``.
|
||||
*(u64 *)(dst_reg + off16) += src_reg
|
||||
|
||||
Note that 1 and 2 byte atomic operations are not supported.
|
||||
``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``.
|
||||
|
||||
In addition to the simple atomic operations, there also is a modifier and
|
||||
two complex atomic operations:
|
||||
|
||||
=========== ================ ===========================
|
||||
imm value description
|
||||
=========== ================ ===========================
|
||||
BPF_FETCH 0x01 modifier: return old value
|
||||
BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
|
||||
BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
|
||||
=========== ================ ===========================
|
||||
|
||||
The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
|
||||
always set for the complex atomic operations. If the ``BPF_FETCH`` flag
|
||||
is set, then the operation also overwrites ``src_reg`` with the value that
|
||||
was in memory before it was modified.
|
||||
|
||||
The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value
|
||||
addressed by ``dst_reg + off``.
|
||||
|
||||
The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
|
||||
``dst_reg + off`` with ``R0``. If they match, the value addressed by
|
||||
``dst_reg + off`` is replaced with ``src_reg``. In either case, the
|
||||
value that was at ``dst_reg + off`` before the operation is zero-extended
|
||||
and loaded back to ``R0``.
|
||||
|
||||
Clang can generate atomic instructions by default when ``-mcpu=v3`` is
|
||||
enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
|
||||
@@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable
|
||||
the atomics features, while keeping a lower ``-mcpu`` version, you can use
|
||||
``-Xclang -target-feature -Xclang +alu32``.
|
||||
|
||||
You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
|
||||
referring to the exclusive-add operation encoded when the immediate field is
|
||||
zero.
|
||||
64-bit immediate instructions
|
||||
-----------------------------
|
||||
|
||||
16-byte instructions
|
||||
--------------------
|
||||
Instructions with the ``BPF_IMM`` mode modifier use the wide instruction
|
||||
encoding for an extra imm64 value.
|
||||
|
||||
eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
|
||||
of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
|
||||
instruction that loads 64-bit immediate value into a dst_reg.
|
||||
There is currently only one such instruction.
|
||||
|
||||
Packet access instructions
|
||||
--------------------------
|
||||
``BPF_LD | BPF_DW | BPF_IMM`` means::
|
||||
|
||||
eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
|
||||
(BPF_IND | <size> | BPF_LD) which are used to access packet data.
|
||||
dst_reg = imm64
|
||||
|
||||
They had to be carried over from classic BPF to have strong performance of
|
||||
socket filters running in eBPF interpreter. These instructions can only
|
||||
be used when interpreter context is a pointer to ``struct sk_buff`` and
|
||||
have seven implicit operands. Register R6 is an implicit input that must
|
||||
contain pointer to sk_buff. Register R0 is an implicit output which contains
|
||||
the data fetched from the packet. Registers R1-R5 are scratch registers
|
||||
and must not be used to store the data across BPF_ABS | BPF_LD or
|
||||
BPF_IND | BPF_LD instructions.
|
||||
|
||||
These instructions have implicit program exit condition as well. When
|
||||
eBPF program is trying to access the data beyond the packet boundary,
|
||||
the interpreter will abort the execution of the program. JIT compilers
|
||||
therefore must preserve this property. src_reg and imm32 fields are
|
||||
explicit inputs to these instructions.
|
||||
Legacy BPF Packet access instructions
|
||||
-------------------------------------
|
||||
|
||||
For example, BPF_IND | BPF_W | BPF_LD means::
|
||||
eBPF has special instructions for access to packet data that have been
|
||||
carried over from classic BPF to retain the performance of legacy socket
|
||||
filters running in the eBPF interpreter.
|
||||
|
||||
The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and
|
||||
``BPF_IND | <size> | BPF_LD``.
|
||||
|
||||
These instructions are used to access packet data and can only be used when
|
||||
the program context is a pointer to a networking packet. ``BPF_ABS``
|
||||
accesses packet data at an absolute offset specified by the immediate data
|
||||
and ``BPF_IND`` accesses packet data at an offset that includes the value of
|
||||
a register in addition to the immediate data.
|
||||
|
||||
These instructions have seven implicit operands:
|
||||
|
||||
* Register R6 is an implicit input that must contain a pointer to a
|
||||
struct sk_buff.
|
||||
* Register R0 is an implicit output which contains the data fetched from
|
||||
the packet.
|
||||
* Registers R1-R5 are scratch registers that are clobbered after a call to
|
||||
``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
|
||||
|
||||
These instructions have an implicit program exit condition as well. When an
|
||||
eBPF program is trying to access the data beyond the packet boundary, the
|
||||
program execution will be aborted.
|
||||
|
||||
``BPF_ABS | BPF_W | BPF_LD`` means::
|
||||
|
||||
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32))
|
||||
|
||||
``BPF_IND | BPF_W | BPF_LD`` means::
|
||||
|
||||
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
|
||||
|
||||
and R1 - R5 are clobbered.
|
||||
|
||||
@@ -329,7 +329,7 @@ Program with unreachable instructions::
|
||||
BPF_EXIT_INSN(),
|
||||
};
|
||||
|
||||
Error:
|
||||
Error::
|
||||
|
||||
unreachable insn 1
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ Getting started quick
|
||||
- Compile and install kernel and modules, reboot.
|
||||
|
||||
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
|
||||
Download from http://sourceforge.net/projects/linux-udf/
|
||||
Download from https://github.com/pali/udftools
|
||||
|
||||
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
|
||||
as appropriate)::
|
||||
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
|
||||
|
||||
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
|
||||
and can be controlled by it. For example the "pktcdvd" tool uses
|
||||
this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
|
||||
this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
|
||||
|
||||
"pktcdvd" works similar to "pktsetup", e.g.::
|
||||
|
||||
|
||||
@@ -409,135 +409,25 @@ latex_elements = {
|
||||
|
||||
# Additional stuff for the LaTeX preamble.
|
||||
'preamble': '''
|
||||
% Prevent column squeezing of tabulary.
|
||||
\\setlength{\\tymin}{20em}
|
||||
% Use some font with UTF-8 support with XeLaTeX
|
||||
\\usepackage{fontspec}
|
||||
\\setsansfont{DejaVu Sans}
|
||||
\\setromanfont{DejaVu Serif}
|
||||
\\setmonofont{DejaVu Sans Mono}
|
||||
% Adjust \\headheight for fancyhdr
|
||||
\\addtolength{\\headheight}{1.6pt}
|
||||
\\addtolength{\\topmargin}{-1.6pt}
|
||||
''',
|
||||
}
|
||||
|
||||
# Translations have Asian (CJK) characters which are only displayed if
|
||||
# xeCJK is used
|
||||
|
||||
latex_elements['preamble'] += '''
|
||||
\\IfFontExistsTF{Noto Sans CJK SC}{
|
||||
% This is needed for translations
|
||||
\\usepackage{xeCJK}
|
||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
||||
\\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
}{
|
||||
\\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
}
|
||||
\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
||||
% CJK Language-specific font choices
|
||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK TC}{
|
||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK KR}{
|
||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
|
||||
\\IfFontExistsTF{Noto Serif CJK JP}{
|
||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
|
||||
}{
|
||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
}
|
||||
\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
|
||||
\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
|
||||
% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
|
||||
\\providecommand{\\onehalfspacing}{}
|
||||
\\providecommand{\\singlespacing}{}
|
||||
% Define custom macros to on/off CJK
|
||||
\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
|
||||
\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
|
||||
\\newcommand{\\kerneldocBeginSC}{%
|
||||
\\begingroup%
|
||||
\\scmain%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndSC}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginTC}{%
|
||||
\\begingroup%
|
||||
\\tcmain%
|
||||
\\renewcommand{\\CJKrmdefault}{TCserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{TCsans}%
|
||||
\\renewcommand{\\CJKttdefault}{TCmono}%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndTC}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginKR}{%
|
||||
\\begingroup%
|
||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
||||
\\krmain%
|
||||
\\renewcommand{\\CJKrmdefault}{KRserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{KRsans}%
|
||||
\\renewcommand{\\CJKttdefault}{KRmono}%
|
||||
\\xeCJKsetup{CJKspace = true} % For inter-phrase space
|
||||
}
|
||||
\\newcommand{\\kerneldocEndKR}{\\endgroup}
|
||||
\\newcommand{\\kerneldocBeginJP}{%
|
||||
\\begingroup%
|
||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
||||
\\jpmain%
|
||||
\\renewcommand{\\CJKrmdefault}{JPserif}%
|
||||
\\renewcommand{\\CJKsfdefault}{JPsans}%
|
||||
\\renewcommand{\\CJKttdefault}{JPmono}%
|
||||
}
|
||||
\\newcommand{\\kerneldocEndJP}{\\endgroup}
|
||||
% Single spacing in literal blocks
|
||||
\\fvset{baselinestretch=1}
|
||||
% To customize \\sphinxtableofcontents
|
||||
\\usepackage{etoolbox}
|
||||
% Inactivate CJK after tableofcontents
|
||||
\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
|
||||
}{ % No CJK font found
|
||||
% Custom macros to on/off CJK (Dummy)
|
||||
\\newcommand{\\kerneldocCJKon}{}
|
||||
\\newcommand{\\kerneldocCJKoff}{}
|
||||
\\newcommand{\\kerneldocBeginSC}{}
|
||||
\\newcommand{\\kerneldocEndSC}{}
|
||||
\\newcommand{\\kerneldocBeginTC}{}
|
||||
\\newcommand{\\kerneldocEndTC}{}
|
||||
\\newcommand{\\kerneldocBeginKR}{}
|
||||
\\newcommand{\\kerneldocEndKR}{}
|
||||
\\newcommand{\\kerneldocBeginJP}{}
|
||||
\\newcommand{\\kerneldocEndJP}{}
|
||||
}
|
||||
'''
|
||||
|
||||
# Fix reference escape troubles with Sphinx 1.4.x
|
||||
if major == 1:
|
||||
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
|
||||
|
||||
|
||||
# Load kerneldoc specific LaTeX settings
|
||||
latex_elements['preamble'] += '''
|
||||
% Load kerneldoc specific LaTeX settings
|
||||
\\input{kerneldoc-preamble.sty}
|
||||
'''
|
||||
|
||||
# With Sphinx 1.6, it is possible to change the Bg color directly
|
||||
# by using:
|
||||
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
|
||||
@@ -599,6 +489,11 @@ for fn in os.listdir('.'):
|
||||
# If false, no module index is generated.
|
||||
#latex_domain_indices = True
|
||||
|
||||
# Additional LaTeX stuff to be copied to build directory
|
||||
latex_additional_files = [
|
||||
'sphinx/kerneldoc-preamble.sty',
|
||||
]
|
||||
|
||||
|
||||
# -- Options for manual page output ---------------------------------------
|
||||
|
||||
|
||||
279
Documentation/core-api/entry.rst
Normal file
279
Documentation/core-api/entry.rst
Normal file
@@ -0,0 +1,279 @@
|
||||
Entry/exit handling for exceptions, interrupts, syscalls and KVM
|
||||
================================================================
|
||||
|
||||
All transitions between execution domains require state updates which are
|
||||
subject to strict ordering constraints. State updates are required for the
|
||||
following:
|
||||
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Preemption counter
|
||||
* Tracing
|
||||
* Time accounting
|
||||
|
||||
The update order depends on the transition type and is explained below in
|
||||
the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
|
||||
exceptions`_, `NMI and NMI-like exceptions`_.
|
||||
|
||||
Non-instrumentable code - noinstr
|
||||
---------------------------------
|
||||
|
||||
Most instrumentation facilities depend on RCU, so instrumentation is prohibited
|
||||
for entry code before RCU starts watching and exit code after RCU stops
|
||||
watching. In addition, many architectures must save and restore register state,
|
||||
which means that (for example) a breakpoint in the breakpoint entry code would
|
||||
overwrite the debug registers of the initial breakpoint.
|
||||
|
||||
Such code must be marked with the 'noinstr' attribute, placing that code into a
|
||||
special section inaccessible to instrumentation and debug facilities. Some
|
||||
functions are partially instrumentable, which is handled by marking them
|
||||
noinstr and using instrumentation_begin() and instrumentation_end() to flag the
|
||||
instrumentable ranges of code:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void entry(void)
|
||||
{
|
||||
handle_entry(); // <-- must be 'noinstr' or '__always_inline'
|
||||
...
|
||||
|
||||
instrumentation_begin();
|
||||
handle_context(); // <-- instrumentable code
|
||||
instrumentation_end();
|
||||
|
||||
...
|
||||
handle_exit(); // <-- must be 'noinstr' or '__always_inline'
|
||||
}
|
||||
|
||||
This allows verification of the 'noinstr' restrictions via objtool on
|
||||
supported architectures.
|
||||
|
||||
Invoking non-instrumentable functions from instrumentable context has no
|
||||
restrictions and is useful to protect e.g. state switching which would
|
||||
cause malfunction if instrumented.
|
||||
|
||||
All non-instrumentable entry/exit code sections before and after the RCU
|
||||
state transitions must run with interrupts disabled.
|
||||
|
||||
Syscalls
|
||||
--------
|
||||
|
||||
Syscall-entry code starts in assembly code and calls out into low-level C code
|
||||
after establishing low-level architecture-specific state and stack frames. This
|
||||
low-level C code must not be instrumented. A typical syscall handling function
|
||||
invoked from low-level assembly code looks like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void syscall(struct pt_regs *regs, int nr)
|
||||
{
|
||||
arch_syscall_enter(regs);
|
||||
nr = syscall_enter_from_user_mode(regs, nr);
|
||||
|
||||
instrumentation_begin();
|
||||
if (!invoke_syscall(regs, nr) && nr != -1)
|
||||
result_reg(regs) = __sys_ni_syscall(regs);
|
||||
instrumentation_end();
|
||||
|
||||
syscall_exit_to_user_mode(regs);
|
||||
}
|
||||
|
||||
syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
|
||||
establishes state in the following order:
|
||||
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Tracing
|
||||
|
||||
and then invokes the various entry work functions like ptrace, seccomp, audit,
|
||||
syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
|
||||
function can be invoked. The instrumentable code section then ends, after which
|
||||
syscall_exit_to_user_mode() is invoked.
|
||||
|
||||
syscall_exit_to_user_mode() handles all work which needs to be done before
|
||||
returning to user space like tracing, audit, signals, task work etc. After
|
||||
that it invokes exit_to_user_mode() which again handles the state
|
||||
transition in the reverse order:
|
||||
|
||||
* Tracing
|
||||
* RCU / Context tracking
|
||||
* Lockdep
|
||||
|
||||
syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
|
||||
available as fine grained subfunctions in cases where the architecture code
|
||||
has to do extra work between the various steps. In such cases it has to
|
||||
ensure that enter_from_user_mode() is called first on entry and
|
||||
exit_to_user_mode() is called last on exit.
|
||||
|
||||
Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
|
||||
to print a warning.
|
||||
|
||||
KVM
|
||||
---
|
||||
|
||||
Entering or exiting guest mode is very similar to syscalls. From the host
|
||||
kernel point of view the CPU goes off into user space when entering the
|
||||
guest and returns to the kernel on exit.
|
||||
|
||||
kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
|
||||
and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
|
||||
The state operations have the same ordering.
|
||||
|
||||
Task work handling is done separately for guest at the boundary of the
|
||||
vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
|
||||
the work handled on return to user space.
|
||||
|
||||
Do not nest KVM entry/exit transitions because doing so is nonsensical.
|
||||
|
||||
Interrupts and regular exceptions
|
||||
---------------------------------
|
||||
|
||||
Interrupt entry and exit handling is slightly more complex than syscalls
|
||||
and KVM transitions.
|
||||
|
||||
If an interrupt is raised while the CPU executes in user space, the entry
|
||||
and exit handling is exactly the same as for syscalls.
|
||||
|
||||
If the interrupt is raised while the CPU executes in kernel space the entry and
|
||||
exit handling is slightly different. RCU state is only updated when the
|
||||
interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
|
||||
already be watching. Lockdep and tracing have to be updated unconditionally.
|
||||
|
||||
irqentry_enter() and irqentry_exit() provide the implementation for this.
|
||||
|
||||
The architecture-specific part looks similar to syscall handling:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void interrupt(struct pt_regs *regs, int nr)
|
||||
{
|
||||
arch_interrupt_enter(regs);
|
||||
state = irqentry_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
|
||||
irq_enter_rcu();
|
||||
invoke_irq_handler(regs, nr);
|
||||
irq_exit_rcu();
|
||||
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_exit(regs, state);
|
||||
}
|
||||
|
||||
Note that the invocation of the actual interrupt handler is within an
|
||||
irq_enter_rcu() and irq_exit_rcu() pair.
|
||||
|
||||
irq_enter_rcu() updates the preemption count which makes in_hardirq()
|
||||
return true, handles NOHZ tick state and interrupt time accounting. This
|
||||
means that up to the point where irq_enter_rcu() is invoked in_hardirq()
|
||||
returns false.
|
||||
|
||||
irq_exit_rcu() handles interrupt time accounting, undoes the preemption
|
||||
count update and eventually handles soft interrupts and NOHZ tick state.
|
||||
|
||||
In theory, the preemption count could be updated in irqentry_enter(). In
|
||||
practice, deferring this update to irq_enter_rcu() allows the preemption-count
|
||||
code to be traced, while also maintaining symmetry with irq_exit_rcu() and
|
||||
irqentry_exit(), which are described in the next paragraph. The only downside
|
||||
is that the early entry code up to irq_enter_rcu() must be aware that the
|
||||
preemption count has not yet been updated with the HARDIRQ_OFFSET state.
|
||||
|
||||
Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
|
||||
before it handles soft interrupts, whose handlers must run in BH context rather
|
||||
than irq-disabled context. In addition, irqentry_exit() might schedule, which
|
||||
also requires that HARDIRQ_OFFSET has been removed from the preemption count.
|
||||
|
||||
Even though interrupt handlers are expected to run with local interrupts
|
||||
disabled, interrupt nesting is common from an entry/exit perspective. For
|
||||
example, softirq handling happens within an irqentry_{enter,exit}() block with
|
||||
local interrupts enabled. Also, although uncommon, nothing prevents an
|
||||
interrupt handler from re-enabling interrupts.
|
||||
|
||||
Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
|
||||
runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
|
||||
the entry code is shared between the two.
|
||||
|
||||
NMI and NMI-like exceptions
|
||||
---------------------------
|
||||
|
||||
NMIs and NMI-like exceptions (machine checks, double faults, debug
|
||||
interrupts, etc.) can hit any context and must be extra careful with
|
||||
the state.
|
||||
|
||||
State changes for debug exceptions and machine-check exceptions depend on
|
||||
whether these exceptions happened in user-space (breakpoints or watchpoints) or
|
||||
in kernel mode (code patching). From user-space, they are treated like
|
||||
interrupts, while from kernel mode they are treated like NMIs.
|
||||
|
||||
NMIs and other NMI-like exceptions handle state transitions without
|
||||
distinguishing between user-mode and kernel-mode origin.
|
||||
|
||||
The state update on entry is handled in irqentry_nmi_enter() which updates
|
||||
state in the following order:
|
||||
|
||||
* Preemption counter
|
||||
* Lockdep
|
||||
* RCU / Context tracking
|
||||
* Tracing
|
||||
|
||||
The exit counterpart irqentry_nmi_exit() does the reverse operation in the
|
||||
reverse order.
|
||||
|
||||
Note that the update of the preemption counter has to be the first
|
||||
operation on enter and the last operation on exit. The reason is that both
|
||||
lockdep and RCU rely on in_nmi() returning true in this case. The
|
||||
preemption count modification in the NMI entry/exit case must not be
|
||||
traced.
|
||||
|
||||
Architecture-specific code looks like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void nmi(struct pt_regs *regs)
|
||||
{
|
||||
arch_nmi_enter(regs);
|
||||
state = irqentry_nmi_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
nmi_handler(regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_nmi_exit(regs, state);
|
||||
}
|
||||
|
||||
and for e.g. a debug exception it can look like this:
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
noinstr void debug(struct pt_regs *regs)
|
||||
{
|
||||
arch_nmi_enter(regs);
|
||||
|
||||
debug_regs = save_debug_regs();
|
||||
|
||||
if (user_mode(regs)) {
|
||||
state = irqentry_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
user_mode_debug_handler(regs, debug_regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_exit(regs, state);
|
||||
} else {
|
||||
state = irqentry_nmi_enter(regs);
|
||||
|
||||
instrumentation_begin();
|
||||
kernel_mode_debug_handler(regs, debug_regs);
|
||||
instrumentation_end();
|
||||
|
||||
irqentry_nmi_exit(regs, state);
|
||||
}
|
||||
}
|
||||
|
||||
There is no combined irqentry_nmi_if_kernel() function available as the
|
||||
above cannot be handled in an exception-agnostic way.
|
||||
|
||||
NMIs can happen in any context. For example, an NMI-like exception can be triggered
|
||||
while handling an NMI. So NMI entry code has to be reentrant and state updates
|
||||
need to handle nesting.
|
||||
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
|
||||
timekeeping
|
||||
errseq
|
||||
|
||||
Low level entry and exit
|
||||
========================
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
entry
|
||||
|
||||
Concurrency primitives
|
||||
======================
|
||||
|
||||
|
||||
@@ -58,15 +58,30 @@ Virtually Contiguous Mappings
|
||||
File Mapping and Page Cache
|
||||
===========================
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:export:
|
||||
Filemap
|
||||
-------
|
||||
|
||||
.. kernel-doc:: mm/filemap.c
|
||||
:export:
|
||||
|
||||
Readahead
|
||||
---------
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:doc: Readahead Overview
|
||||
|
||||
.. kernel-doc:: mm/readahead.c
|
||||
:export:
|
||||
|
||||
Writeback
|
||||
---------
|
||||
|
||||
.. kernel-doc:: mm/page-writeback.c
|
||||
:export:
|
||||
|
||||
Truncate
|
||||
--------
|
||||
|
||||
.. kernel-doc:: mm/truncate.c
|
||||
:export:
|
||||
|
||||
|
||||
@@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct
|
||||
pages* array, and the function then pins pages by incrementing each by a special
|
||||
value: GUP_PIN_COUNTING_BIAS.
|
||||
|
||||
For huge pages (and in fact, any compound page of more than 2 pages), the
|
||||
GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
|
||||
is achieved, by using the 3rd struct page in the compound page. A new struct
|
||||
page field, hpage_pinned_refcount, has been added in order to support this.
|
||||
For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
|
||||
an exact form of pin counting is achieved, by using the 2nd struct page
|
||||
in the compound page. A new struct page field, compound_pincount, has
|
||||
been added in order to support this.
|
||||
|
||||
This approach for compound pages avoids the counting upper limit problems that
|
||||
are discussed below. Those limitations would have been aggravated severely by
|
||||
huge pages, because each tail page adds a refcount to the head page. And in
|
||||
fact, testing revealed that, without a separate hpage_pinned_refcount field,
|
||||
fact, testing revealed that, without a separate compound_pincount field,
|
||||
page overflows were seen in some huge page stress tests.
|
||||
|
||||
This also means that huge pages and compound pages (of order > 1) do not suffer
|
||||
This also means that huge pages and compound pages do not suffer
|
||||
from the false positives problem that is mentioned below.::
|
||||
|
||||
Function
|
||||
@@ -264,9 +264,9 @@ place.)
|
||||
Other diagnostics
|
||||
=================
|
||||
|
||||
dump_page() has been enhanced slightly, to handle these new counting fields, and
|
||||
to better report on compound pages in general. Specifically, for compound pages
|
||||
with order > 1, the exact (hpage_pinned_refcount) pincount is reported.
|
||||
dump_page() has been enhanced slightly, to handle these new counting
|
||||
fields, and to better report on compound pages in general. Specifically,
|
||||
for compound pages, the exact (compound_pincount) pincount is reported.
|
||||
|
||||
References
|
||||
==========
|
||||
|
||||
@@ -315,11 +315,15 @@ indeed the normal API is implemented in terms of the advanced API. The
|
||||
advanced API is only available to modules with a GPL-compatible license.
|
||||
|
||||
The advanced API is based around the xa_state. This is an opaque data
|
||||
structure which you declare on the stack using the XA_STATE()
|
||||
macro. This macro initialises the xa_state ready to start walking
|
||||
around the XArray. It is used as a cursor to maintain the position
|
||||
in the XArray and let you compose various operations together without
|
||||
having to restart from the top every time.
|
||||
structure which you declare on the stack using the XA_STATE() macro.
|
||||
This macro initialises the xa_state ready to start walking around the
|
||||
XArray. It is used as a cursor to maintain the position in the XArray
|
||||
and let you compose various operations together without having to restart
|
||||
from the top every time. The contents of the xa_state are protected by
|
||||
the rcu_read_lock() or the xas_lock(). If you need to drop whichever of
|
||||
those locks is protecting your state and tree, you must call xas_pause()
|
||||
so that future calls do not rely on the parts of the state which were
|
||||
left unprotected.
|
||||
|
||||
The xa_state is also used to store errors. You can call
|
||||
xas_error() to retrieve the error. All operations check whether
|
||||
|
||||
@@ -30,7 +30,7 @@ Software tag-based KASAN mode is only supported in Clang.
|
||||
|
||||
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
||||
still requires a compiler version that supports memory tagging instructions.
|
||||
This mode is supported in GCC 10+ and Clang 11+.
|
||||
This mode is supported in GCC 10+ and Clang 12+.
|
||||
|
||||
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
||||
while the hardware tag-based KASAN currently only supports SLUB.
|
||||
@@ -206,6 +206,9 @@ additional boot parameters that allow disabling KASAN or controlling features:
|
||||
Asymmetric mode: a bad access is detected synchronously on reads and
|
||||
asynchronously on writes.
|
||||
|
||||
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
|
||||
allocations (default: ``on``).
|
||||
|
||||
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
|
||||
traces collection (default: ``on``).
|
||||
|
||||
@@ -279,8 +282,8 @@ Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Software tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Software tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and vmalloc memory.
|
||||
|
||||
Hardware tag-based KASAN
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@@ -303,8 +306,8 @@ Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||
reserved to tag freed memory regions.
|
||||
|
||||
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
|
||||
memory.
|
||||
Hardware tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||
and VM_ALLOC-based vmalloc memory.
|
||||
|
||||
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
||||
@@ -319,6 +322,8 @@ checking gets disabled.
|
||||
Shadow memory
|
||||
-------------
|
||||
|
||||
The contents of this section are only applicable to software KASAN modes.
|
||||
|
||||
The kernel maps memory in several different parts of the address space.
|
||||
The range of kernel virtual addresses is large: there is not enough real
|
||||
memory to support a real shadow region for every address that could be
|
||||
@@ -349,7 +354,7 @@ CONFIG_KASAN_VMALLOC
|
||||
|
||||
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
||||
cost of greater memory usage. Currently, this is supported on x86,
|
||||
riscv, s390, and powerpc.
|
||||
arm64, riscv, s390, and powerpc.
|
||||
|
||||
This works by hooking into vmalloc and vmap and dynamically
|
||||
allocating real shadow memory to back the mappings.
|
||||
|
||||
@@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option
|
||||
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
|
||||
disables KFENCE.
|
||||
|
||||
The sample interval controls a timer that sets up KFENCE allocations. By
|
||||
default, to keep the real sample interval predictable, the normal timer also
|
||||
causes CPU wake-ups when the system is completely idle. This may be undesirable
|
||||
on power-constrained systems. The boot parameter ``kfence.deferrable=1``
|
||||
instead switches to a "deferrable" timer which does not force CPU wake-ups on
|
||||
idle systems, at the risk of unpredictable sample intervals. The default is
|
||||
configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
|
||||
|
||||
.. warning::
|
||||
The KUnit test suite is very likely to fail when using a deferrable timer
|
||||
since it currently causes very unpredictable sample intervals.
|
||||
|
||||
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
|
||||
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
|
||||
255), the number of available guarded objects can be controlled. Each object
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
========================================
|
||||
The Kernel Test Anything Protocol (KTAP)
|
||||
========================================
|
||||
===================================================
|
||||
The Kernel Test Anything Protocol (KTAP), version 1
|
||||
===================================================
|
||||
|
||||
TAP, or the Test Anything Protocol is a format for specifying test results used
|
||||
by a number of projects. Its website and specification are found at this `link
|
||||
@@ -68,7 +68,7 @@ Test case result lines
|
||||
Test case result lines indicate the final status of a test.
|
||||
They are required and must have the format:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
|
||||
|
||||
@@ -117,32 +117,32 @@ separator.
|
||||
|
||||
Example result lines include:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 1 test_case_name
|
||||
|
||||
The test "test_case_name" passed.
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
not ok 1 test_case_name
|
||||
|
||||
The test "test_case_name" failed.
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 1 test # SKIP necessary dependency unavailable
|
||||
|
||||
The test "test" was SKIPPED with the diagnostic message "necessary dependency
|
||||
unavailable".
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
not ok 1 test # TIMEOUT 30 seconds
|
||||
|
||||
The test "test" timed out, with diagnostic data "30 seconds".
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
ok 5 check return code # rcode=0
|
||||
|
||||
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
|
||||
the four formats for lines described above. This is allowed, however, they will
|
||||
not influence the status of the tests.
|
||||
|
||||
This is an important difference from TAP. Kernel tests may print messages
|
||||
to the system console or a log file. Both of these destinations may contain
|
||||
messages either from unrelated kernel or userspace activity, or kernel
|
||||
messages from non-test code that is invoked by the test. The kernel code
|
||||
invoked by the test likely is not aware that a test is in progress and
|
||||
thus cannot print the message as a diagnostic message.
|
||||
|
||||
Nested tests
|
||||
------------
|
||||
|
||||
@@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall
|
||||
result. If one of the subtests fails, for example, the parent test should also
|
||||
fail.
|
||||
|
||||
Additionally, all result lines in a subtest should be indented. One level of
|
||||
Additionally, all lines in a subtest should be indented. One level of
|
||||
indentation is two spaces: " ". The indentation should begin at the version
|
||||
line and should end before the parent test's result line.
|
||||
|
||||
"Unknown lines" are not considered to be lines in a subtest and thus are
|
||||
allowed to be either indented or not indented.
|
||||
|
||||
An example of a test with two nested subtests:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..1
|
||||
@@ -205,7 +215,7 @@ An example of a test with two nested subtests:
|
||||
|
||||
An example format with multiple levels of nested testing:
|
||||
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..2
|
||||
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
|
||||
Major differences between TAP and KTAP
|
||||
--------------------------------------
|
||||
|
||||
Note the major differences between the TAP and KTAP specification:
|
||||
- yaml and json are not recommended in diagnostic messages
|
||||
- TODO directive not recognized
|
||||
- KTAP allows for an arbitrary number of tests to be nested
|
||||
================================================== ========= ===============
|
||||
Feature TAP KTAP
|
||||
================================================== ========= ===============
|
||||
yaml and json in diagnostic message ok not recommended
|
||||
TODO directive ok not recognized
|
||||
allows an arbitrary number of tests to be nested no yes
|
||||
"Unknown lines" are in category of "Anything else" yes no
|
||||
"Unknown lines" are incorrect allowed
|
||||
================================================== ========= ===============
|
||||
|
||||
The TAP14 specification does permit nested tests, but instead of using another
|
||||
nested version line, uses a line of the form
|
||||
@@ -235,7 +250,7 @@ nested version line, uses a line of the form
|
||||
|
||||
Example KTAP output
|
||||
--------------------
|
||||
.. code-block::
|
||||
.. code-block:: none
|
||||
|
||||
KTAP version 1
|
||||
1..1
|
||||
|
||||
@@ -26,10 +26,7 @@ The fundamental unit in KUnit is the test case. The KUnit test cases are
|
||||
grouped into KUnit suites. A KUnit test case is a function with type
|
||||
signature ``void (*)(struct kunit *test)``.
|
||||
These test case functions are wrapped in a struct called
|
||||
``struct kunit_case``. For code, see:
|
||||
|
||||
.. kernel-doc:: include/kunit/test.h
|
||||
:identifiers: kunit_case
|
||||
struct kunit_case.
|
||||
|
||||
.. note::
|
||||
``generate_params`` is optional for non-parameterized tests.
|
||||
@@ -152,18 +149,12 @@ Parameterized Tests
|
||||
Each KUnit parameterized test is associated with a collection of
|
||||
parameters. The test is invoked multiple times, once for each parameter
|
||||
value and the parameter is stored in the ``param_value`` field.
|
||||
The test case includes a ``KUNIT_CASE_PARAM()`` macro that accepts a
|
||||
The test case includes a KUNIT_CASE_PARAM() macro that accepts a
|
||||
generator function.
|
||||
The generator function is passed the previous parameter and returns the next
|
||||
parameter. It also provides a macro to generate common-case generators based on
|
||||
arrays.
|
||||
|
||||
For code, see:
|
||||
|
||||
.. kernel-doc:: include/kunit/test.h
|
||||
:identifiers: KUNIT_ARRAY_PARAM
|
||||
|
||||
|
||||
kunit_tool (Command Line Test Harness)
|
||||
======================================
|
||||
|
||||
|
||||
@@ -41,13 +41,18 @@ or ``VFAT_FS``. To run ``FAT_KUNIT_TEST``, the ``.kunitconfig`` has:
|
||||
CONFIG_MSDOS_FS=y
|
||||
CONFIG_FAT_KUNIT_TEST=y
|
||||
|
||||
1. A good starting point for the ``.kunitconfig``, is the KUnit default
|
||||
config. Run the command:
|
||||
1. A good starting point for the ``.kunitconfig`` is the KUnit default config.
|
||||
You can generate it by running:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
cd $PATH_TO_LINUX_REPO
|
||||
cp tools/testing/kunit/configs/default.config .kunitconfig
|
||||
tools/testing/kunit/kunit.py config
|
||||
cat .kunit/.kunitconfig
|
||||
|
||||
.. note ::
|
||||
``.kunitconfig`` lives in the ``--build_dir`` used by kunit.py, which is
|
||||
``.kunit`` by default.
|
||||
|
||||
.. note ::
|
||||
You may want to remove CONFIG_KUNIT_ALL_TESTS from the ``.kunitconfig`` as
|
||||
|
||||
@@ -100,3 +100,5 @@ have already built it.
|
||||
|
||||
The optional make variable CF can be used to pass arguments to sparse. The
|
||||
build system passes -Wbitwise to sparse automatically.
|
||||
|
||||
Note that sparse defines the __CHECKER__ preprocessor symbol.
|
||||
|
||||
@@ -3,9 +3,10 @@ DT_DOC_CHECKER ?= dt-doc-validate
|
||||
DT_EXTRACT_EX ?= dt-extract-example
|
||||
DT_MK_SCHEMA ?= dt-mk-schema
|
||||
|
||||
DT_SCHEMA_LINT = $(shell which yamllint)
|
||||
DT_SCHEMA_LINT = $(shell which yamllint || \
|
||||
echo "warning: python package 'yamllint' not installed, skipping" >&2)
|
||||
|
||||
DT_SCHEMA_MIN_VERSION = 2021.2.1
|
||||
DT_SCHEMA_MIN_VERSION = 2022.3
|
||||
|
||||
PHONY += check_dtschema_version
|
||||
check_dtschema_version:
|
||||
@@ -24,18 +25,11 @@ quiet_cmd_extract_ex = DTEX $@
|
||||
$(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
|
||||
$(call if_changed,extract_ex)
|
||||
|
||||
# Use full schemas when checking %.example.dts
|
||||
DT_TMP_SCHEMA := $(obj)/processed-schema-examples.json
|
||||
|
||||
find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
|
||||
-name 'processed-schema*' ! \
|
||||
-name '*.example.dt.yaml' \)
|
||||
-name 'processed-schema*' \)
|
||||
|
||||
ifeq ($(DT_SCHEMA_FILES),)
|
||||
find_cmd = $(find_all_cmd)
|
||||
else
|
||||
find_cmd = echo $(addprefix $(srctree)/, $(DT_SCHEMA_FILES))
|
||||
endif
|
||||
find_cmd = $(find_all_cmd) | grep -F "$(DT_SCHEMA_FILES)"
|
||||
CHK_DT_DOCS := $(shell $(find_cmd))
|
||||
|
||||
quiet_cmd_yamllint = LINT $(src)
|
||||
cmd_yamllint = ($(find_cmd) | \
|
||||
@@ -72,35 +66,14 @@ override DTC_FLAGS := \
|
||||
# Disable undocumented compatible checks until warning free
|
||||
override DT_CHECKER_FLAGS ?=
|
||||
|
||||
$(obj)/processed-schema-examples.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
||||
$(obj)/processed-schema.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
||||
$(call if_changed_rule,chkdt)
|
||||
|
||||
ifeq ($(DT_SCHEMA_FILES),)
|
||||
|
||||
# Unless DT_SCHEMA_FILES is specified, use the full schema for dtbs_check too.
|
||||
# Just copy processed-schema-examples.json
|
||||
|
||||
$(obj)/processed-schema.json: $(obj)/processed-schema-examples.json FORCE
|
||||
$(call if_changed,copy)
|
||||
|
||||
DT_SCHEMA_FILES = $(DT_DOCS)
|
||||
|
||||
else
|
||||
|
||||
# If DT_SCHEMA_FILES is specified, use it for processed-schema.json
|
||||
|
||||
$(obj)/processed-schema.json: DT_MK_SCHEMA_FLAGS := -u
|
||||
$(obj)/processed-schema.json: $(DT_SCHEMA_FILES) check_dtschema_version FORCE
|
||||
$(call if_changed,mk_schema)
|
||||
|
||||
endif
|
||||
|
||||
always-$(CHECK_DT_BINDING) += processed-schema-examples.json
|
||||
always-$(CHECK_DTBS) += processed-schema.json
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
|
||||
always-y += processed-schema.json
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dts, $(CHK_DT_DOCS))
|
||||
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dtb, $(CHK_DT_DOCS))
|
||||
|
||||
# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
|
||||
# build artifacts here before they are processed by scripts/Makefile.clean
|
||||
clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
|
||||
-name '*.example.dt.yaml' \) -delete 2>/dev/null)
|
||||
-name '*.example.dtb' \) -delete 2>/dev/null)
|
||||
|
||||
28
Documentation/devicetree/bindings/arm/airoha.yaml
Normal file
28
Documentation/devicetree/bindings/arm/airoha.yaml
Normal file
@@ -0,0 +1,28 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/arm/airoha.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Airoha SoC based Platforms Device Tree Bindings
|
||||
|
||||
maintainers:
|
||||
- Felix Fietkau <nbd@nbd.name>
|
||||
- John Crispin <john@phrozen.org>
|
||||
|
||||
description:
|
||||
Boards with an Airoha SoC shall have the following properties.
|
||||
|
||||
properties:
|
||||
$nodename:
|
||||
const: '/'
|
||||
compatible:
|
||||
oneOf:
|
||||
- items:
|
||||
- enum:
|
||||
- airoha,en7523-evb
|
||||
- const: airoha,en7523
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
||||
@@ -13,11 +13,45 @@ properties:
|
||||
$nodename:
|
||||
const: "/"
|
||||
compatible:
|
||||
oneOf:
|
||||
- description: Arria 5 boards
|
||||
items:
|
||||
- enum:
|
||||
- altr,socfpga-cyclone5
|
||||
- altr,socfpga-arria5
|
||||
- altr,socfpga-arria10
|
||||
- altr,socfpga-arria5-socdk
|
||||
- const: altr,socfpga-arria5
|
||||
- const: altr,socfpga
|
||||
|
||||
- description: Arria 10 boards
|
||||
items:
|
||||
- enum:
|
||||
- altr,socfpga-arria10-socdk
|
||||
- enclustra,mercury-aa1
|
||||
- const: altr,socfpga-arria10
|
||||
- const: altr,socfpga
|
||||
|
||||
- description: Cyclone 5 boards
|
||||
items:
|
||||
- enum:
|
||||
- altr,socfpga-cyclone5-socdk
|
||||
- denx,mcvevk
|
||||
- ebv,socrates
|
||||
- macnica,sodia
|
||||
- novtech,chameleon96
|
||||
- samtec,vining
|
||||
- terasic,de0-atlas
|
||||
- terasic,socfpga-cyclone5-sockit
|
||||
- const: altr,socfpga-cyclone5
|
||||
- const: altr,socfpga
|
||||
|
||||
- description: Stratix 10 boards
|
||||
items:
|
||||
- enum:
|
||||
- altr,socfpga-stratix10-socdk
|
||||
- const: altr,socfpga-stratix10
|
||||
|
||||
- description: SoCFPGA VT
|
||||
items:
|
||||
- const: altr,socfpga-vt
|
||||
- const: altr,socfpga
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
@@ -108,6 +108,7 @@ properties:
|
||||
- amlogic,p230
|
||||
- amlogic,p231
|
||||
- libretech,aml-s905d-pc
|
||||
- osmc,vero4k-plus
|
||||
- phicomm,n1
|
||||
- smartlabs,sml5442tw
|
||||
- videostrong,gxl-kii-pro
|
||||
@@ -170,9 +171,14 @@ properties:
|
||||
- description: Boards with the Amlogic Meson SM1 S905X3/D3/Y3 SoC
|
||||
items:
|
||||
- enum:
|
||||
- amediatech,x96-air
|
||||
- amediatech,x96-air-gbit
|
||||
- bananapi,bpi-m5
|
||||
- cyx,a95xf3-air
|
||||
- cyx,a95xf3-air-gbit
|
||||
- hardkernel,odroid-c4
|
||||
- hardkernel,odroid-hc4
|
||||
- haochuangyi,h96-max
|
||||
- khadas,vim3l
|
||||
- seirobotics,sei610
|
||||
- const: amlogic,sm1
|
||||
@@ -183,6 +189,12 @@ properties:
|
||||
- amlogic,ad401
|
||||
- const: amlogic,a1
|
||||
|
||||
- description: Boards with the Amlogic Meson S4 S805X2 SoC
|
||||
items:
|
||||
- enum:
|
||||
- amlogic,aq222
|
||||
- const: amlogic,s4
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
...
|
||||
|
||||
@@ -42,7 +42,7 @@ patternProperties:
|
||||
description:
|
||||
The individual power management domains within this controller
|
||||
type: object
|
||||
$ref: /power/apple,pmgr-pwrstate.yaml#
|
||||
$ref: /schemas/power/apple,pmgr-pwrstate.yaml#
|
||||
|
||||
required:
|
||||
- compatible
|
||||
|
||||
@@ -119,6 +119,11 @@ examples:
|
||||
arm,hbi = <0x249>;
|
||||
interrupt-parent = <&gic>;
|
||||
|
||||
gic: interrupt-controller {
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <3>;
|
||||
};
|
||||
|
||||
/*
|
||||
* This CCI node corresponds to a CCI component whose control
|
||||
* registers sit at address 0x000000002c090000.
|
||||
|
||||
@@ -175,6 +175,15 @@ properties:
|
||||
- const: microchip,lan9668
|
||||
- const: microchip,lan966
|
||||
|
||||
- description: Kontron KSwitch D10 MMT series
|
||||
items:
|
||||
- enum:
|
||||
- kontron,kswitch-d10-mmt-8g
|
||||
- kontron,kswitch-d10-mmt-6g-2gs
|
||||
- const: kontron,s1921
|
||||
- const: microchip,lan9668
|
||||
- const: microchip,lan966
|
||||
|
||||
- items:
|
||||
- enum:
|
||||
- atmel,sams70j19
|
||||
|
||||
@@ -51,6 +51,7 @@ properties:
|
||||
- raspberrypi,3-model-b-plus
|
||||
- raspberrypi,3-compute-module
|
||||
- raspberrypi,3-compute-module-lite
|
||||
- raspberrypi,model-zero-2-w
|
||||
- const: brcm,bcm2837
|
||||
|
||||
additionalProperties: true
|
||||
|
||||
@@ -62,8 +62,8 @@ Example 1 (ARM 64-bit, 6-cpu system, two clusters):
|
||||
The capacities-dmips-mhz or DMIPS/MHz values (scaled to 1024)
|
||||
are 1024 and 578 for cluster0 and cluster1. Further normalization
|
||||
is done by the operating system based on cluster0@max-freq=1100 and
|
||||
custer1@max-freq=850, final capacities are 1024 for cluster0 and
|
||||
446 for cluster1 (576*850/1100).
|
||||
cluster1@max-freq=850, final capacities are 1024 for cluster0 and
|
||||
446 for cluster1 (578*850/1100).
|
||||
|
||||
cpus {
|
||||
#address-cells = <2>;
|
||||
|
||||
@@ -173,6 +173,7 @@ properties:
|
||||
- nvidia,tegra194-carmel
|
||||
- qcom,krait
|
||||
- qcom,kryo
|
||||
- qcom,kryo250
|
||||
- qcom,kryo260
|
||||
- qcom,kryo280
|
||||
- qcom,kryo385
|
||||
@@ -232,17 +233,19 @@ properties:
|
||||
- ti,am4372
|
||||
|
||||
cpu-release-addr:
|
||||
$ref: '/schemas/types.yaml#/definitions/uint64'
|
||||
|
||||
oneOf:
|
||||
- $ref: '/schemas/types.yaml#/definitions/uint32'
|
||||
- $ref: '/schemas/types.yaml#/definitions/uint64'
|
||||
description:
|
||||
The DT specification defines this as 64-bit always, but some 32-bit Arm
|
||||
systems have used a 32-bit value which must be supported.
|
||||
Required for systems that have an "enable-method"
|
||||
property value of "spin-table".
|
||||
On ARM v8 64-bit systems must be a two cell
|
||||
property identifying a 64-bit zero-initialised
|
||||
memory location.
|
||||
|
||||
cpu-idle-states:
|
||||
$ref: '/schemas/types.yaml#/definitions/phandle-array'
|
||||
items:
|
||||
maxItems: 1
|
||||
description: |
|
||||
List of phandles to idle state nodes supported
|
||||
by this cpu (see ./idle-states.yaml).
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user