Merge drm/drm-next into drm-misc-next
Let's start the 5.19 development cycle. Signed-off-by: Maxime Ripard <maxime@cerno.tech>
This commit is contained in:
11
.mailmap
11
.mailmap
@@ -71,6 +71,7 @@ Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
|||||||
Brian Avery <b.avery@hp.com>
|
Brian Avery <b.avery@hp.com>
|
||||||
Brian King <brking@us.ibm.com>
|
Brian King <brking@us.ibm.com>
|
||||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||||
|
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
|
||||||
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
|
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
|
||||||
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
|
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
|
||||||
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
|
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
|
||||||
@@ -80,6 +81,9 @@ Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
|
|||||||
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
|
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
|
||||||
Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
|
Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
|
||||||
Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
|
Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
|
||||||
|
Christian Brauner <brauner@kernel.org> <christian@brauner.io>
|
||||||
|
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
|
||||||
|
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
||||||
Christophe Ricard <christophe.ricard@gmail.com>
|
Christophe Ricard <christophe.ricard@gmail.com>
|
||||||
Christoph Hellwig <hch@lst.de>
|
Christoph Hellwig <hch@lst.de>
|
||||||
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
|
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
|
||||||
@@ -184,6 +188,8 @@ Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
|
|||||||
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
|
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
|
||||||
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
|
Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
|
||||||
Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
|
Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
|
||||||
|
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
|
||||||
|
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
|
||||||
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
|
||||||
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
|
||||||
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
|
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
|
||||||
@@ -207,12 +213,14 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
|||||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||||
|
Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com>
|
||||||
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
||||||
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
||||||
Koushik <raghavendra.koushik@neterion.com>
|
Koushik <raghavendra.koushik@neterion.com>
|
||||||
Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
|
Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
|
||||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
|
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
|
||||||
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
|
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
|
||||||
|
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
|
||||||
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
|
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
|
||||||
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
|
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
|
||||||
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
|
Leonardo Bras <leobras.c@gmail.com> <leonardo@linux.ibm.com>
|
||||||
@@ -330,6 +338,9 @@ Rémi Denis-Courmont <rdenis@simphalempin.com>
|
|||||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
|
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
|
||||||
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
|
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
|
||||||
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
|
Ricardo Ribalda <ribalda@kernel.org> <ricardo.ribalda@gmail.com>
|
||||||
|
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
|
||||||
|
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
|
||||||
|
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
|
||||||
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
|
Ross Zwisler <zwisler@kernel.org> <ross.zwisler@linux.intel.com>
|
||||||
Rudolf Marek <R.Marek@sh.cvut.cz>
|
Rudolf Marek <R.Marek@sh.cvut.cz>
|
||||||
Rui Saraiva <rmps@joel.ist.utl.pt>
|
Rui Saraiva <rmps@joel.ist.utl.pt>
|
||||||
|
|||||||
6
CREDITS
6
CREDITS
@@ -895,6 +895,12 @@ S: 3000 FORE Drive
|
|||||||
S: Warrendale, Pennsylvania 15086
|
S: Warrendale, Pennsylvania 15086
|
||||||
S: USA
|
S: USA
|
||||||
|
|
||||||
|
N: Ludovic Desroches
|
||||||
|
E: ludovic.desroches@microchip.com
|
||||||
|
D: Maintainer for ARM/Microchip (AT91) SoC support
|
||||||
|
D: Author of ADC, pinctrl, XDMA and SDHCI drivers for this platform
|
||||||
|
S: France
|
||||||
|
|
||||||
N: Martin Devera
|
N: Martin Devera
|
||||||
E: devik@cdi.cz
|
E: devik@cdi.cz
|
||||||
W: http://luxik.cdi.cz/~devik/qos/
|
W: http://luxik.cdi.cz/~devik/qos/
|
||||||
|
|||||||
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
10
Documentation/ABI/obsolete/procfs-i8k
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
What: /proc/i8k
|
||||||
|
Date: November 2001
|
||||||
|
KernelVersion: 2.4.14
|
||||||
|
Contact: Pali Rohár <pali@kernel.org>
|
||||||
|
Description: Legacy interface for getting/setting sensor information like
|
||||||
|
fan speed, temperature, serial number, hotkey status etc
|
||||||
|
on Dell Laptops.
|
||||||
|
Since the driver is now using the standard hwmon sysfs interface,
|
||||||
|
the procfs interface is deprecated.
|
||||||
|
Users: https://github.com/vitorafsr/i8kutils
|
||||||
37
Documentation/ABI/removed/sysfs-mce
Normal file
37
Documentation/ABI/removed/sysfs-mce
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
||||||
|
Contact: Borislav Petkov <bp@suse.de>
|
||||||
|
Date: Dec, 2021
|
||||||
|
Description:
|
||||||
|
Unused and obsolete after the advent of recoverable machine
|
||||||
|
checks (see last sentence below) and those are present since
|
||||||
|
2010 (Nehalem).
|
||||||
|
|
||||||
|
Original description:
|
||||||
|
|
||||||
|
The entries appear for each CPU, but they are truly shared
|
||||||
|
between all CPUs.
|
||||||
|
|
||||||
|
Tolerance level. When a machine check exception occurs for a
|
||||||
|
non corrected machine check the kernel can take different
|
||||||
|
actions.
|
||||||
|
|
||||||
|
Since machine check exceptions can happen any time it is
|
||||||
|
sometimes risky for the kernel to kill a process because it
|
||||||
|
defies normal kernel locking rules. The tolerance level
|
||||||
|
configures how hard the kernel tries to recover even at some
|
||||||
|
risk of deadlock. Higher tolerant values trade potentially
|
||||||
|
better uptime with the risk of a crash or even corruption
|
||||||
|
(for tolerant >= 3).
|
||||||
|
|
||||||
|
== ===========================================================
|
||||||
|
0 always panic on uncorrected errors, log corrected errors
|
||||||
|
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
||||||
|
2 SIGBUS or log uncorrected errors, log corrected errors
|
||||||
|
3 never panic or SIGBUS, log all errors (for testing only)
|
||||||
|
== ===========================================================
|
||||||
|
|
||||||
|
Default: 1
|
||||||
|
|
||||||
|
Note this only makes a difference if the CPU allows recovery
|
||||||
|
from a machine check exception. Current x86 CPUs generally
|
||||||
|
do not.
|
||||||
@@ -155,6 +155,55 @@ Description:
|
|||||||
last zone of the device which may be smaller.
|
last zone of the device which may be smaller.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/block/<disk>/queue/crypto/
|
||||||
|
Date: February 2022
|
||||||
|
Contact: linux-block@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
The presence of this subdirectory of /sys/block/<disk>/queue/
|
||||||
|
indicates that the device supports inline encryption. This
|
||||||
|
subdirectory contains files which describe the inline encryption
|
||||||
|
capabilities of the device. For more information about inline
|
||||||
|
encryption, refer to Documentation/block/inline-encryption.rst.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/block/<disk>/queue/crypto/max_dun_bits
|
||||||
|
Date: February 2022
|
||||||
|
Contact: linux-block@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
[RO] This file shows the maximum length, in bits, of data unit
|
||||||
|
numbers accepted by the device in inline encryption requests.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/block/<disk>/queue/crypto/modes/<mode>
|
||||||
|
Date: February 2022
|
||||||
|
Contact: linux-block@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
[RO] For each crypto mode (i.e., encryption/decryption
|
||||||
|
algorithm) the device supports with inline encryption, a file
|
||||||
|
will exist at this location. It will contain a hexadecimal
|
||||||
|
number that is a bitmask of the supported data unit sizes, in
|
||||||
|
bytes, for that crypto mode.
|
||||||
|
|
||||||
|
Currently, the crypto modes that may be supported are:
|
||||||
|
|
||||||
|
* AES-256-XTS
|
||||||
|
* AES-128-CBC-ESSIV
|
||||||
|
* Adiantum
|
||||||
|
|
||||||
|
For example, if a device supports AES-256-XTS inline encryption
|
||||||
|
with data unit sizes of 512 and 4096 bytes, the file
|
||||||
|
/sys/block/<disk>/queue/crypto/modes/AES-256-XTS will exist and
|
||||||
|
will contain "0x1200".
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/block/<disk>/queue/crypto/num_keyslots
|
||||||
|
Date: February 2022
|
||||||
|
Contact: linux-block@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
[RO] This file shows the number of keyslots the device has for
|
||||||
|
use with inline encryption.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/block/<disk>/queue/dax
|
What: /sys/block/<disk>/queue/dax
|
||||||
Date: June 2016
|
Date: June 2016
|
||||||
Contact: linux-block@vger.kernel.org
|
Contact: linux-block@vger.kernel.org
|
||||||
|
|||||||
@@ -86,6 +86,10 @@ What: /sys/devices/system/cpu/cpuX/topology/die_cpus
|
|||||||
Description: internal kernel map of CPUs within the same die.
|
Description: internal kernel map of CPUs within the same die.
|
||||||
Values: hexadecimal bitmask.
|
Values: hexadecimal bitmask.
|
||||||
|
|
||||||
|
What: /sys/devices/system/cpu/cpuX/topology/ppin
|
||||||
|
Description: per-socket protected processor inventory number
|
||||||
|
Values: hexadecimal.
|
||||||
|
|
||||||
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
|
What: /sys/devices/system/cpu/cpuX/topology/die_cpus_list
|
||||||
Description: human-readable list of CPUs within the same die.
|
Description: human-readable list of CPUs within the same die.
|
||||||
The format is like 0-3, 8-11, 14,17.
|
The format is like 0-3, 8-11, 14,17.
|
||||||
|
|||||||
@@ -113,3 +113,144 @@ Description:
|
|||||||
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
|
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/health_status
|
||||||
|
|
||||||
Users: Xilinx
|
Users: Xilinx
|
||||||
|
|
||||||
|
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||||
|
Description:
|
||||||
|
This sysfs interface allows user to configure features at
|
||||||
|
runtime. The user can enable or disable features running at
|
||||||
|
firmware as well as the user can configure the parameters of
|
||||||
|
the features at runtime. The supported features are over
|
||||||
|
temperature and external watchdog. Here, the external watchdog
|
||||||
|
is completely different than the /dev/watchdog as the external
|
||||||
|
watchdog is running on the firmware and it is used to monitor
|
||||||
|
the health of firmware not APU(Linux). Also, the external
|
||||||
|
watchdog is interfaced outside of the zynqmp soc.
|
||||||
|
|
||||||
|
The supported config ids for the feature configuration are,
|
||||||
|
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||||
|
disable the over temperature feature.
|
||||||
|
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||||
|
over temperature limit in Degree Celsius.
|
||||||
|
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||||
|
the external watchdog feature.
|
||||||
|
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||||
|
external watchdog feature.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
Select over temperature config ID to enable/disable feature
|
||||||
|
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
|
||||||
|
Check over temperature config ID is selected or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
The expected result is 1.
|
||||||
|
|
||||||
|
Select over temperature config ID to configure OT limit
|
||||||
|
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
|
||||||
|
Check over temperature config ID is selected or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
The expected result is 2.
|
||||||
|
|
||||||
|
Select external watchdog config ID to enable/disable feature
|
||||||
|
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
|
||||||
|
Check external watchdog config ID is selected or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
The expected result is 3.
|
||||||
|
|
||||||
|
Select external watchdog config ID to configure time interval
|
||||||
|
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
|
||||||
|
Check external watchdog config ID is selected or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
The expected result is 4.
|
||||||
|
|
||||||
|
Users: Xilinx
|
||||||
|
|
||||||
|
What: /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "Ronak Jain" <ronak.jain@xilinx.com>
|
||||||
|
Description:
|
||||||
|
This sysfs interface allows to configure features at runtime.
|
||||||
|
The user can enable or disable features running at firmware.
|
||||||
|
Also, the user can configure the parameters of the features
|
||||||
|
at runtime. The supported features are over temperature and
|
||||||
|
external watchdog. Here, the external watchdog is completely
|
||||||
|
different than the /dev/watchdog as the external watchdog is
|
||||||
|
running on the firmware and it is used to monitor the health
|
||||||
|
of firmware not APU(Linux). Also, the external watchdog is
|
||||||
|
interfaced outside of the zynqmp soc.
|
||||||
|
|
||||||
|
By default the features are disabled in the firmware. The user
|
||||||
|
can enable features by querying appropriate config id of the
|
||||||
|
features.
|
||||||
|
|
||||||
|
The default limit for the over temperature is 90 Degree Celsius.
|
||||||
|
The default timer interval for the external watchdog is 570ms.
|
||||||
|
|
||||||
|
The supported config ids for the feature configuration are,
|
||||||
|
1. PM_FEATURE_OVERTEMP_STATUS = 1, the user can enable or
|
||||||
|
disable the over temperature feature.
|
||||||
|
2. PM_FEATURE_OVERTEMP_VALUE = 2, the user can configure the
|
||||||
|
over temperature limit in Degree Celsius.
|
||||||
|
3. PM_FEATURE_EXTWDT_STATUS = 3, the user can enable or disable
|
||||||
|
the external watchdog feature.
|
||||||
|
4. PM_FEATURE_EXTWDT_VALUE = 4, the user can configure the
|
||||||
|
external watchdog feature.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
Enable over temperature feature
|
||||||
|
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the over temperature feature is enabled or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 1.
|
||||||
|
|
||||||
|
Disable over temperature feature
|
||||||
|
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the over temperature feature is disabled or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 0.
|
||||||
|
|
||||||
|
Configure over temperature limit to 50 Degree Celsius
|
||||||
|
# echo 2 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 50 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the over temperature limit is configured or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 50.
|
||||||
|
|
||||||
|
Enable external watchdog feature
|
||||||
|
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 1 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the external watchdog feature is enabled or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 1.
|
||||||
|
|
||||||
|
Disable external watchdog feature
|
||||||
|
# echo 3 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 0 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the external watchdog feature is disabled or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 0.
|
||||||
|
|
||||||
|
Configure external watchdog timer interval to 500ms
|
||||||
|
# echo 4 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_id
|
||||||
|
# echo 500 > /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
|
||||||
|
Check whether the external watchdog timer interval is configured or not
|
||||||
|
# cat /sys/devices/platform/firmware\:zynqmp-firmware/feature_config_value
|
||||||
|
The expected result is 500.
|
||||||
|
|
||||||
|
Users: Xilinx
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Description:
|
|||||||
|
|
||||||
===================== =======================================
|
===================== =======================================
|
||||||
c_chmask capture channel mask
|
c_chmask capture channel mask
|
||||||
c_srate capture sampling rate
|
c_srate list of capture sampling rates (comma-separated)
|
||||||
c_ssize capture sample size (bytes)
|
c_ssize capture sample size (bytes)
|
||||||
c_mute_present capture mute control enable
|
c_mute_present capture mute control enable
|
||||||
c_volume_present capture volume control enable
|
c_volume_present capture volume control enable
|
||||||
@@ -17,7 +17,7 @@ Description:
|
|||||||
c_volume_res capture volume control resolution
|
c_volume_res capture volume control resolution
|
||||||
(in 1/256 dB)
|
(in 1/256 dB)
|
||||||
p_chmask playback channel mask
|
p_chmask playback channel mask
|
||||||
p_srate playback sampling rate
|
p_srate list of playback sampling rates (comma-separated)
|
||||||
p_ssize playback sample size (bytes)
|
p_ssize playback sample size (bytes)
|
||||||
p_mute_present playback mute control enable
|
p_mute_present playback mute control enable
|
||||||
p_volume_present playback volume control enable
|
p_volume_present playback volume control enable
|
||||||
@@ -29,4 +29,5 @@ Description:
|
|||||||
(in 1/256 dB)
|
(in 1/256 dB)
|
||||||
req_number the number of pre-allocated requests
|
req_number the number of pre-allocated requests
|
||||||
for both capture and playback
|
for both capture and playback
|
||||||
|
function_name name of the interface
|
||||||
===================== =======================================
|
===================== =======================================
|
||||||
|
|||||||
@@ -6,8 +6,9 @@ Description:
|
|||||||
|
|
||||||
===================== =======================================
|
===================== =======================================
|
||||||
c_chmask capture channel mask
|
c_chmask capture channel mask
|
||||||
c_srate capture sampling rate
|
c_srate list of capture sampling rates (comma-separated)
|
||||||
c_ssize capture sample size (bytes)
|
c_ssize capture sample size (bytes)
|
||||||
|
c_hs_bint capture bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||||
c_sync capture synchronization type
|
c_sync capture synchronization type
|
||||||
(async/adaptive)
|
(async/adaptive)
|
||||||
c_mute_present capture mute control enable
|
c_mute_present capture mute control enable
|
||||||
@@ -20,8 +21,9 @@ Description:
|
|||||||
(in 1/256 dB)
|
(in 1/256 dB)
|
||||||
fb_max maximum extra bandwidth in async mode
|
fb_max maximum extra bandwidth in async mode
|
||||||
p_chmask playback channel mask
|
p_chmask playback channel mask
|
||||||
p_srate playback sampling rate
|
p_srate list of playback sampling rates (comma-separated)
|
||||||
p_ssize playback sample size (bytes)
|
p_ssize playback sample size (bytes)
|
||||||
|
p_hs_bint playback bInterval for HS/SS (1-4: fixed, 0: auto)
|
||||||
p_mute_present playback mute control enable
|
p_mute_present playback mute control enable
|
||||||
p_volume_present playback volume control enable
|
p_volume_present playback volume control enable
|
||||||
p_volume_min playback volume control min value
|
p_volume_min playback volume control min value
|
||||||
@@ -32,4 +34,5 @@ Description:
|
|||||||
(in 1/256 dB)
|
(in 1/256 dB)
|
||||||
req_number the number of pre-allocated requests
|
req_number the number of pre-allocated requests
|
||||||
for both capture and playback
|
for both capture and playback
|
||||||
|
function_name name of the interface
|
||||||
===================== =======================================
|
===================== =======================================
|
||||||
|
|||||||
@@ -12,24 +12,7 @@ What: /sys/kernel/debug/habanalabs/hl<n>/clk_gate
|
|||||||
Date: May 2020
|
Date: May 2020
|
||||||
KernelVersion: 5.8
|
KernelVersion: 5.8
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Allow the root user to disable/enable in runtime the clock
|
Description: This setting is now deprecated as clock gating is handled solely by the f/w
|
||||||
gating mechanism in Gaudi. Due to how Gaudi is built, the
|
|
||||||
clock gating needs to be disabled in order to access the
|
|
||||||
registers of the TPC and MME engines. This is sometimes needed
|
|
||||||
during debug by the user and hence the user needs this option.
|
|
||||||
The user can supply a bitmask value, each bit represents
|
|
||||||
a different engine to disable/enable its clock gating feature.
|
|
||||||
The bitmask is composed of 20 bits:
|
|
||||||
|
|
||||||
======= ============
|
|
||||||
0 - 7 DMA channels
|
|
||||||
8 - 11 MME engines
|
|
||||||
12 - 19 TPC engines
|
|
||||||
======= ============
|
|
||||||
|
|
||||||
The bit's location of a specific engine can be determined
|
|
||||||
using (1 << GAUDI_ENGINE_ID_*). GAUDI_ENGINE_ID_* values
|
|
||||||
are defined in uapi habanalabs.h file in enum gaudi_engine_id
|
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
What: /sys/kernel/debug/habanalabs/hl<n>/command_buffers
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
@@ -239,6 +222,7 @@ KernelVersion: 5.6
|
|||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets the stop-on_error option for the device engines. Value of
|
Description: Sets the stop-on_error option for the device engines. Value of
|
||||||
"0" is for disable, otherwise enable.
|
"0" is for disable, otherwise enable.
|
||||||
|
Relevant only for GOYA and GAUDI.
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
What: /sys/kernel/debug/habanalabs/hl<n>/timeout_locked
|
||||||
Date: Sep 2021
|
Date: Sep 2021
|
||||||
|
|||||||
@@ -27,6 +27,16 @@ Description: One HPRE controller has one PF and multiple VFs, each function
|
|||||||
has a QM. Select the QM which below qm refers to.
|
has a QM. Select the QM which below qm refers to.
|
||||||
Only available for PF.
|
Only available for PF.
|
||||||
|
|
||||||
|
What: /sys/kernel/debug/hisi_hpre/<bdf>/alg_qos
|
||||||
|
Date: Jun 2021
|
||||||
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
Description: The <bdf> is related to the function for PF and VF.
|
||||||
|
HPRE driver supports to configure each function's QoS, the driver
|
||||||
|
supports to write <bdf> value to alg_qos in the host. Such as
|
||||||
|
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||||
|
1/1000~1000/1000 of total QoS. The driver reading alg_qos to
|
||||||
|
get related QoS in the host and VM, Such as "cat alg_qos".
|
||||||
|
|
||||||
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
What: /sys/kernel/debug/hisi_hpre/<bdf>/regs
|
||||||
Date: Sep 2019
|
Date: Sep 2019
|
||||||
Contact: linux-crypto@vger.kernel.org
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
|||||||
@@ -14,6 +14,16 @@ Description: One SEC controller has one PF and multiple VFs, each function
|
|||||||
qm refers to.
|
qm refers to.
|
||||||
Only available for PF.
|
Only available for PF.
|
||||||
|
|
||||||
|
What: /sys/kernel/debug/hisi_sec2/<bdf>/alg_qos
|
||||||
|
Date: Jun 2021
|
||||||
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
Description: The <bdf> is related to the function for PF and VF.
|
||||||
|
SEC driver supports to configure each function's QoS, the driver
|
||||||
|
supports to write <bdf> value to alg_qos in the host. Such as
|
||||||
|
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||||
|
1/1000~1000/1000 of total QoS. The driver reading alg_qos to
|
||||||
|
get related QoS in the host and VM, Such as "cat alg_qos".
|
||||||
|
|
||||||
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
|
What: /sys/kernel/debug/hisi_sec2/<bdf>/qm/qm_regs
|
||||||
Date: Oct 2019
|
Date: Oct 2019
|
||||||
Contact: linux-crypto@vger.kernel.org
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
|||||||
@@ -26,6 +26,16 @@ Description: One ZIP controller has one PF and multiple VFs, each function
|
|||||||
has a QM. Select the QM which below qm refers to.
|
has a QM. Select the QM which below qm refers to.
|
||||||
Only available for PF.
|
Only available for PF.
|
||||||
|
|
||||||
|
What: /sys/kernel/debug/hisi_zip/<bdf>/alg_qos
|
||||||
|
Date: Jun 2021
|
||||||
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
Description: The <bdf> is related to the function for PF and VF.
|
||||||
|
ZIP driver supports to configure each function's QoS, the driver
|
||||||
|
supports to write <bdf> value to alg_qos in the host. Such as
|
||||||
|
"echo <bdf> value > alg_qos". The qos value is 1~1000, means
|
||||||
|
1/1000~1000/1000 of total QoS. The driver reading alg_qos to
|
||||||
|
get related QoS in the host and VM, Such as "cat alg_qos".
|
||||||
|
|
||||||
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
|
What: /sys/kernel/debug/hisi_zip/<bdf>/qm/regs
|
||||||
Date: Nov 2018
|
Date: Nov 2018
|
||||||
Contact: linux-crypto@vger.kernel.org
|
Contact: linux-crypto@vger.kernel.org
|
||||||
|
|||||||
@@ -1,3 +1,12 @@
|
|||||||
|
What: /sys/bus/cxl/flush
|
||||||
|
Date: January, 2022
|
||||||
|
KernelVersion: v5.18
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(WO) If userspace manually unbinds a port the kernel schedules
|
||||||
|
all descendant memdevs for unbind. Writing '1' to this attribute
|
||||||
|
flushes that work.
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/firmware_version
|
What: /sys/bus/cxl/devices/memX/firmware_version
|
||||||
Date: December, 2020
|
Date: December, 2020
|
||||||
KernelVersion: v5.12
|
KernelVersion: v5.12
|
||||||
@@ -25,6 +34,24 @@ Description:
|
|||||||
identically named field in the Identify Memory Device Output
|
identically named field in the Identify Memory Device Output
|
||||||
Payload in the CXL-2.0 specification.
|
Payload in the CXL-2.0 specification.
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/memX/serial
|
||||||
|
Date: January, 2022
|
||||||
|
KernelVersion: v5.18
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) 64-bit serial number per the PCIe Device Serial Number
|
||||||
|
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
|
||||||
|
Memory Device PCIe Capabilities and Extended Capabilities.
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/memX/numa_node
|
||||||
|
Date: January, 2022
|
||||||
|
KernelVersion: v5.18
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) If NUMA is enabled and the platform has affinitized the
|
||||||
|
host PCI device for this memory device, emit the CPU node
|
||||||
|
affinity for this device.
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/*/devtype
|
What: /sys/bus/cxl/devices/*/devtype
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
@@ -34,6 +61,15 @@ Description:
|
|||||||
the same value communicated in the DEVTYPE environment variable
|
the same value communicated in the DEVTYPE environment variable
|
||||||
for uevents for devices on the "cxl" bus.
|
for uevents for devices on the "cxl" bus.
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/*/modalias
|
||||||
|
Date: December, 2021
|
||||||
|
KernelVersion: v5.18
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
CXL device objects export the modalias attribute which mirrors
|
||||||
|
the same value communicated in the MODALIAS environment variable
|
||||||
|
for uevents for devices on the "cxl" bus.
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/portX/uport
|
What: /sys/bus/cxl/devices/portX/uport
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
|
|||||||
@@ -476,6 +476,7 @@ What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
|
|||||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage_calibscale
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
What: /sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
|
||||||
@@ -1213,6 +1214,32 @@ Description:
|
|||||||
number or direction is not specified, applies to all channels of
|
number or direction is not specified, applies to all channels of
|
||||||
this type.
|
this type.
|
||||||
|
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_en
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_en
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_en
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_en
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_en
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_en
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Similar to in_accel_mag[_y][_rising|_falling]_en, but the event
|
||||||
|
value is relative to a reference magnitude. The reference magnitude
|
||||||
|
includes the gravitational acceleration.
|
||||||
|
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_value
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_rising_value
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_mag_referenced_falling_value
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_value
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_rising_value
|
||||||
|
What: /sys/.../iio:deviceX/events/in_accel_y_mag_referenced_falling_value
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
The value to which the reference magnitude of the channel is
|
||||||
|
compared. If the axis is not specified, it applies to all channels
|
||||||
|
of this type.
|
||||||
|
|
||||||
What: /sys/.../events/in_steps_change_en
|
What: /sys/.../events/in_steps_change_en
|
||||||
KernelVersion: 4.0
|
KernelVersion: 4.0
|
||||||
Contact: linux-iio@vger.kernel.org
|
Contact: linux-iio@vger.kernel.org
|
||||||
@@ -1252,6 +1279,10 @@ Description:
|
|||||||
Actually start the buffer capture up. Will start trigger
|
Actually start the buffer capture up. Will start trigger
|
||||||
if first device and appropriate.
|
if first device and appropriate.
|
||||||
|
|
||||||
|
Note that it might be impossible to configure other attributes,
|
||||||
|
(e.g.: events, scale, sampling rate) if they impact the currently
|
||||||
|
active buffer capture session.
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/bufferY
|
What: /sys/bus/iio/devices/iio:deviceX/bufferY
|
||||||
KernelVersion: 5.11
|
KernelVersion: 5.11
|
||||||
Contact: linux-iio@vger.kernel.org
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
|||||||
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
13
Documentation/ABI/testing/sysfs-bus-iio-adc-ad7280a
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_en
|
||||||
|
KernelVersion: 5.14
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Used to enable an output for balancing cells for time
|
||||||
|
controlled via in_voltageY-voltageZ_balance_switch_timer.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_balance_switch_timer
|
||||||
|
KernelVersion: 5.14
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Time in seconds for which balance switch will be turned on.
|
||||||
|
Multiple of 71.5 seconds.
|
||||||
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
86
Documentation/ABI/testing/sysfs-bus-iio-dac-ltc2688
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_en
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Dither enable. Write 1 to enable dither or 0 to disable it. This is useful
|
||||||
|
for changing the dither parameters. The way it should be done is:
|
||||||
|
|
||||||
|
- disable dither operation;
|
||||||
|
- change dither parameters (eg: frequency, phase...);
|
||||||
|
- enable dither operation.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
This raw, unscaled value refers to the dither signal amplitude.
|
||||||
|
The same scale as in out_voltageY_raw applies. However, the
|
||||||
|
offset might be different as it's always 0 for this attribute.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_raw_available
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Available range for dither raw amplitude values.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_offset
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Offset applied to out_voltageY_dither_raw. Read only attribute
|
||||||
|
always set to 0.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Sets the dither signal frequency. Units are in Hz.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_frequency_available
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Returns the available values for the dither frequency.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Sets the dither signal phase. Units are in Radians.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_dither_phase_available
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Returns the available values for the dither phase.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_toggle_en
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Toggle enable. Write 1 to enable toggle or 0 to disable it. This is
|
||||||
|
useful when one wants to change the DAC output codes. The way it should
|
||||||
|
be done is:
|
||||||
|
|
||||||
|
- disable toggle operation;
|
||||||
|
- change out_voltageY_raw0 and out_voltageY_raw1;
|
||||||
|
- enable toggle operation.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw0
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_raw1
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
It has the same meaning as out_voltageY_raw. This attribute is
|
||||||
|
specific to toggle enabled channels and refers to the DAC output
|
||||||
|
code in INPUT_A (_raw0) and INPUT_B (_raw1). The same scale and offset
|
||||||
|
as in out_voltageY_raw applies.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/out_voltageY_symbol
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Performs a SW toggle. This attribute is specific to toggle
|
||||||
|
enabled channels and allows to toggle between out_voltageY_raw0
|
||||||
|
and out_voltageY_raw1 through software. Writing 0 will select
|
||||||
|
out_voltageY_raw0 while 1 selects out_voltageY_raw1.
|
||||||
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
23
Documentation/ABI/testing/sysfs-bus-iio-frequency-admv1014
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_coarse
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Read/write value for the digital attenuator gain (IF_I) with coarse steps.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_coarse
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Read/write value for the digital attenuator gain (IF_Q) with coarse steps.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_i_calibscale_fine
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Read/write value for the digital attenuator gain (IF_I) with fine steps.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_altvoltage0_q_calibscale_fine
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Read/write value for the digital attenuator gain (IF_Q) with fine steps.
|
||||||
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
28
Documentation/ABI/testing/sysfs-bus-iio-sx9324
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
What: /sys/bus/iio/devices/iio:deviceX/in_proximity<id>_setup
|
||||||
|
Date: November 2021
|
||||||
|
KernelVersion: 5.17
|
||||||
|
Contact: Gwendal Grignou <gwendal@chromium.org>
|
||||||
|
Description:
|
||||||
|
SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
|
||||||
|
defines if the input is
|
||||||
|
+ not connected (HZ),
|
||||||
|
+ grounded (GD),
|
||||||
|
+ connected to an antenna where it can act as a base
|
||||||
|
(DS - data shield), or measured input (MI).
|
||||||
|
|
||||||
|
The sensor rotates measurement across 4 phases
|
||||||
|
(PH0, PH1, PH2, PH3), where the inputs are configured
|
||||||
|
and then measured.
|
||||||
|
|
||||||
|
By default, during the first phase, [PH0], CS0 is measured,
|
||||||
|
while CS1 and CS2 are used as shields.
|
||||||
|
`cat in_proximity0_setup` returns "MI,DS,DS".
|
||||||
|
[PH1], CS1 is measured, CS0 and CS2 are shield:
|
||||||
|
`cat in_proximity1_setup` returns "DS,MI,DS".
|
||||||
|
[PH2], CS2 is measured, CS0 and CS1 are shield:
|
||||||
|
`cat in_proximity2_setup` returns "DS,DS,MI".
|
||||||
|
[PH3], CS1 and CS2 are measured (combo mode):
|
||||||
|
`cat in_proximity3_setup` returns "DS,MI,MI".
|
||||||
|
|
||||||
|
Note, these are the chip default. Hardware layout will most
|
||||||
|
likely dictate different output. The entry is read-only.
|
||||||
@@ -6,3 +6,38 @@ Description:
|
|||||||
|
|
||||||
The libnvdimm sub-system implements a common sysfs interface for
|
The libnvdimm sub-system implements a common sysfs interface for
|
||||||
platform nvdimm resources. See Documentation/driver-api/nvdimm/.
|
platform nvdimm resources. See Documentation/driver-api/nvdimm/.
|
||||||
|
|
||||||
|
What: /sys/bus/event_source/devices/nmemX/format
|
||||||
|
Date: February 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||||
|
Description: (RO) Attribute group to describe the magic bits
|
||||||
|
that go into perf_event_attr.config for a particular pmu.
|
||||||
|
(See ABI/testing/sysfs-bus-event_source-devices-format).
|
||||||
|
|
||||||
|
Each attribute under this group defines a bit range of the
|
||||||
|
perf_event_attr.config. Supported attribute is listed
|
||||||
|
below::
|
||||||
|
event = "config:0-4" - event ID
|
||||||
|
|
||||||
|
For example::
|
||||||
|
ctl_res_cnt = "event=0x1"
|
||||||
|
|
||||||
|
What: /sys/bus/event_source/devices/nmemX/events
|
||||||
|
Date: February 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||||
|
Description: (RO) Attribute group to describe performance monitoring events
|
||||||
|
for the nvdimm memory device. Each attribute in this group
|
||||||
|
describes a single performance monitoring event supported by
|
||||||
|
this nvdimm pmu. The name of the file is the name of the event.
|
||||||
|
(See ABI/testing/sysfs-bus-event_source-devices-events). A
|
||||||
|
listing of the events supported by a given nvdimm provider type
|
||||||
|
can be found in Documentation/driver-api/nvdimm/$provider.
|
||||||
|
|
||||||
|
What: /sys/bus/event_source/devices/nmemX/cpumask
|
||||||
|
Date: February 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: Kajol Jain <kjain@linux.ibm.com>
|
||||||
|
Description: (RO) This sysfs file exposes the cpumask which is designated to
|
||||||
|
retrieve nvdimm pmu event counter data.
|
||||||
|
|||||||
@@ -61,3 +61,15 @@ Description:
|
|||||||
* "CchRHCnt" : Cache Read Hit Count
|
* "CchRHCnt" : Cache Read Hit Count
|
||||||
* "CchWHCnt" : Cache Write Hit Count
|
* "CchWHCnt" : Cache Write Hit Count
|
||||||
* "FastWCnt" : Fast Write Count
|
* "FastWCnt" : Fast Write Count
|
||||||
|
|
||||||
|
What: /sys/bus/nd/devices/nmemX/papr/health_bitmap_inject
|
||||||
|
Date: Jan, 2022
|
||||||
|
KernelVersion: v5.17
|
||||||
|
Contact: linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev
|
||||||
|
Description:
|
||||||
|
(RO) Reports the health bitmap inject bitmap that is applied to
|
||||||
|
bitmap received from PowerVM via the H_SCM_HEALTH. This is used
|
||||||
|
to forcibly set specific bits returned from Hcall. These are then
|
||||||
|
used to simulate various health or shutdown states for an nvdimm
|
||||||
|
and are set by user-space tools like ndctl by issuing a PAPR DSM.
|
||||||
|
|
||||||
|
|||||||
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
16
Documentation/ABI/testing/sysfs-bus-peci
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
What: /sys/bus/peci/rescan
|
||||||
|
Date: July 2021
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||||
|
Description:
|
||||||
|
Writing a non-zero value to this attribute will
|
||||||
|
initiate scan for PECI devices on all PECI controllers
|
||||||
|
in the system.
|
||||||
|
|
||||||
|
What: /sys/bus/peci/devices/<controller_id>-<device_addr>/remove
|
||||||
|
Date: July 2021
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: Iwona Winiarska <iwona.winiarska@intel.com>
|
||||||
|
Description:
|
||||||
|
Writing a non-zero value to this attribute will
|
||||||
|
remove the PECI device and any of its children.
|
||||||
@@ -246,6 +246,51 @@ Description:
|
|||||||
that is being referenced (e.g hdd0, hdd1 etc)
|
that is being referenced (e.g hdd0, hdd1 etc)
|
||||||
This attribute defaults to device 0.
|
This attribute defaults to device 0.
|
||||||
|
|
||||||
|
certificate:
|
||||||
|
signature:
|
||||||
|
save_signature:
|
||||||
|
These attributes are used for certificate based authentication. This is
|
||||||
|
used in conjunction with a signing server as an alternative to password
|
||||||
|
based authentication.
|
||||||
|
The user writes to the attribute(s) with a BASE64 encoded string obtained
|
||||||
|
from the signing server.
|
||||||
|
The attributes can be displayed to check the stored value.
|
||||||
|
|
||||||
|
Some usage examples:
|
||||||
|
Installing a certificate to enable feature:
|
||||||
|
echo <supervisor password> > authentication/Admin/current_password
|
||||||
|
echo <signed certificate> > authentication/Admin/certificate
|
||||||
|
|
||||||
|
Updating the installed certificate:
|
||||||
|
echo <signature> > authentication/Admin/signature
|
||||||
|
echo <signed certificate> > authentication/Admin/certificate
|
||||||
|
|
||||||
|
Removing the installed certificate:
|
||||||
|
echo <signature> > authentication/Admin/signature
|
||||||
|
echo '' > authentication/Admin/certificate
|
||||||
|
|
||||||
|
Changing a BIOS setting:
|
||||||
|
echo <signature> > authentication/Admin/signature
|
||||||
|
echo <save signature> > authentication/Admin/save_signature
|
||||||
|
echo Enable > attribute/PasswordBeep/current_value
|
||||||
|
|
||||||
|
You cannot enable certificate authentication if a supervisor password
|
||||||
|
has not been set.
|
||||||
|
Clearing the certificate results in no bios-admin authentication method
|
||||||
|
being configured allowing anyone to make changes.
|
||||||
|
After any of these operations the system must reboot for the changes to
|
||||||
|
take effect.
|
||||||
|
|
||||||
|
certificate_thumbprint:
|
||||||
|
Read only attribute used to display the MD5, SHA1 and SHA256 thumbprints
|
||||||
|
for the certificate installed in the BIOS.
|
||||||
|
|
||||||
|
certificate_to_password:
|
||||||
|
Write only attribute used to switch from certificate based authentication
|
||||||
|
back to password based.
|
||||||
|
Usage:
|
||||||
|
echo <signature> > authentication/Admin/signature
|
||||||
|
echo <password> > authentication/Admin/certificate_to_password
|
||||||
|
|
||||||
|
|
||||||
What: /sys/class/firmware-attributes/*/attributes/pending_reboot
|
What: /sys/class/firmware-attributes/*/attributes/pending_reboot
|
||||||
|
|||||||
@@ -9,6 +9,14 @@ Description:
|
|||||||
|
|
||||||
RO
|
RO
|
||||||
|
|
||||||
|
What: /sys/class/hwmon/hwmonX/label
|
||||||
|
Description:
|
||||||
|
A descriptive label that allows to uniquely identify a
|
||||||
|
device within the system.
|
||||||
|
The contents of the label are free-form.
|
||||||
|
|
||||||
|
RO
|
||||||
|
|
||||||
What: /sys/class/hwmon/hwmonX/update_interval
|
What: /sys/class/hwmon/hwmonX/update_interval
|
||||||
Description:
|
Description:
|
||||||
The interval at which the chip will update readings.
|
The interval at which the chip will update readings.
|
||||||
|
|||||||
@@ -380,13 +380,17 @@ Description:
|
|||||||
algorithm to adjust the charge rate dynamically, without
|
algorithm to adjust the charge rate dynamically, without
|
||||||
any user configuration required. "Custom" means that the charger
|
any user configuration required. "Custom" means that the charger
|
||||||
uses the charge_control_* properties as configuration for some
|
uses the charge_control_* properties as configuration for some
|
||||||
different algorithm.
|
different algorithm. "Long Life" means the charger reduces its
|
||||||
|
charging rate in order to prolong the battery health. "Bypass"
|
||||||
|
means the charger bypasses the charging path around the
|
||||||
|
integrated converter allowing for a "smart" wall adaptor to
|
||||||
|
perform the power conversion externally.
|
||||||
|
|
||||||
Access: Read, Write
|
Access: Read, Write
|
||||||
|
|
||||||
Valid values:
|
Valid values:
|
||||||
"Unknown", "N/A", "Trickle", "Fast", "Standard",
|
"Unknown", "N/A", "Trickle", "Fast", "Standard",
|
||||||
"Adaptive", "Custom"
|
"Adaptive", "Custom", "Long Life", "Bypass"
|
||||||
|
|
||||||
What: /sys/class/power_supply/<supply_name>/charge_term_current
|
What: /sys/class/power_supply/<supply_name>/charge_term_current
|
||||||
Date: July 2014
|
Date: July 2014
|
||||||
@@ -468,6 +472,7 @@ Description:
|
|||||||
auto: Charge normally, respect thresholds
|
auto: Charge normally, respect thresholds
|
||||||
inhibit-charge: Do not charge while AC is attached
|
inhibit-charge: Do not charge while AC is attached
|
||||||
force-discharge: Force discharge while AC is attached
|
force-discharge: Force discharge while AC is attached
|
||||||
|
================ ====================================
|
||||||
|
|
||||||
What: /sys/class/power_supply/<supply_name>/technology
|
What: /sys/class/power_supply/<supply_name>/technology
|
||||||
Date: May 2007
|
Date: May 2007
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ What: /sys/devices/system/cpu/cpuX/topology/core_id
|
|||||||
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
||||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
||||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
|
/sys/devices/system/cpu/cpuX/topology/thread_siblings_list
|
||||||
|
/sys/devices/system/cpu/cpuX/topology/ppin
|
||||||
Date: December 2008
|
Date: December 2008
|
||||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||||
Description: CPU topology files that describe a logical CPU's relationship
|
Description: CPU topology files that describe a logical CPU's relationship
|
||||||
@@ -103,6 +104,11 @@ Description: CPU topology files that describe a logical CPU's relationship
|
|||||||
thread_siblings_list: human-readable list of cpuX's hardware
|
thread_siblings_list: human-readable list of cpuX's hardware
|
||||||
threads within the same core as cpuX
|
threads within the same core as cpuX
|
||||||
|
|
||||||
|
ppin: human-readable Protected Processor Identification
|
||||||
|
Number of the socket the cpu# belongs to. There should be
|
||||||
|
one per physical_package_id. File is readable only to
|
||||||
|
admin.
|
||||||
|
|
||||||
See Documentation/admin-guide/cputopology.rst for more information.
|
See Documentation/admin-guide/cputopology.rst for more information.
|
||||||
|
|
||||||
|
|
||||||
@@ -662,6 +668,7 @@ Description: Preferred MTE tag checking mode
|
|||||||
|
|
||||||
================ ==============================================
|
================ ==============================================
|
||||||
"sync" Prefer synchronous mode
|
"sync" Prefer synchronous mode
|
||||||
|
"asymm" Prefer asymmetric mode
|
||||||
"async" Prefer asynchronous mode
|
"async" Prefer asynchronous mode
|
||||||
================ ==============================================
|
================ ==============================================
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/*/uart*
|
What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/uart\*
|
||||||
Date: September 2021
|
Date: September 2021
|
||||||
Contact: Oskar Senft <osk@google.com>
|
Contact: Oskar Senft <osk@google.com>
|
||||||
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
||||||
@@ -9,7 +9,7 @@ Description: Selects the RX source of the UARTx device.
|
|||||||
depends on the selected file.
|
depends on the selected file.
|
||||||
|
|
||||||
e.g.
|
e.g.
|
||||||
cat /sys/bus/platform/drivers/aspeed-uart-routing/*.uart_routing/uart1
|
cat /sys/bus/platform/drivers/aspeed-uart-routing/\*.uart_routing/uart1
|
||||||
[io1] io2 io3 io4 uart2 uart3 uart4 io6
|
[io1] io2 io3 io4 uart2 uart3 uart4 io6
|
||||||
|
|
||||||
In this case, UART1 gets its input from IO1 (physical serial port 1).
|
In this case, UART1 gets its input from IO1 (physical serial port 1).
|
||||||
@@ -17,7 +17,7 @@ Description: Selects the RX source of the UARTx device.
|
|||||||
Users: OpenBMC. Proposed changes should be mailed to
|
Users: OpenBMC. Proposed changes should be mailed to
|
||||||
openbmc@lists.ozlabs.org
|
openbmc@lists.ozlabs.org
|
||||||
|
|
||||||
What: /sys/bus/platform/drivers/aspeed-uart-routing/*/io*
|
What: /sys/bus/platform/drivers/aspeed-uart-routing/\*/io\*
|
||||||
Date: September 2021
|
Date: September 2021
|
||||||
Contact: Oskar Senft <osk@google.com>
|
Contact: Oskar Senft <osk@google.com>
|
||||||
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
Chia-Wei Wang <chiawei_wang@aspeedtech.com>
|
||||||
|
|||||||
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
9
Documentation/ABI/testing/sysfs-driver-eud
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
What: /sys/bus/platform/drivers/eud/.../enable
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Souradeep Chowdhury <quic_schowdhu@quicinc.com>
|
||||||
|
Description:
|
||||||
|
The Enable/Disable sysfs interface for Embedded
|
||||||
|
USB Debugger(EUD). This enables and disables the
|
||||||
|
EUD based on a 1 or a 0 value. By enabling EUD,
|
||||||
|
the user is able to activate the mini-usb hub of
|
||||||
|
EUD for debug and trace capabilities.
|
||||||
@@ -69,6 +69,12 @@ KernelVersion: 5.1
|
|||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Displays the device's version from the eFuse
|
Description: Displays the device's version from the eFuse
|
||||||
|
|
||||||
|
What: /sys/class/habanalabs/hl<n>/fw_os_ver
|
||||||
|
Date: Dec 2021
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: ogabbay@kernel.org
|
||||||
|
Description: Version of the firmware OS running on the device's CPU
|
||||||
|
|
||||||
What: /sys/class/habanalabs/hl<n>/hard_reset
|
What: /sys/class/habanalabs/hl<n>/hard_reset
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
@@ -115,7 +121,7 @@ What: /sys/class/habanalabs/hl<n>/infineon_ver
|
|||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Version of the Device's power supply F/W code
|
Description: Version of the Device's power supply F/W code. Relevant only to GOYA and GAUDI
|
||||||
|
|
||||||
What: /sys/class/habanalabs/hl<n>/max_power
|
What: /sys/class/habanalabs/hl<n>/max_power
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
@@ -221,3 +227,9 @@ Date: Jan 2019
|
|||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Version of the u-boot running on the device's CPU
|
Description: Version of the u-boot running on the device's CPU
|
||||||
|
|
||||||
|
What: /sys/class/habanalabs/hl<n>/vrm_ver
|
||||||
|
Date: Jan 2022
|
||||||
|
KernelVersion: not yet upstreamed
|
||||||
|
Contact: ogabbay@kernel.org
|
||||||
|
Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
|
||||||
|
|||||||
77
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
77
Documentation/ABI/testing/sysfs-driver-intel_sdsi
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This directory contains interface files for accessing Intel
|
||||||
|
Software Defined Silicon (SDSi) features on a CPU. X
|
||||||
|
represents the socket instance (though not the socket ID).
|
||||||
|
The socket ID is determined by reading the registers file
|
||||||
|
and decoding it per the specification.
|
||||||
|
|
||||||
|
Some files communicate with SDSi hardware through a mailbox.
|
||||||
|
Should the operation fail, one of the following error codes
|
||||||
|
may be returned:
|
||||||
|
|
||||||
|
Error Code Cause
|
||||||
|
---------- -----
|
||||||
|
EIO General mailbox failure. Log may indicate cause.
|
||||||
|
EBUSY Mailbox is owned by another agent.
|
||||||
|
EPERM SDSI capability is not enabled in hardware.
|
||||||
|
EPROTO Failure in mailbox protocol detected by driver.
|
||||||
|
See log for details.
|
||||||
|
EOVERFLOW For provision commands, the size of the data
|
||||||
|
exceeds what may be written.
|
||||||
|
ESPIPE Seeking is not allowed.
|
||||||
|
ETIMEDOUT Failure to complete mailbox transaction in time.
|
||||||
|
|
||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/guid
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
(RO) The GUID for the registers file. The GUID identifies
|
||||||
|
the layout of the registers file in this directory.
|
||||||
|
Information about the register layouts for a particular GUID
|
||||||
|
is available at http://github.com/intel/intel-sdsi
|
||||||
|
|
||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/registers
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
(RO) Contains information needed by applications to provision
|
||||||
|
a CPU and monitor status information. The layout of this file
|
||||||
|
is determined by the GUID in this directory. Information about
|
||||||
|
the layout for a particular GUID is available at
|
||||||
|
http://github.com/intel/intel-sdsi
|
||||||
|
|
||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_akc
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
(WO) Used to write an Authentication Key Certificate (AKC) to
|
||||||
|
the SDSi NVRAM for the CPU. The AKC is used to authenticate a
|
||||||
|
Capability Activation Payload. Mailbox command.
|
||||||
|
|
||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/provision_cap
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
(WO) Used to write a Capability Activation Payload (CAP) to the
|
||||||
|
SDSi NVRAM for the CPU. CAPs are used to activate a given CPU
|
||||||
|
feature. A CAP is validated by SDSi hardware using a previously
|
||||||
|
provisioned AKC file. Upon successful authentication, the CPU
|
||||||
|
configuration is updated. A cold reboot is required to fully
|
||||||
|
activate the feature. Mailbox command.
|
||||||
|
|
||||||
|
What: /sys/bus/auxiliary/devices/intel_vsec.sdsi.X/state_certificate
|
||||||
|
Date: Feb 2022
|
||||||
|
KernelVersion: 5.18
|
||||||
|
Contact: "David E. Box" <david.e.box@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
(RO) Used to read back the current State Certificate for the CPU
|
||||||
|
from SDSi hardware. The State Certificate contains information
|
||||||
|
about the current licenses on the CPU. Mailbox command.
|
||||||
@@ -0,0 +1,29 @@
|
|||||||
|
What: /sys/firmware/papr/energy_scale_info
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||||
|
Description: Directory hosting a set of platform attributes like
|
||||||
|
energy/frequency on Linux running as a PAPR guest.
|
||||||
|
|
||||||
|
Each file in a directory contains a platform
|
||||||
|
attribute hierarchy pertaining to performance/
|
||||||
|
energy-savings mode and processor frequency.
|
||||||
|
|
||||||
|
What: /sys/firmware/papr/energy_scale_info/<id>
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||||
|
Description: Energy, frequency attributes directory for POWERVM servers
|
||||||
|
|
||||||
|
What: /sys/firmware/papr/energy_scale_info/<id>/desc
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||||
|
Description: String description of the energy attribute of <id>
|
||||||
|
|
||||||
|
What: /sys/firmware/papr/energy_scale_info/<id>/value
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||||
|
Description: Numeric value of the energy attribute of <id>
|
||||||
|
|
||||||
|
What: /sys/firmware/papr/energy_scale_info/<id>/value_desc
|
||||||
|
Date: February 2022
|
||||||
|
Contact: Linux for PowerPC mailing list <linuxppc-dev@ozlabs.org>
|
||||||
|
Description: String value of the energy attribute of <id>
|
||||||
@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy.
|
|||||||
0x04 F2FS_IPU_UTIL
|
0x04 F2FS_IPU_UTIL
|
||||||
0x08 F2FS_IPU_SSR_UTIL
|
0x08 F2FS_IPU_SSR_UTIL
|
||||||
0x10 F2FS_IPU_FSYNC
|
0x10 F2FS_IPU_FSYNC
|
||||||
0x20 F2FS_IPU_ASYNC,
|
0x20 F2FS_IPU_ASYNC
|
||||||
0x40 F2FS_IPU_NOCACHE
|
0x40 F2FS_IPU_NOCACHE
|
||||||
|
0x80 F2FS_IPU_HONOR_OPU_WRITE
|
||||||
==== =================
|
==== =================
|
||||||
|
|
||||||
Refer segment.h for details.
|
Refer segment.h for details.
|
||||||
@@ -98,6 +99,33 @@ Description: Controls the issue rate of discard commands that consist of small
|
|||||||
checkpoint is triggered, and issued during the checkpoint.
|
checkpoint is triggered, and issued during the checkpoint.
|
||||||
By default, it is disabled with 0.
|
By default, it is disabled with 0.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/max_discard_request
|
||||||
|
Date: December 2021
|
||||||
|
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||||
|
Description: Controls the number of discards a thread will issue at a time.
|
||||||
|
Higher number will allow the discard thread to finish its work
|
||||||
|
faster, at the cost of higher latency for incoming I/O.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/min_discard_issue_time
|
||||||
|
Date: December 2021
|
||||||
|
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||||
|
Description: Controls the interval the discard thread will wait between
|
||||||
|
issuing discard requests when there are discards to be issued and
|
||||||
|
no I/O aware interruptions occur.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/mid_discard_issue_time
|
||||||
|
Date: December 2021
|
||||||
|
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||||
|
Description: Controls the interval the discard thread will wait between
|
||||||
|
issuing discard requests when there are discards to be issued and
|
||||||
|
an I/O aware interruption occurs.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/max_discard_issue_time
|
||||||
|
Date: December 2021
|
||||||
|
Contact: "Konstantin Vyshetsky" <vkon@google.com>
|
||||||
|
Description: Controls the interval the discard thread will wait when there are
|
||||||
|
no discard operations to be issued.
|
||||||
|
|
||||||
What: /sys/fs/f2fs/<disk>/discard_granularity
|
What: /sys/fs/f2fs/<disk>/discard_granularity
|
||||||
Date: July 2017
|
Date: July 2017
|
||||||
Contact: "Chao Yu" <yuchao0@huawei.com>
|
Contact: "Chao Yu" <yuchao0@huawei.com>
|
||||||
@@ -269,11 +297,16 @@ Description: Shows current reserved blocks in system, it may be temporarily
|
|||||||
What: /sys/fs/f2fs/<disk>/gc_urgent
|
What: /sys/fs/f2fs/<disk>/gc_urgent
|
||||||
Date: August 2017
|
Date: August 2017
|
||||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||||
Description: Do background GC aggressively when set. When gc_urgent = 1,
|
Description: Do background GC aggressively when set. Set to 0 by default.
|
||||||
background thread starts to do GC by given gc_urgent_sleep_time
|
gc urgent high(1): does GC forcibly in a period of given
|
||||||
interval. When gc_urgent = 2, F2FS will lower the bar of
|
gc_urgent_sleep_time and ignores I/O idling check. Uses greedy
|
||||||
checking idle in order to process outstanding discard commands
|
GC approach and turns SSR mode on.
|
||||||
and GC a little bit aggressively. It is set to 0 by default.
|
gc urgent low(2): lowers the bar of checking I/O idling in
|
||||||
|
order to process outstanding discard commands and GC a
|
||||||
|
little bit aggressively. Uses cost benefit GC approach.
|
||||||
|
gc urgent mid(3): does GC forcibly in a period of given
|
||||||
|
gc_urgent_sleep_time and executes a mid level of I/O idling check.
|
||||||
|
Uses cost benefit GC approach.
|
||||||
|
|
||||||
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
|
What: /sys/fs/f2fs/<disk>/gc_urgent_sleep_time
|
||||||
Date: August 2017
|
Date: August 2017
|
||||||
@@ -430,6 +463,7 @@ Description: Show status of f2fs superblock in real time.
|
|||||||
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
|
0x800 SBI_QUOTA_SKIP_FLUSH skip flushing quota in current CP
|
||||||
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
|
0x1000 SBI_QUOTA_NEED_REPAIR quota file may be corrupted
|
||||||
0x2000 SBI_IS_RESIZEFS resizefs is in process
|
0x2000 SBI_IS_RESIZEFS resizefs is in process
|
||||||
|
0x4000 SBI_IS_FREEZING freezefs is in process
|
||||||
====== ===================== =================================
|
====== ===================== =================================
|
||||||
|
|
||||||
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
|
What: /sys/fs/f2fs/<disk>/ckpt_thread_ioprio
|
||||||
@@ -503,7 +537,7 @@ Date: July 2021
|
|||||||
Contact: "Daeho Jeong" <daehojeong@google.com>
|
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||||
Description: Show how many segments have been reclaimed by GC during a specific
|
Description: Show how many segments have been reclaimed by GC during a specific
|
||||||
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
|
GC mode (0: GC normal, 1: GC idle CB, 2: GC idle greedy,
|
||||||
3: GC idle AT, 4: GC urgent high, 5: GC urgent low)
|
3: GC idle AT, 4: GC urgent high, 5: GC urgent low, 6: GC urgent mid)
|
||||||
You can re-initialize this value to "0".
|
You can re-initialize this value to "0".
|
||||||
|
|
||||||
What: /sys/fs/f2fs/<disk>/gc_segment_mode
|
What: /sys/fs/f2fs/<disk>/gc_segment_mode
|
||||||
@@ -540,3 +574,9 @@ Contact: "Daeho Jeong" <daehojeong@google.com>
|
|||||||
Description: You can set the trial count limit for GC urgent high mode with this value.
|
Description: You can set the trial count limit for GC urgent high mode with this value.
|
||||||
If GC thread gets to the limit, the mode will turn back to GC normal mode.
|
If GC thread gets to the limit, the mode will turn back to GC normal mode.
|
||||||
By default, the value is zero, which means there is no limit like before.
|
By default, the value is zero, which means there is no limit like before.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/max_roll_forward_node_blocks
|
||||||
|
Date: January 2022
|
||||||
|
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||||
|
Description: Controls max # of node block writes to be used for roll forward
|
||||||
|
recovery. This can limit the roll forward recovery time.
|
||||||
|
|||||||
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
274
Documentation/ABI/testing/sysfs-kernel-mm-damon
Normal file
@@ -0,0 +1,274 @@
|
|||||||
|
What: /sys/kernel/mm/damon/
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Interface for Data Access MONitoring (DAMON). Contains files
|
||||||
|
for controlling DAMON. For more details on DAMON itself,
|
||||||
|
please refer to Documentation/admin-guide/mm/damon/index.rst.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Interface for privileged users of DAMON. Contains files for
|
||||||
|
controlling DAMON that are aimed to be used by privileged users.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/nr_kdamonds
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a number 'N' to this file creates the number of
|
||||||
|
directories for controlling each DAMON worker thread (kdamond)
|
||||||
|
named '0' to 'N-1' under the kdamonds/ directory.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/state
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing 'on' or 'off' to this file makes the kdamond start or
|
||||||
|
stop, respectively. Reading the file returns the keywords
|
||||||
|
based on the current status. Writing 'update_schemes_stats' to
|
||||||
|
the file updates contents of schemes stats files of the
|
||||||
|
kdamond.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/pid
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the pid of the kdamond if it is
|
||||||
|
running.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/nr_contexts
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a number 'N' to this file creates the number of
|
||||||
|
directories for controlling each DAMON context named '0' to
|
||||||
|
'N-1' under the contexts/ directory.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/operations
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a keyword for a monitoring operations set ('vaddr' for
|
||||||
|
virtual address spaces monitoring, and 'paddr' for the physical
|
||||||
|
address space monitoring) to this file makes the context to use
|
||||||
|
the operations set. Reading the file returns the keyword for
|
||||||
|
the operations set the context is set to use.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/sample_us
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a value to this file sets the sampling interval of the
|
||||||
|
DAMON context in microseconds as the value. Reading this file
|
||||||
|
returns the value.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/aggr_us
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a value to this file sets the aggregation interval of
|
||||||
|
the DAMON context in microseconds as the value. Reading this
|
||||||
|
file returns the value.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/intervals/update_us
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a value to this file sets the update interval of the
|
||||||
|
DAMON context in microseconds as the value. Reading this file
|
||||||
|
returns the value.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/min
|
||||||
|
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a value to this file sets the minimum number of
|
||||||
|
monitoring regions of the DAMON context as the value. Reading
|
||||||
|
this file returns the value.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/monitoring_attrs/nr_regions/max
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a value to this file sets the maximum number of
|
||||||
|
monitoring regions of the DAMON context as the value. Reading
|
||||||
|
this file returns the value.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/nr_targets
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a number 'N' to this file creates the number of
|
||||||
|
directories for controlling each DAMON target of the context
|
||||||
|
named '0' to 'N-1' under the targets/ directory.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/pid_target
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the pid of
|
||||||
|
the target process if the context is for virtual address spaces
|
||||||
|
monitoring, respectively.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/nr_regions
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a number 'N' to this file creates the number of
|
||||||
|
directories for setting each DAMON target memory region of the
|
||||||
|
context named '0' to 'N-1' under the regions/ directory. In
|
||||||
|
case of the virtual address space monitoring, DAMON
|
||||||
|
automatically sets the target memory region based on the target
|
||||||
|
processes' mappings.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/start
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the start
|
||||||
|
address of the monitoring region.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/targets/<T>/regions/<R>/end
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the end
|
||||||
|
address of the monitoring region.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/nr_schemes
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing a number 'N' to this file creates the number of
|
||||||
|
directories for controlling each DAMON-based operation scheme
|
||||||
|
of the context named '0' to 'N-1' under the schemes/ directory.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/action
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the action
|
||||||
|
of the scheme.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/min
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the minimum
|
||||||
|
size of the scheme's target regions in bytes.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/sz/max
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the maximum
|
||||||
|
size of the scheme's target regions in bytes.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/min
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the minimum
|
||||||
|
'nr_accesses' of the scheme's target regions.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/nr_accesses/max
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the maximum
|
||||||
|
'nr_accesses' of the scheme's target regions.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/min
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the minimum
|
||||||
|
'age' of the scheme's target regions.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/access_pattern/age/max
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the maximum
|
||||||
|
'age' of the scheme's target regions.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/ms
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the time
|
||||||
|
quota of the scheme in milliseconds.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/bytes
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the size
|
||||||
|
quota of the scheme in bytes.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/reset_interval_ms
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the quotas
|
||||||
|
charge reset interval of the scheme in milliseconds.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/sz_permil
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the
|
||||||
|
under-quota limit regions prioritization weight for 'size' in
|
||||||
|
permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/nr_accesses_permil
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the
|
||||||
|
under-quota limit regions prioritization weight for
|
||||||
|
'nr_accesses' in permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/quotas/weights/age_permil
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the
|
||||||
|
under-quota limit regions prioritization weight for 'age' in
|
||||||
|
permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/metric
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the metric
|
||||||
|
of the watermarks for the scheme. The writable/readable
|
||||||
|
keywords for this file are 'none' for disabling the watermarks
|
||||||
|
feature, or 'free_mem_rate' for the system's global free memory
|
||||||
|
rate in permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/interval_us
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the metric
|
||||||
|
check interval of the watermarks for the scheme in
|
||||||
|
microseconds.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/high
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the high
|
||||||
|
watermark of the scheme in permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/mid
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the mid
|
||||||
|
watermark of the scheme in permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/watermarks/low
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Writing to and reading from this file sets and gets the low
|
||||||
|
watermark of the scheme in permil.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_tried
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the number of regions that the action
|
||||||
|
of the scheme has tried to be applied.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_tried
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the total size of regions that the
|
||||||
|
action of the scheme has tried to be applied in bytes.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/nr_applied
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the number of regions that the action
|
||||||
|
of the scheme has successfully applied.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/sz_applied
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the total size of regions that the
|
||||||
|
action of the scheme has successfully applied in bytes.
|
||||||
|
|
||||||
|
What: /sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/stats/qt_exceeds
|
||||||
|
Date: Mar 2022
|
||||||
|
Contact: SeongJae Park <sj@kernel.org>
|
||||||
|
Description: Reading this file returns the number of the exceed events of
|
||||||
|
the scheme's quotas.
|
||||||
@@ -53,38 +53,6 @@ Description:
|
|||||||
(but some corrected errors might be still reported
|
(but some corrected errors might be still reported
|
||||||
in other ways)
|
in other ways)
|
||||||
|
|
||||||
What: /sys/devices/system/machinecheck/machinecheckX/tolerant
|
|
||||||
Contact: Andi Kleen <ak@linux.intel.com>
|
|
||||||
Date: Feb, 2007
|
|
||||||
Description:
|
|
||||||
The entries appear for each CPU, but they are truly shared
|
|
||||||
between all CPUs.
|
|
||||||
|
|
||||||
Tolerance level. When a machine check exception occurs for a
|
|
||||||
non corrected machine check the kernel can take different
|
|
||||||
actions.
|
|
||||||
|
|
||||||
Since machine check exceptions can happen any time it is
|
|
||||||
sometimes risky for the kernel to kill a process because it
|
|
||||||
defies normal kernel locking rules. The tolerance level
|
|
||||||
configures how hard the kernel tries to recover even at some
|
|
||||||
risk of deadlock. Higher tolerant values trade potentially
|
|
||||||
better uptime with the risk of a crash or even corruption
|
|
||||||
(for tolerant >= 3).
|
|
||||||
|
|
||||||
== ===========================================================
|
|
||||||
0 always panic on uncorrected errors, log corrected errors
|
|
||||||
1 panic or SIGBUS on uncorrected errors, log corrected errors
|
|
||||||
2 SIGBUS or log uncorrected errors, log corrected errors
|
|
||||||
3 never panic or SIGBUS, log all errors (for testing only)
|
|
||||||
== ===========================================================
|
|
||||||
|
|
||||||
Default: 1
|
|
||||||
|
|
||||||
Note this only makes a difference if the CPU allows recovery
|
|
||||||
from a machine check exception. Current x86 CPUs generally
|
|
||||||
do not.
|
|
||||||
|
|
||||||
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
What: /sys/devices/system/machinecheck/machinecheckX/trigger
|
||||||
Contact: Andi Kleen <ak@linux.intel.com>
|
Contact: Andi Kleen <ak@linux.intel.com>
|
||||||
Date: Feb, 2007
|
Date: Feb, 2007
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ Date: October 2018
|
|||||||
KernelVersion: 4.20
|
KernelVersion: 4.20
|
||||||
Contact: "Matan Ziv-Av <matan@svgalib.org>
|
Contact: "Matan Ziv-Av <matan@svgalib.org>
|
||||||
Description:
|
Description:
|
||||||
|
Deprecated use /sys/class/power_supply/CMB0/charge_control_end_threshold
|
||||||
Maximal battery charge level. Accepted values are 80 or 100.
|
Maximal battery charge level. Accepted values are 80 or 100.
|
||||||
|
|
||||||
What: /sys/devices/platform/lg-laptop/fan_mode
|
What: /sys/devices/platform/lg-laptop/fan_mode
|
||||||
|
|||||||
@@ -37,8 +37,15 @@ Description: (RO) Set of available destinations (sinks) for a SMA
|
|||||||
PPS2 signal is sent to the PPS2 selector
|
PPS2 signal is sent to the PPS2 selector
|
||||||
TS1 signal is sent to timestamper 1
|
TS1 signal is sent to timestamper 1
|
||||||
TS2 signal is sent to timestamper 2
|
TS2 signal is sent to timestamper 2
|
||||||
|
TS3 signal is sent to timestamper 3
|
||||||
|
TS4 signal is sent to timestamper 4
|
||||||
IRIG signal is sent to the IRIG-B module
|
IRIG signal is sent to the IRIG-B module
|
||||||
DCF signal is sent to the DCF module
|
DCF signal is sent to the DCF module
|
||||||
|
FREQ1 signal is sent to frequency counter 1
|
||||||
|
FREQ2 signal is sent to frequency counter 2
|
||||||
|
FREQ3 signal is sent to frequency counter 3
|
||||||
|
FREQ4 signal is sent to frequency counter 4
|
||||||
|
None signal input is disabled
|
||||||
===== ================================================
|
===== ================================================
|
||||||
|
|
||||||
What: /sys/class/timecard/ocpN/available_sma_outputs
|
What: /sys/class/timecard/ocpN/available_sma_outputs
|
||||||
@@ -50,10 +57,16 @@ Description: (RO) Set of available sources for a SMA output signal.
|
|||||||
10Mhz output is from the 10Mhz reference clock
|
10Mhz output is from the 10Mhz reference clock
|
||||||
PHC output PPS is from the PHC clock
|
PHC output PPS is from the PHC clock
|
||||||
MAC output PPS is from the Miniature Atomic Clock
|
MAC output PPS is from the Miniature Atomic Clock
|
||||||
GNSS output PPS is from the GNSS module
|
GNSS1 output PPS is from the first GNSS module
|
||||||
GNSS2 output PPS is from the second GNSS module
|
GNSS2 output PPS is from the second GNSS module
|
||||||
IRIG output is from the PHC, in IRIG-B format
|
IRIG output is from the PHC, in IRIG-B format
|
||||||
DCF output is from the PHC, in DCF format
|
DCF output is from the PHC, in DCF format
|
||||||
|
GEN1 output is from frequency generator 1
|
||||||
|
GEN2 output is from frequency generator 2
|
||||||
|
GEN3 output is from frequency generator 3
|
||||||
|
GEN4 output is from frequency generator 4
|
||||||
|
GND output is GND
|
||||||
|
VCC output is VCC
|
||||||
===== ================================================
|
===== ================================================
|
||||||
|
|
||||||
What: /sys/class/timecard/ocpN/clock_source
|
What: /sys/class/timecard/ocpN/clock_source
|
||||||
@@ -63,6 +76,97 @@ Description: (RW) Contains the current synchronization source used by
|
|||||||
the PHC. May be changed by writing one of the listed
|
the PHC. May be changed by writing one of the listed
|
||||||
values from the available_clock_sources attribute set.
|
values from the available_clock_sources attribute set.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/clock_status_drift
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Contains the current drift value used by the firmware
|
||||||
|
for internal disciplining of the atomic clock.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/clock_status_offset
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Contains the current offset value used by the firmware
|
||||||
|
for internal disciplining of the atomic clock.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/freqX
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Optional directory containing the sysfs nodes for
|
||||||
|
frequency counter <X>.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/freqX/frequency
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Contains the measured frequency over the specified
|
||||||
|
measurement period.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/freqX/seconds
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RW) Specifies the number of seconds from 0-255 that the
|
||||||
|
frequency should be measured over. Write 0 to disable.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Optional directory containing the sysfs nodes for
|
||||||
|
frequency generator <X>.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/duty
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Specifies the signal duty cycle as a percentage from 1-99.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/period
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Specifies the signal period in nanoseconds.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/phase
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Specifies the signal phase offset in nanoseconds.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/polarity
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Specifies the signal polarity, either 1 or 0.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/running
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Either 0 or 1, showing if the signal generator is running.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/start
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RO) Shows the time in <sec>.<nsec> that the signal generator
|
||||||
|
started running.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/genX/signal
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RW) Used to start the signal generator, and summarize
|
||||||
|
the current status.
|
||||||
|
|
||||||
|
The signal generator may be started by writing the signal
|
||||||
|
period, followed by the optional signal values. If the
|
||||||
|
optional values are not provided, they default to the current
|
||||||
|
settings, which may be obtained from the other sysfs nodes.
|
||||||
|
|
||||||
|
period [duty [phase [polarity]]]
|
||||||
|
|
||||||
|
echo 500000000 > signal # 1/2 second period
|
||||||
|
echo 1000000 40 100 > signal
|
||||||
|
echo 0 > signal # turn off generator
|
||||||
|
|
||||||
|
Period and phase are specified in nanoseconds. Duty cycle is
|
||||||
|
a percentage from 1-99. Polarity is 1 or 0.
|
||||||
|
|
||||||
|
Reading this node will return:
|
||||||
|
|
||||||
|
period duty phase polarity start_time
|
||||||
|
|
||||||
What: /sys/class/timecard/ocpN/gnss_sync
|
What: /sys/class/timecard/ocpN/gnss_sync
|
||||||
Date: September 2021
|
Date: September 2021
|
||||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
@@ -126,6 +230,16 @@ Description: (RW) These attributes specify the direction of the signal
|
|||||||
The 10Mhz reference clock input is currently only valid
|
The 10Mhz reference clock input is currently only valid
|
||||||
on SMA1 and may not be combined with other destination sinks.
|
on SMA1 and may not be combined with other destination sinks.
|
||||||
|
|
||||||
|
What: /sys/class/timecard/ocpN/tod_correction
|
||||||
|
Date: March 2022
|
||||||
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
Description: (RW) The incoming GNSS signal is in UTC time, and the NMEA
|
||||||
|
format messages do not provide a TAI offset. This sets the
|
||||||
|
correction value for the incoming time.
|
||||||
|
|
||||||
|
If UBX_LS is enabled, this should be 0, and the offset is
|
||||||
|
taken from the UBX-NAV-TIMELS message.
|
||||||
|
|
||||||
What: /sys/class/timecard/ocpN/ts_window_adjust
|
What: /sys/class/timecard/ocpN/ts_window_adjust
|
||||||
Date: September 2021
|
Date: September 2021
|
||||||
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
Contact: Jonathan Lemon <jonathan.lemon@gmail.com>
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ SPHINX_CONF = conf.py
|
|||||||
PAPER =
|
PAPER =
|
||||||
BUILDDIR = $(obj)/output
|
BUILDDIR = $(obj)/output
|
||||||
PDFLATEX = xelatex
|
PDFLATEX = xelatex
|
||||||
LATEXOPTS = -interaction=batchmode
|
LATEXOPTS = -interaction=batchmode -no-shell-escape
|
||||||
|
|
||||||
ifeq ($(KBUILD_VERBOSE),0)
|
ifeq ($(KBUILD_VERBOSE),0)
|
||||||
SPHINXOPTS += "-q"
|
SPHINXOPTS += "-q"
|
||||||
|
|||||||
@@ -278,20 +278,20 @@ appropriate parameters. In general this allows more efficient DMA
|
|||||||
on systems where System RAM exists above 4G _physical_ address.
|
on systems where System RAM exists above 4G _physical_ address.
|
||||||
|
|
||||||
Drivers for all PCI-X and PCIe compliant devices must call
|
Drivers for all PCI-X and PCIe compliant devices must call
|
||||||
pci_set_dma_mask() as they are 64-bit DMA devices.
|
dma_set_mask() as they are 64-bit DMA devices.
|
||||||
|
|
||||||
Similarly, drivers must also "register" this capability if the device
|
Similarly, drivers must also "register" this capability if the device
|
||||||
can directly address "consistent memory" in System RAM above 4G physical
|
can directly address "coherent memory" in System RAM above 4G physical
|
||||||
address by calling pci_set_consistent_dma_mask().
|
address by calling dma_set_coherent_mask().
|
||||||
Again, this includes drivers for all PCI-X and PCIe compliant devices.
|
Again, this includes drivers for all PCI-X and PCIe compliant devices.
|
||||||
Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
|
Many 64-bit "PCI" devices (before PCI-X) and some PCI-X devices are
|
||||||
64-bit DMA capable for payload ("streaming") data but not control
|
64-bit DMA capable for payload ("streaming") data but not control
|
||||||
("consistent") data.
|
("coherent") data.
|
||||||
|
|
||||||
|
|
||||||
Setup shared control data
|
Setup shared control data
|
||||||
-------------------------
|
-------------------------
|
||||||
Once the DMA masks are set, the driver can allocate "consistent" (a.k.a. shared)
|
Once the DMA masks are set, the driver can allocate "coherent" (a.k.a. shared)
|
||||||
memory. See Documentation/core-api/dma-api.rst for a full description of
|
memory. See Documentation/core-api/dma-api.rst for a full description of
|
||||||
the DMA APIs. This section is just a reminder that it needs to be done
|
the DMA APIs. This section is just a reminder that it needs to be done
|
||||||
before enabling DMA on the device.
|
before enabling DMA on the device.
|
||||||
@@ -367,7 +367,7 @@ steps need to be performed:
|
|||||||
- Disable the device from generating IRQs
|
- Disable the device from generating IRQs
|
||||||
- Release the IRQ (free_irq())
|
- Release the IRQ (free_irq())
|
||||||
- Stop all DMA activity
|
- Stop all DMA activity
|
||||||
- Release DMA buffers (both streaming and consistent)
|
- Release DMA buffers (both streaming and coherent)
|
||||||
- Unregister from other subsystems (e.g. scsi or netdev)
|
- Unregister from other subsystems (e.g. scsi or netdev)
|
||||||
- Disable device from responding to MMIO/IO Port addresses
|
- Disable device from responding to MMIO/IO Port addresses
|
||||||
- Release MMIO/IO Port resource(s)
|
- Release MMIO/IO Port resource(s)
|
||||||
@@ -420,7 +420,7 @@ Once DMA is stopped, clean up streaming DMA first.
|
|||||||
I.e. unmap data buffers and return buffers to "upstream"
|
I.e. unmap data buffers and return buffers to "upstream"
|
||||||
owners if there is one.
|
owners if there is one.
|
||||||
|
|
||||||
Then clean up "consistent" buffers which contain the control data.
|
Then clean up "coherent" buffers which contain the control data.
|
||||||
|
|
||||||
See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
|
See Documentation/core-api/dma-api.rst for details on unmapping interfaces.
|
||||||
|
|
||||||
|
|||||||
@@ -60,3 +60,31 @@ For example::
|
|||||||
|
|
||||||
When a given field is not populated or its value provided by the platform
|
When a given field is not populated or its value provided by the platform
|
||||||
firmware is invalid, the "not-defined" string is shown instead of the value.
|
firmware is invalid, the "not-defined" string is shown instead of the value.
|
||||||
|
|
||||||
|
ACPI Fan Fine Grain Control
|
||||||
|
=============================
|
||||||
|
|
||||||
|
When _FIF object specifies support for fine grain control, then fan speed
|
||||||
|
can be set from 0 to 100% with the recommended minimum "step size" via
|
||||||
|
_FSL object. User can adjust fan speed using thermal sysfs cooling device.
|
||||||
|
|
||||||
|
Here the user can look at fan performance states for a reference speed (speed_rpm)
|
||||||
|
and set it by changing cooling device cur_state. If the fine grain control
|
||||||
|
is supported then user can also adjust to some other speeds which are
|
||||||
|
not defined in the performance states.
|
||||||
|
|
||||||
|
The support of fine grain control is presented via sysfs attribute
|
||||||
|
"fine_grain_control". If fine grain control is present, this attribute
|
||||||
|
will show "1" otherwise "0".
|
||||||
|
|
||||||
|
This sysfs attribute is presented in the same directory as performance states.
|
||||||
|
|
||||||
|
ACPI Fan Performance Feedback
|
||||||
|
=============================
|
||||||
|
|
||||||
|
The optional _FST object provides status information for the fan device.
|
||||||
|
This includes a field to provide current fan speed in revolutions per minute
|
||||||
|
at which the fan is rotating.
|
||||||
|
|
||||||
|
This speed is presented in the sysfs using the attribute "fan_speed_rpm",
|
||||||
|
in the same directory as performance states.
|
||||||
|
|||||||
@@ -315,8 +315,8 @@ To use the feature, admin should set up backing device via::
|
|||||||
|
|
||||||
echo /dev/sda5 > /sys/block/zramX/backing_dev
|
echo /dev/sda5 > /sys/block/zramX/backing_dev
|
||||||
|
|
||||||
before disksize setting. It supports only partition at this moment.
|
before disksize setting. It supports only partitions at this moment.
|
||||||
If admin wants to use incompressible page writeback, they could do via::
|
If admin wants to use incompressible page writeback, they could do it via::
|
||||||
|
|
||||||
echo huge > /sys/block/zramX/writeback
|
echo huge > /sys/block/zramX/writeback
|
||||||
|
|
||||||
@@ -341,9 +341,9 @@ Admin can request writeback of those idle pages at right timing via::
|
|||||||
|
|
||||||
echo idle > /sys/block/zramX/writeback
|
echo idle > /sys/block/zramX/writeback
|
||||||
|
|
||||||
With the command, zram writeback idle pages from memory to the storage.
|
With the command, zram will write back idle pages from memory to the storage.
|
||||||
|
|
||||||
If admin want to write a specific page in zram device to backing device,
|
If an admin wants to write a specific page in zram device to the backing device,
|
||||||
they could write a page index into the interface.
|
they could write a page index into the interface.
|
||||||
|
|
||||||
echo "page_index=1251" > /sys/block/zramX/writeback
|
echo "page_index=1251" > /sys/block/zramX/writeback
|
||||||
@@ -354,7 +354,7 @@ to guarantee storage health for entire product life.
|
|||||||
|
|
||||||
To overcome the concern, zram supports "writeback_limit" feature.
|
To overcome the concern, zram supports "writeback_limit" feature.
|
||||||
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
|
The "writeback_limit_enable"'s default value is 0 so that it doesn't limit
|
||||||
any writeback. IOW, if admin wants to apply writeback budget, he should
|
any writeback. IOW, if admin wants to apply writeback budget, they should
|
||||||
enable writeback_limit_enable via::
|
enable writeback_limit_enable via::
|
||||||
|
|
||||||
$ echo 1 > /sys/block/zramX/writeback_limit_enable
|
$ echo 1 > /sys/block/zramX/writeback_limit_enable
|
||||||
@@ -365,7 +365,7 @@ until admin sets the budget via /sys/block/zramX/writeback_limit.
|
|||||||
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
|
(If admin doesn't enable writeback_limit_enable, writeback_limit's value
|
||||||
assigned via /sys/block/zramX/writeback_limit is meaningless.)
|
assigned via /sys/block/zramX/writeback_limit is meaningless.)
|
||||||
|
|
||||||
If admin want to limit writeback as per-day 400M, he could do it
|
If admin wants to limit writeback as per-day 400M, they could do it
|
||||||
like below::
|
like below::
|
||||||
|
|
||||||
$ MB_SHIFT=20
|
$ MB_SHIFT=20
|
||||||
@@ -375,16 +375,16 @@ like below::
|
|||||||
$ echo 1 > /sys/block/zram0/writeback_limit_enable
|
$ echo 1 > /sys/block/zram0/writeback_limit_enable
|
||||||
|
|
||||||
If admins want to allow further write again once the budget is exhausted,
|
If admins want to allow further write again once the budget is exhausted,
|
||||||
he could do it like below::
|
they could do it like below::
|
||||||
|
|
||||||
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
$ echo $((400<<MB_SHIFT>>4K_SHIFT)) > \
|
||||||
/sys/block/zram0/writeback_limit
|
/sys/block/zram0/writeback_limit
|
||||||
|
|
||||||
If admin wants to see remaining writeback budget since last set::
|
If an admin wants to see the remaining writeback budget since last set::
|
||||||
|
|
||||||
$ cat /sys/block/zramX/writeback_limit
|
$ cat /sys/block/zramX/writeback_limit
|
||||||
|
|
||||||
If admin want to disable writeback limit, he could do::
|
If an admin wants to disable writeback limit, they could do::
|
||||||
|
|
||||||
$ echo 0 > /sys/block/zramX/writeback_limit_enable
|
$ echo 0 > /sys/block/zramX/writeback_limit_enable
|
||||||
|
|
||||||
@@ -393,7 +393,7 @@ system reboot, echo 1 > /sys/block/zramX/reset) so keeping how many of
|
|||||||
writeback happened until you reset the zram to allocate extra writeback
|
writeback happened until you reset the zram to allocate extra writeback
|
||||||
budget in next setting is user's job.
|
budget in next setting is user's job.
|
||||||
|
|
||||||
If admin wants to measure writeback count in a certain period, he could
|
If admin wants to measure writeback count in a certain period, they could
|
||||||
know it via /sys/block/zram0/bd_stat's 3rd column.
|
know it via /sys/block/zram0/bd_stat's 3rd column.
|
||||||
|
|
||||||
memory tracking
|
memory tracking
|
||||||
|
|||||||
@@ -64,6 +64,7 @@ Brief summary of control files.
|
|||||||
threads
|
threads
|
||||||
cgroup.procs show list of processes
|
cgroup.procs show list of processes
|
||||||
cgroup.event_control an interface for event_fd()
|
cgroup.event_control an interface for event_fd()
|
||||||
|
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||||
memory.usage_in_bytes show current usage for memory
|
memory.usage_in_bytes show current usage for memory
|
||||||
(See 5.5 for details)
|
(See 5.5 for details)
|
||||||
memory.memsw.usage_in_bytes show current usage for memory+Swap
|
memory.memsw.usage_in_bytes show current usage for memory+Swap
|
||||||
@@ -75,6 +76,7 @@ Brief summary of control files.
|
|||||||
memory.max_usage_in_bytes show max memory usage recorded
|
memory.max_usage_in_bytes show max memory usage recorded
|
||||||
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
memory.memsw.max_usage_in_bytes show max memory+Swap usage recorded
|
||||||
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
memory.soft_limit_in_bytes set/show soft limit of memory usage
|
||||||
|
This knob is not available on CONFIG_PREEMPT_RT systems.
|
||||||
memory.stat show various statistics
|
memory.stat show various statistics
|
||||||
memory.use_hierarchy set/show hierarchical account enabled
|
memory.use_hierarchy set/show hierarchical account enabled
|
||||||
This knob is deprecated and shouldn't be
|
This knob is deprecated and shouldn't be
|
||||||
|
|||||||
@@ -1301,6 +1301,11 @@ PAGE_SIZE multiple when read back.
|
|||||||
Amount of memory used to cache filesystem data,
|
Amount of memory used to cache filesystem data,
|
||||||
including tmpfs and shared memory.
|
including tmpfs and shared memory.
|
||||||
|
|
||||||
|
kernel (npn)
|
||||||
|
Amount of total kernel memory, including
|
||||||
|
(kernel_stack, pagetables, percpu, vmalloc, slab) in
|
||||||
|
addition to other kernel memory use cases.
|
||||||
|
|
||||||
kernel_stack
|
kernel_stack
|
||||||
Amount of memory allocated to kernel stacks.
|
Amount of memory allocated to kernel stacks.
|
||||||
|
|
||||||
|
|||||||
@@ -60,8 +60,8 @@ privileged data touched during the speculative execution.
|
|||||||
Spectre variant 1 attacks take advantage of speculative execution of
|
Spectre variant 1 attacks take advantage of speculative execution of
|
||||||
conditional branches, while Spectre variant 2 attacks use speculative
|
conditional branches, while Spectre variant 2 attacks use speculative
|
||||||
execution of indirect branches to leak privileged memory.
|
execution of indirect branches to leak privileged memory.
|
||||||
See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[7] <spec_ref7>`
|
See :ref:`[1] <spec_ref1>` :ref:`[5] <spec_ref5>` :ref:`[6] <spec_ref6>`
|
||||||
:ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
|
:ref:`[7] <spec_ref7>` :ref:`[10] <spec_ref10>` :ref:`[11] <spec_ref11>`.
|
||||||
|
|
||||||
Spectre variant 1 (Bounds Check Bypass)
|
Spectre variant 1 (Bounds Check Bypass)
|
||||||
---------------------------------------
|
---------------------------------------
|
||||||
@@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the
|
|||||||
speculative execution's side effects left in level 1 cache to infer the
|
speculative execution's side effects left in level 1 cache to infer the
|
||||||
victim's data.
|
victim's data.
|
||||||
|
|
||||||
|
Yet another variant 2 attack vector is for the attacker to poison the
|
||||||
|
Branch History Buffer (BHB) to speculatively steer an indirect branch
|
||||||
|
to a specific Branch Target Buffer (BTB) entry, even if the entry isn't
|
||||||
|
associated with the source address of the indirect branch. Specifically,
|
||||||
|
the BHB might be shared across privilege levels even in the presence of
|
||||||
|
Enhanced IBRS.
|
||||||
|
|
||||||
|
Currently the only known real-world BHB attack vector is via
|
||||||
|
unprivileged eBPF. Therefore, it's highly recommended to not enable
|
||||||
|
unprivileged eBPF, especially when eIBRS is used (without retpolines).
|
||||||
|
For a full mitigation against BHB attacks, it's recommended to use
|
||||||
|
retpolines (or eIBRS combined with retpolines).
|
||||||
|
|
||||||
Attack scenarios
|
Attack scenarios
|
||||||
----------------
|
----------------
|
||||||
|
|
||||||
@@ -364,13 +377,15 @@ The possible values in this file are:
|
|||||||
|
|
||||||
- Kernel status:
|
- Kernel status:
|
||||||
|
|
||||||
==================================== =================================
|
======================================== =================================
|
||||||
'Not affected' The processor is not vulnerable
|
'Not affected' The processor is not vulnerable
|
||||||
'Vulnerable' Vulnerable, no mitigation
|
'Mitigation: None' Vulnerable, no mitigation
|
||||||
'Mitigation: Full generic retpoline' Software-focused mitigation
|
'Mitigation: Retpolines' Use Retpoline thunks
|
||||||
'Mitigation: Full AMD retpoline' AMD-specific software mitigation
|
'Mitigation: LFENCE' Use LFENCE instructions
|
||||||
'Mitigation: Enhanced IBRS' Hardware-focused mitigation
|
'Mitigation: Enhanced IBRS' Hardware-focused mitigation
|
||||||
==================================== =================================
|
'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines
|
||||||
|
'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE
|
||||||
|
======================================== =================================
|
||||||
|
|
||||||
- Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
|
- Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is
|
||||||
used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
|
used to protect against Spectre variant 2 attacks when calling firmware (x86 only).
|
||||||
@@ -583,12 +598,13 @@ kernel command line.
|
|||||||
|
|
||||||
Specific mitigations can also be selected manually:
|
Specific mitigations can also be selected manually:
|
||||||
|
|
||||||
retpoline
|
retpoline auto pick between generic,lfence
|
||||||
replace indirect branches
|
retpoline,generic Retpolines
|
||||||
retpoline,generic
|
retpoline,lfence LFENCE; indirect branch
|
||||||
google's original retpoline
|
retpoline,amd alias for retpoline,lfence
|
||||||
retpoline,amd
|
eibrs enhanced IBRS
|
||||||
AMD-specific minimal thunk
|
eibrs,retpoline enhanced IBRS + Retpolines
|
||||||
|
eibrs,lfence enhanced IBRS + LFENCE
|
||||||
|
|
||||||
Not specifying this option is equivalent to
|
Not specifying this option is equivalent to
|
||||||
spectre_v2=auto.
|
spectre_v2=auto.
|
||||||
@@ -599,7 +615,7 @@ kernel command line.
|
|||||||
spectre_v2=off. Spectre variant 1 mitigations
|
spectre_v2=off. Spectre variant 1 mitigations
|
||||||
cannot be disabled.
|
cannot be disabled.
|
||||||
|
|
||||||
For spectre_v2_user see :doc:`/admin-guide/kernel-parameters`.
|
For spectre_v2_user see Documentation/admin-guide/kernel-parameters.txt
|
||||||
|
|
||||||
Mitigation selection guide
|
Mitigation selection guide
|
||||||
--------------------------
|
--------------------------
|
||||||
@@ -681,7 +697,7 @@ AMD white papers:
|
|||||||
|
|
||||||
.. _spec_ref6:
|
.. _spec_ref6:
|
||||||
|
|
||||||
[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/90343-B_SoftwareTechniquesforManagingSpeculation_WP_7-18Update_FNL.pdf>`_.
|
[6] `Software techniques for managing speculation on AMD processors <https://developer.amd.com/wp-content/resources/Managing-Speculation-on-AMD-Processors.pdf>`_.
|
||||||
|
|
||||||
ARM white papers:
|
ARM white papers:
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,7 @@ problems and bugs in particular.
|
|||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
reporting-issues
|
reporting-issues
|
||||||
|
reporting-regressions
|
||||||
security-bugs
|
security-bugs
|
||||||
bug-hunting
|
bug-hunting
|
||||||
bug-bisect
|
bug-bisect
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ Field 3 -- # of sectors read (unsigned long)
|
|||||||
|
|
||||||
Field 4 -- # of milliseconds spent reading (unsigned int)
|
Field 4 -- # of milliseconds spent reading (unsigned int)
|
||||||
This is the total number of milliseconds spent by all reads (as
|
This is the total number of milliseconds spent by all reads (as
|
||||||
measured from __make_request() to end_that_request_last()).
|
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||||
|
|
||||||
Field 5 -- # of writes completed (unsigned long)
|
Field 5 -- # of writes completed (unsigned long)
|
||||||
This is the total number of writes completed successfully.
|
This is the total number of writes completed successfully.
|
||||||
@@ -89,7 +89,7 @@ Field 7 -- # of sectors written (unsigned long)
|
|||||||
|
|
||||||
Field 8 -- # of milliseconds spent writing (unsigned int)
|
Field 8 -- # of milliseconds spent writing (unsigned int)
|
||||||
This is the total number of milliseconds spent by all writes (as
|
This is the total number of milliseconds spent by all writes (as
|
||||||
measured from __make_request() to end_that_request_last()).
|
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||||
|
|
||||||
Field 9 -- # of I/Os currently in progress (unsigned int)
|
Field 9 -- # of I/Os currently in progress (unsigned int)
|
||||||
The only field that should go to zero. Incremented as requests are
|
The only field that should go to zero. Incremented as requests are
|
||||||
@@ -120,7 +120,7 @@ Field 14 -- # of sectors discarded (unsigned long)
|
|||||||
|
|
||||||
Field 15 -- # of milliseconds spent discarding (unsigned int)
|
Field 15 -- # of milliseconds spent discarding (unsigned int)
|
||||||
This is the total number of milliseconds spent by all discards (as
|
This is the total number of milliseconds spent by all discards (as
|
||||||
measured from __make_request() to end_that_request_last()).
|
measured from blk_mq_alloc_request() to __blk_mq_end_request()).
|
||||||
|
|
||||||
Field 16 -- # of flush requests completed
|
Field 16 -- # of flush requests completed
|
||||||
This is the total number of flush requests completed successfully.
|
This is the total number of flush requests completed successfully.
|
||||||
|
|||||||
@@ -146,9 +146,9 @@ System kernel config options
|
|||||||
CONFIG_SYSFS=y
|
CONFIG_SYSFS=y
|
||||||
|
|
||||||
Note that "sysfs file system support" might not appear in the "Pseudo
|
Note that "sysfs file system support" might not appear in the "Pseudo
|
||||||
filesystems" menu if "Configure standard kernel features (for small
|
filesystems" menu if "Configure standard kernel features (expert users)"
|
||||||
systems)" is not enabled in "General Setup." In this case, check the
|
is not enabled in "General Setup." In this case, check the .config file
|
||||||
.config file itself to ensure that sysfs is turned on, as follows::
|
itself to ensure that sysfs is turned on, as follows::
|
||||||
|
|
||||||
grep 'CONFIG_SYSFS' .config
|
grep 'CONFIG_SYSFS' .config
|
||||||
|
|
||||||
@@ -533,6 +533,10 @@ the following command::
|
|||||||
|
|
||||||
cp /proc/vmcore <dump-file>
|
cp /proc/vmcore <dump-file>
|
||||||
|
|
||||||
|
or use scp to write out the dump file between hosts on a network, e.g::
|
||||||
|
|
||||||
|
scp /proc/vmcore remote_username@remote_ip:<dump-file>
|
||||||
|
|
||||||
You can also use makedumpfile utility to write out the dump file
|
You can also use makedumpfile utility to write out the dump file
|
||||||
with specified options to filter out unwanted contents, e.g::
|
with specified options to filter out unwanted contents, e.g::
|
||||||
|
|
||||||
|
|||||||
@@ -494,6 +494,14 @@ architecture which is used to lookup the page-tables for the Virtual
|
|||||||
addresses in the higher VA range (refer to ARMv8 ARM document for
|
addresses in the higher VA range (refer to ARMv8 ARM document for
|
||||||
more details).
|
more details).
|
||||||
|
|
||||||
|
MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END
|
||||||
|
-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Used to get the correct ranges:
|
||||||
|
MODULES_VADDR ~ MODULES_END-1 : Kernel module space.
|
||||||
|
VMALLOC_START ~ VMALLOC_END-1 : vmalloc() / ioremap() space.
|
||||||
|
VMEMMAP_START ~ VMEMMAP_END-1 : vmemmap region, used for struct page array.
|
||||||
|
|
||||||
arm
|
arm
|
||||||
===
|
===
|
||||||
|
|
||||||
|
|||||||
@@ -724,6 +724,12 @@
|
|||||||
hvc<n> Use the hypervisor console device <n>. This is for
|
hvc<n> Use the hypervisor console device <n>. This is for
|
||||||
both Xen and PowerPC hypervisors.
|
both Xen and PowerPC hypervisors.
|
||||||
|
|
||||||
|
{ null | "" }
|
||||||
|
Use to disable console output, i.e., to have kernel
|
||||||
|
console messages discarded.
|
||||||
|
This must be the only console= parameter used on the
|
||||||
|
kernel command line.
|
||||||
|
|
||||||
If the device connected to the port is not a TTY but a braille
|
If the device connected to the port is not a TTY but a braille
|
||||||
device, prepend "brl," before the device type, for instance
|
device, prepend "brl," before the device type, for instance
|
||||||
console=brl,ttyS0
|
console=brl,ttyS0
|
||||||
@@ -944,6 +950,30 @@
|
|||||||
dump out devices still on the deferred probe list after
|
dump out devices still on the deferred probe list after
|
||||||
retrying.
|
retrying.
|
||||||
|
|
||||||
|
dell_smm_hwmon.ignore_dmi=
|
||||||
|
[HW] Continue probing hardware even if DMI data
|
||||||
|
indicates that the driver is running on unsupported
|
||||||
|
hardware.
|
||||||
|
|
||||||
|
dell_smm_hwmon.force=
|
||||||
|
[HW] Activate driver even if SMM BIOS signature does
|
||||||
|
not match list of supported models and enable otherwise
|
||||||
|
blacklisted features.
|
||||||
|
|
||||||
|
dell_smm_hwmon.power_status=
|
||||||
|
[HW] Report power status in /proc/i8k
|
||||||
|
(disabled by default).
|
||||||
|
|
||||||
|
dell_smm_hwmon.restricted=
|
||||||
|
[HW] Allow controlling fans only if SYS_ADMIN
|
||||||
|
capability is set.
|
||||||
|
|
||||||
|
dell_smm_hwmon.fan_mult=
|
||||||
|
[HW] Factor to multiply fan speed with.
|
||||||
|
|
||||||
|
dell_smm_hwmon.fan_max=
|
||||||
|
[HW] Maximum configurable fan speed.
|
||||||
|
|
||||||
dfltcc= [HW,S390]
|
dfltcc= [HW,S390]
|
||||||
Format: { on | off | def_only | inf_only | always }
|
Format: { on | off | def_only | inf_only | always }
|
||||||
on: s390 zlib hardware support for compression on
|
on: s390 zlib hardware support for compression on
|
||||||
@@ -1435,6 +1465,14 @@
|
|||||||
as early as possible in order to facilitate early
|
as early as possible in order to facilitate early
|
||||||
boot debugging.
|
boot debugging.
|
||||||
|
|
||||||
|
ftrace_boot_snapshot
|
||||||
|
[FTRACE] On boot up, a snapshot will be taken of the
|
||||||
|
ftrace ring buffer that can be read at:
|
||||||
|
/sys/kernel/tracing/snapshot.
|
||||||
|
This is useful if you need tracing information from kernel
|
||||||
|
boot up that is likely to be overridden by user space
|
||||||
|
start up functionality.
|
||||||
|
|
||||||
ftrace_dump_on_oops[=orig_cpu]
|
ftrace_dump_on_oops[=orig_cpu]
|
||||||
[FTRACE] will dump the trace buffers on oops.
|
[FTRACE] will dump the trace buffers on oops.
|
||||||
If no parameter is passed, ftrace will dump
|
If no parameter is passed, ftrace will dump
|
||||||
@@ -1625,7 +1663,7 @@
|
|||||||
[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
|
[KNL] Requires CONFIG_HUGETLB_PAGE_FREE_VMEMMAP
|
||||||
enabled.
|
enabled.
|
||||||
Allows heavy hugetlb users to free up some more
|
Allows heavy hugetlb users to free up some more
|
||||||
memory (6 * PAGE_SIZE for each 2MB hugetlb page).
|
memory (7 * PAGE_SIZE for each 2MB hugetlb page).
|
||||||
Format: { on | off (default) }
|
Format: { on | off (default) }
|
||||||
|
|
||||||
on: enable the feature
|
on: enable the feature
|
||||||
@@ -1703,17 +1741,6 @@
|
|||||||
|
|
||||||
i810= [HW,DRM]
|
i810= [HW,DRM]
|
||||||
|
|
||||||
i8k.ignore_dmi [HW] Continue probing hardware even if DMI data
|
|
||||||
indicates that the driver is running on unsupported
|
|
||||||
hardware.
|
|
||||||
i8k.force [HW] Activate i8k driver even if SMM BIOS signature
|
|
||||||
does not match list of supported models.
|
|
||||||
i8k.power_status
|
|
||||||
[HW] Report power status in /proc/i8k
|
|
||||||
(disabled by default)
|
|
||||||
i8k.restricted [HW] Allow controlling fans only if SYS_ADMIN
|
|
||||||
capability is set.
|
|
||||||
|
|
||||||
i915.invert_brightness=
|
i915.invert_brightness=
|
||||||
[DRM] Invert the sense of the variable that is used to
|
[DRM] Invert the sense of the variable that is used to
|
||||||
set the brightness of the panel backlight. Normally a
|
set the brightness of the panel backlight. Normally a
|
||||||
@@ -2339,13 +2366,35 @@
|
|||||||
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
kvm.ignore_msrs=[KVM] Ignore guest accesses to unhandled MSRs.
|
||||||
Default is 0 (don't ignore, but inject #GP)
|
Default is 0 (don't ignore, but inject #GP)
|
||||||
|
|
||||||
|
kvm.eager_page_split=
|
||||||
|
[KVM,X86] Controls whether or not KVM will try to
|
||||||
|
proactively split all huge pages during dirty logging.
|
||||||
|
Eager page splitting reduces interruptions to vCPU
|
||||||
|
execution by eliminating the write-protection faults
|
||||||
|
and MMU lock contention that would otherwise be
|
||||||
|
required to split huge pages lazily.
|
||||||
|
|
||||||
|
VM workloads that rarely perform writes or that write
|
||||||
|
only to a small region of VM memory may benefit from
|
||||||
|
disabling eager page splitting to allow huge pages to
|
||||||
|
still be used for reads.
|
||||||
|
|
||||||
|
The behavior of eager page splitting depends on whether
|
||||||
|
KVM_DIRTY_LOG_INITIALLY_SET is enabled or disabled. If
|
||||||
|
disabled, all huge pages in a memslot will be eagerly
|
||||||
|
split when dirty logging is enabled on that memslot. If
|
||||||
|
enabled, eager page splitting will be performed during
|
||||||
|
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
||||||
|
cleared.
|
||||||
|
|
||||||
|
Eager page splitting currently only supports splitting
|
||||||
|
huge pages mapped by the TDP MMU.
|
||||||
|
|
||||||
|
Default is Y (on).
|
||||||
|
|
||||||
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
kvm.enable_vmware_backdoor=[KVM] Support VMware backdoor PV interface.
|
||||||
Default is false (don't support).
|
Default is false (don't support).
|
||||||
|
|
||||||
kvm.mmu_audit= [KVM] This is a R/W parameter which allows audit
|
|
||||||
KVM MMU at runtime.
|
|
||||||
Default is 0 (off)
|
|
||||||
|
|
||||||
kvm.nx_huge_pages=
|
kvm.nx_huge_pages=
|
||||||
[KVM] Controls the software workaround for the
|
[KVM] Controls the software workaround for the
|
||||||
X86_BUG_ITLB_MULTIHIT bug.
|
X86_BUG_ITLB_MULTIHIT bug.
|
||||||
@@ -2827,6 +2876,9 @@
|
|||||||
|
|
||||||
For details see: Documentation/admin-guide/hw-vuln/mds.rst
|
For details see: Documentation/admin-guide/hw-vuln/mds.rst
|
||||||
|
|
||||||
|
mem=nn[KMG] [HEXAGON] Set the memory size.
|
||||||
|
Must be specified, otherwise memory size will be 0.
|
||||||
|
|
||||||
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
|
mem=nn[KMG] [KNL,BOOT] Force usage of a specific amount of memory
|
||||||
Amount of memory to be used in cases as follows:
|
Amount of memory to be used in cases as follows:
|
||||||
|
|
||||||
@@ -2834,6 +2886,13 @@
|
|||||||
2 when the kernel is not able to see the whole system memory;
|
2 when the kernel is not able to see the whole system memory;
|
||||||
3 memory that lies after 'mem=' boundary is excluded from
|
3 memory that lies after 'mem=' boundary is excluded from
|
||||||
the hypervisor, then assigned to KVM guests.
|
the hypervisor, then assigned to KVM guests.
|
||||||
|
4 to limit the memory available for kdump kernel.
|
||||||
|
|
||||||
|
[ARC,MICROBLAZE] - the limit applies only to low memory,
|
||||||
|
high memory is not affected.
|
||||||
|
|
||||||
|
[ARM64] - only limits memory covered by the linear
|
||||||
|
mapping. The NOMAP regions are not affected.
|
||||||
|
|
||||||
[X86] Work as limiting max address. Use together
|
[X86] Work as limiting max address. Use together
|
||||||
with memmap= to avoid physical address space collisions.
|
with memmap= to avoid physical address space collisions.
|
||||||
@@ -2844,6 +2903,14 @@
|
|||||||
in above case 3, memory may need to be hot added after boot
|
in above case 3, memory may need to be hot added after boot
|
||||||
if system memory of hypervisor is not sufficient.
|
if system memory of hypervisor is not sufficient.
|
||||||
|
|
||||||
|
mem=nn[KMG]@ss[KMG]
|
||||||
|
[ARM,MIPS] - override the memory layout reported by
|
||||||
|
firmware.
|
||||||
|
Define a memory region of size nn[KMG] starting at
|
||||||
|
ss[KMG].
|
||||||
|
Multiple different regions can be specified with
|
||||||
|
multiple mem= parameters on the command line.
|
||||||
|
|
||||||
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
mem=nopentium [BUGS=X86-32] Disable usage of 4MB pages for kernel
|
||||||
memory.
|
memory.
|
||||||
|
|
||||||
@@ -3485,8 +3552,7 @@
|
|||||||
difficult since unequal pointers can no longer be
|
difficult since unequal pointers can no longer be
|
||||||
compared. However, if this command-line option is
|
compared. However, if this command-line option is
|
||||||
specified, then all normal pointers will have their true
|
specified, then all normal pointers will have their true
|
||||||
value printed. Pointers printed via %pK may still be
|
value printed. This option should only be specified when
|
||||||
hashed. This option should only be specified when
|
|
||||||
debugging the kernel. Please do not use on production
|
debugging the kernel. Please do not use on production
|
||||||
kernels.
|
kernels.
|
||||||
|
|
||||||
@@ -3726,6 +3792,11 @@
|
|||||||
bit 3: print locks info if CONFIG_LOCKDEP is on
|
bit 3: print locks info if CONFIG_LOCKDEP is on
|
||||||
bit 4: print ftrace buffer
|
bit 4: print ftrace buffer
|
||||||
bit 5: print all printk messages in buffer
|
bit 5: print all printk messages in buffer
|
||||||
|
bit 6: print all CPUs backtrace (if available in the arch)
|
||||||
|
*Be aware* that this option may print a _lot_ of lines,
|
||||||
|
so there are risks of losing older messages in the log.
|
||||||
|
Use this option carefully; it may be worth setting up a
|
||||||
|
bigger log buffer with "log_buf_len" along with this.
|
||||||
|
|
||||||
panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
|
panic_on_taint= Bitmask for conditionally calling panic() in add_taint()
|
||||||
Format: <hex>[,nousertaint]
|
Format: <hex>[,nousertaint]
|
||||||
@@ -4356,6 +4427,12 @@
|
|||||||
fully seed the kernel's CRNG. Default is controlled
|
fully seed the kernel's CRNG. Default is controlled
|
||||||
by CONFIG_RANDOM_TRUST_CPU.
|
by CONFIG_RANDOM_TRUST_CPU.
|
||||||
|
|
||||||
|
random.trust_bootloader={on,off}
|
||||||
|
[KNL] Enable or disable trusting the use of a
|
||||||
|
seed passed by the bootloader (if available) to
|
||||||
|
fully seed the kernel's CRNG. Default is controlled
|
||||||
|
by CONFIG_RANDOM_TRUST_BOOTLOADER.
|
||||||
|
|
||||||
randomize_kstack_offset=
|
randomize_kstack_offset=
|
||||||
[KNL] Enable or disable kernel stack offset
|
[KNL] Enable or disable kernel stack offset
|
||||||
randomization, which provides roughly 5 bits of
|
randomization, which provides roughly 5 bits of
|
||||||
@@ -4504,6 +4581,8 @@
|
|||||||
(the least-favored priority). Otherwise, when
|
(the least-favored priority). Otherwise, when
|
||||||
RCU_BOOST is not set, valid values are 0-99 and
|
RCU_BOOST is not set, valid values are 0-99 and
|
||||||
the default is zero (non-realtime operation).
|
the default is zero (non-realtime operation).
|
||||||
|
When RCU_NOCB_CPU is set, also adjust the
|
||||||
|
priority of NOCB callback kthreads.
|
||||||
|
|
||||||
rcutree.rcu_nocb_gp_stride= [KNL]
|
rcutree.rcu_nocb_gp_stride= [KNL]
|
||||||
Set the number of NOCB callback kthreads in
|
Set the number of NOCB callback kthreads in
|
||||||
@@ -5361,8 +5440,12 @@
|
|||||||
Specific mitigations can also be selected manually:
|
Specific mitigations can also be selected manually:
|
||||||
|
|
||||||
retpoline - replace indirect branches
|
retpoline - replace indirect branches
|
||||||
retpoline,generic - google's original retpoline
|
retpoline,generic - Retpolines
|
||||||
retpoline,amd - AMD-specific minimal thunk
|
retpoline,lfence - LFENCE; indirect branch
|
||||||
|
retpoline,amd - alias for retpoline,lfence
|
||||||
|
eibrs - enhanced IBRS
|
||||||
|
eibrs,retpoline - enhanced IBRS + Retpolines
|
||||||
|
eibrs,lfence - enhanced IBRS + LFENCE
|
||||||
|
|
||||||
Not specifying this option is equivalent to
|
Not specifying this option is equivalent to
|
||||||
spectre_v2=auto.
|
spectre_v2=auto.
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ FN lock.
|
|||||||
Battery care limit
|
Battery care limit
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
Writing 80/100 to /sys/devices/platform/lg-laptop/battery_care_limit
|
Writing 80/100 to /sys/class/power_supply/CMB0/charge_control_end_threshold
|
||||||
sets the maximum capacity to charge the battery. Limiting the charge
|
sets the maximum capacity to charge the battery. Limiting the charge
|
||||||
reduces battery capacity loss over time.
|
reduces battery capacity loss over time.
|
||||||
|
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ data from LCD controller (FIMD) through the SoC internal writeback data
|
|||||||
path. There are multiple FIMC instances in the SoCs (up to 4), having
|
path. There are multiple FIMC instances in the SoCs (up to 4), having
|
||||||
slightly different capabilities, like pixel alignment constraints, rotator
|
slightly different capabilities, like pixel alignment constraints, rotator
|
||||||
availability, LCD writeback support, etc. The driver is located at
|
availability, LCD writeback support, etc. The driver is located at
|
||||||
drivers/media/platform/exynos4-is directory.
|
drivers/media/platform/samsung/exynos4-is directory.
|
||||||
|
|
||||||
Supported SoCs
|
Supported SoCs
|
||||||
--------------
|
--------------
|
||||||
|
|||||||
@@ -284,7 +284,7 @@ tda9887 TDA 9885/6/7 analog IF demodulator
|
|||||||
tea5761 TEA 5761 radio tuner
|
tea5761 TEA 5761 radio tuner
|
||||||
tea5767 TEA 5767 radio tuner
|
tea5767 TEA 5767 radio tuner
|
||||||
tua9001 Infineon TUA9001 silicon tuner
|
tua9001 Infineon TUA9001 silicon tuner
|
||||||
tuner-xc2028 XCeive xc2028/xc3028 tuners
|
xc2028 XCeive xc2028/xc3028 tuners
|
||||||
xc4000 Xceive XC4000 silicon tuner
|
xc4000 Xceive XC4000 silicon tuner
|
||||||
xc5000 Xceive XC5000 silicon tuner
|
xc5000 Xceive XC5000 silicon tuner
|
||||||
============ ==================================================
|
============ ==================================================
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ reference manual [#f1]_.
|
|||||||
Entities
|
Entities
|
||||||
--------
|
--------
|
||||||
|
|
||||||
imx7-mipi-csi2
|
imx-mipi-csi2
|
||||||
--------------
|
--------------
|
||||||
|
|
||||||
This is the MIPI CSI-2 receiver entity. It has one sink pad to receive the pixel
|
This is the MIPI CSI-2 receiver entity. It has one sink pad to receive the pixel
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ Introduction
|
|||||||
------------
|
------------
|
||||||
|
|
||||||
This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
|
This file documents the Texas Instruments OMAP 3 Image Signal Processor (ISP)
|
||||||
driver located under drivers/media/platform/omap3isp. The original driver was
|
driver located under drivers/media/platform/ti/omap3isp. The original driver was
|
||||||
written by Texas Instruments but since that it has been rewritten (twice) at
|
written by Texas Instruments but since that it has been rewritten (twice) at
|
||||||
Nokia.
|
Nokia.
|
||||||
|
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ As of Revision AB, the ISS is described in detail in section 8.
|
|||||||
This driver is supporting **only** the CSI2-A/B interfaces for now.
|
This driver is supporting **only** the CSI2-A/B interfaces for now.
|
||||||
|
|
||||||
It makes use of the Media Controller framework [#f2]_, and inherited most of the
|
It makes use of the Media Controller framework [#f2]_, and inherited most of the
|
||||||
code from OMAP3 ISP driver (found under drivers/media/platform/omap3isp/\*),
|
code from OMAP3 ISP driver (found under drivers/media/platform/ti/omap3isp/\*),
|
||||||
except that it doesn't need an IOMMU now for ISS buffers memory mapping.
|
except that it doesn't need an IOMMU now for ISS buffers memory mapping.
|
||||||
|
|
||||||
Supports usage of MMAP buffers only (for now).
|
Supports usage of MMAP buffers only (for now).
|
||||||
|
|||||||
@@ -76,3 +76,16 @@ vimc-capture:
|
|||||||
|
|
||||||
* 1 Pad sink
|
* 1 Pad sink
|
||||||
* 1 Pad source
|
* 1 Pad source
|
||||||
|
|
||||||
|
Module options
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Vimc has a module parameter to configure the driver.
|
||||||
|
|
||||||
|
* ``allocator=<unsigned int>``
|
||||||
|
|
||||||
|
memory allocator selection, default is 0. It specifies the way buffers
|
||||||
|
will be allocated.
|
||||||
|
|
||||||
|
- 0: vmalloc
|
||||||
|
- 1: dma-contig
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
Detailed Usages
|
Detailed Usages
|
||||||
===============
|
===============
|
||||||
|
|
||||||
DAMON provides below three interfaces for different users.
|
DAMON provides below interfaces for different users.
|
||||||
|
|
||||||
- *DAMON user space tool.*
|
- *DAMON user space tool.*
|
||||||
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
`This <https://github.com/awslabs/damo>`_ is for privileged people such as
|
||||||
@@ -14,17 +14,21 @@ DAMON provides below three interfaces for different users.
|
|||||||
virtual and physical address spaces monitoring. For more detail, please
|
virtual and physical address spaces monitoring. For more detail, please
|
||||||
refer to its `usage document
|
refer to its `usage document
|
||||||
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
<https://github.com/awslabs/damo/blob/next/USAGE.md>`_.
|
||||||
- *debugfs interface.*
|
- *sysfs interface.*
|
||||||
:ref:`This <debugfs_interface>` is for privileged user space programmers who
|
:ref:`This <sysfs_interface>` is for privileged user space programmers who
|
||||||
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
want more optimized use of DAMON. Using this, users can use DAMON’s major
|
||||||
features by reading from and writing to special debugfs files. Therefore,
|
features by reading from and writing to special sysfs files. Therefore,
|
||||||
you can write and use your personalized DAMON debugfs wrapper programs that
|
you can write and use your personalized DAMON sysfs wrapper programs that
|
||||||
reads/writes the debugfs files instead of you. The `DAMON user space tool
|
reads/writes the sysfs files instead of you. The `DAMON user space tool
|
||||||
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
<https://github.com/awslabs/damo>`_ is one example of such programs. It
|
||||||
supports both virtual and physical address spaces monitoring. Note that this
|
supports both virtual and physical address spaces monitoring. Note that this
|
||||||
interface provides only simple :ref:`statistics <damos_stats>` for the
|
interface provides only simple :ref:`statistics <damos_stats>` for the
|
||||||
monitoring results. For detailed monitoring results, DAMON provides a
|
monitoring results. For detailed monitoring results, DAMON provides a
|
||||||
:ref:`tracepoint <tracepoint>`.
|
:ref:`tracepoint <tracepoint>`.
|
||||||
|
- *debugfs interface.*
|
||||||
|
:ref:`This <debugfs_interface>` is almost identical to :ref:`sysfs interface
|
||||||
|
<sysfs_interface>`. This will be removed after next LTS kernel is released,
|
||||||
|
so users should move to the :ref:`sysfs interface <sysfs_interface>`.
|
||||||
- *Kernel Space Programming Interface.*
|
- *Kernel Space Programming Interface.*
|
||||||
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
||||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||||
@@ -32,6 +36,340 @@ DAMON provides below three interfaces for different users.
|
|||||||
DAMON for various address spaces. For detail, please refer to the interface
|
DAMON for various address spaces. For detail, please refer to the interface
|
||||||
:doc:`document </vm/damon/api>`.
|
:doc:`document </vm/damon/api>`.
|
||||||
|
|
||||||
|
.. _sysfs_interface:
|
||||||
|
|
||||||
|
sysfs Interface
|
||||||
|
===============
|
||||||
|
|
||||||
|
DAMON sysfs interface is built when ``CONFIG_DAMON_SYSFS`` is defined. It
|
||||||
|
creates multiple directories and files under its sysfs directory,
|
||||||
|
``<sysfs>/kernel/mm/damon/``. You can control DAMON by writing to and reading
|
||||||
|
from the files under the directory.
|
||||||
|
|
||||||
|
For a short example, users can monitor the virtual address space of a given
|
||||||
|
workload as below. ::
|
||||||
|
|
||||||
|
# cd /sys/kernel/mm/damon/admin/
|
||||||
|
# echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
|
||||||
|
# echo vaddr > kdamonds/0/contexts/0/operations
|
||||||
|
# echo 1 > kdamonds/0/contexts/0/targets/nr_targets
|
||||||
|
# echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
|
||||||
|
# echo on > kdamonds/0/state
|
||||||
|
|
||||||
|
Files Hierarchy
|
||||||
|
---------------
|
||||||
|
|
||||||
|
The files hierarchy of DAMON sysfs interface is shown below. In the below
|
||||||
|
figure, parents-children relations are represented with indentations, each
|
||||||
|
directory is having ``/`` suffix, and files in each directory are separated by
|
||||||
|
comma (","). ::
|
||||||
|
|
||||||
|
/sys/kernel/mm/damon/admin
|
||||||
|
│ kdamonds/nr_kdamonds
|
||||||
|
│ │ 0/state,pid
|
||||||
|
│ │ │ contexts/nr_contexts
|
||||||
|
│ │ │ │ 0/operations
|
||||||
|
│ │ │ │ │ monitoring_attrs/
|
||||||
|
│ │ │ │ │ │ intervals/sample_us,aggr_us,update_us
|
||||||
|
│ │ │ │ │ │ nr_regions/min,max
|
||||||
|
│ │ │ │ │ targets/nr_targets
|
||||||
|
│ │ │ │ │ │ 0/pid_target
|
||||||
|
│ │ │ │ │ │ │ regions/nr_regions
|
||||||
|
│ │ │ │ │ │ │ │ 0/start,end
|
||||||
|
│ │ │ │ │ │ │ │ ...
|
||||||
|
│ │ │ │ │ │ ...
|
||||||
|
│ │ │ │ │ schemes/nr_schemes
|
||||||
|
│ │ │ │ │ │ 0/action
|
||||||
|
│ │ │ │ │ │ │ access_pattern/
|
||||||
|
│ │ │ │ │ │ │ │ sz/min,max
|
||||||
|
│ │ │ │ │ │ │ │ nr_accesses/min,max
|
||||||
|
│ │ │ │ │ │ │ │ age/min,max
|
||||||
|
│ │ │ │ │ │ │ quotas/ms,bytes,reset_interval_ms
|
||||||
|
│ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
|
||||||
|
│ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
|
||||||
|
│ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
|
||||||
|
│ │ │ │ │ │ ...
|
||||||
|
│ │ │ │ ...
|
||||||
|
│ │ ...
|
||||||
|
|
||||||
|
Root
|
||||||
|
----
|
||||||
|
|
||||||
|
The root of the DAMON sysfs interface is ``<sysfs>/kernel/mm/damon/``, and it
|
||||||
|
has one directory named ``admin``. The directory contains the files for
|
||||||
|
privileged user space programs' control of DAMON. User space tools or daemons
|
||||||
|
having the root permission could use this directory.
|
||||||
|
|
||||||
|
kdamonds/
|
||||||
|
---------
|
||||||
|
|
||||||
|
The monitoring-related information including request specifications and results
|
||||||
|
is called DAMON context. DAMON executes each context with a kernel thread
|
||||||
|
called kdamond, and multiple kdamonds could run in parallel.
|
||||||
|
|
||||||
|
Under the ``admin`` directory, one directory, ``kdamonds``, which has files for
|
||||||
|
controlling the kdamonds exists. In the beginning, this directory has only one
|
||||||
|
file, ``nr_kdamonds``. Writing a number (``N``) to the file creates the number
|
||||||
|
of child directories named ``0`` to ``N-1``. Each directory represents each
|
||||||
|
kdamond.
|
||||||
|
|
||||||
|
kdamonds/<N>/
|
||||||
|
-------------
|
||||||
|
|
||||||
|
In each kdamond directory, two files (``state`` and ``pid``) and one directory
|
||||||
|
(``contexts``) exist.
|
||||||
|
|
||||||
|
Reading ``state`` returns ``on`` if the kdamond is currently running, or
|
||||||
|
``off`` if it is not running. Writing ``on`` or ``off`` makes the kdamond be
|
||||||
|
in the state. Writing ``update_schemes_stats`` to ``state`` file updates the
|
||||||
|
contents of stats files for each DAMON-based operation scheme of the kdamond.
|
||||||
|
For details of the stats, please refer to :ref:`stats section
|
||||||
|
<sysfs_schemes_stats>`.
|
||||||
|
|
||||||
|
If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
|
||||||
|
|
||||||
|
``contexts`` directory contains files for controlling the monitoring contexts
|
||||||
|
that this kdamond will execute.
|
||||||
|
|
||||||
|
kdamonds/<N>/contexts/
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
In the beginning, this directory has only one file, ``nr_contexts``. Writing a
|
||||||
|
number (``N``) to the file creates the number of child directories named as
|
||||||
|
``0`` to ``N-1``. Each directory represents each monitoring context. At the
|
||||||
|
moment, only one context per kdamond is supported, so only ``0`` or ``1`` can
|
||||||
|
be written to the file.
|
||||||
|
|
||||||
|
contexts/<N>/
|
||||||
|
-------------
|
||||||
|
|
||||||
|
In each context directory, one file (``operations``) and three directories
|
||||||
|
(``monitoring_attrs``, ``targets``, and ``schemes``) exist.
|
||||||
|
|
||||||
|
DAMON supports multiple types of monitoring operations, including those for
|
||||||
|
virtual address space and the physical address space. You can set and get what
|
||||||
|
type of monitoring operations DAMON will use for the context by writing one of
|
||||||
|
below keywords to, and reading from the file.
|
||||||
|
|
||||||
|
- vaddr: Monitor virtual address spaces of specific processes
|
||||||
|
- paddr: Monitor the physical address space of the system
|
||||||
|
|
||||||
|
contexts/<N>/monitoring_attrs/
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Files for specifying attributes of the monitoring including required quality
|
||||||
|
and efficiency of the monitoring are in ``monitoring_attrs`` directory.
|
||||||
|
Specifically, two directories, ``intervals`` and ``nr_regions`` exist in this
|
||||||
|
directory.
|
||||||
|
|
||||||
|
Under ``intervals`` directory, three files for DAMON's sampling interval
|
||||||
|
(``sample_us``), aggregation interval (``aggr_us``), and update interval
|
||||||
|
(``update_us``) exist. You can set and get the values in micro-seconds by
|
||||||
|
writing to and reading from the files.
|
||||||
|
|
||||||
|
Under ``nr_regions`` directory, two files for the lower-bound and upper-bound
|
||||||
|
of DAMON's monitoring regions (``min`` and ``max``, respectively), which
|
||||||
|
controls the monitoring overhead, exist. You can set and get the values by
|
||||||
|
writing to and reading from the files.
|
||||||
|
|
||||||
|
For more details about the intervals and monitoring regions range, please refer
|
||||||
|
to the Design document (:doc:`/vm/damon/design`).
|
||||||
|
|
||||||
|
contexts/<N>/targets/
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
In the beginning, this directory has only one file, ``nr_targets``. Writing a
|
||||||
|
number (``N``) to the file creates the number of child directories named ``0``
|
||||||
|
to ``N-1``. Each directory represents each monitoring target.
|
||||||
|
|
||||||
|
targets/<N>/
|
||||||
|
------------
|
||||||
|
|
||||||
|
In each target directory, one file (``pid_target``) and one directory
|
||||||
|
(``regions``) exist.
|
||||||
|
|
||||||
|
If you wrote ``vaddr`` to the ``contexts/<N>/operations``, each target should
|
||||||
|
be a process. You can specify the process to DAMON by writing the pid of the
|
||||||
|
process to the ``pid_target`` file.
|
||||||
|
|
||||||
|
targets/<N>/regions
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
When ``vaddr`` monitoring operations set is being used (``vaddr`` is written to
|
||||||
|
the ``contexts/<N>/operations`` file), DAMON automatically sets and updates the
|
||||||
|
monitoring target regions so that entire memory mappings of target processes
|
||||||
|
can be covered. However, users could want to set the initial monitoring region
|
||||||
|
to specific address ranges.
|
||||||
|
|
||||||
|
In contrast, DAMON does not automatically set and update the monitoring target
|
||||||
|
regions when ``paddr`` monitoring operations set is being used (``paddr`` is
|
||||||
|
written to the ``contexts/<N>/operations``). Therefore, users should set the
|
||||||
|
monitoring target regions by themselves in the case.
|
||||||
|
|
||||||
|
For such cases, users can explicitly set the initial monitoring target regions
|
||||||
|
as they want, by writing proper values to the files under this directory.
|
||||||
|
|
||||||
|
In the beginning, this directory has only one file, ``nr_regions``. Writing a
|
||||||
|
number (``N``) to the file creates the number of child directories named ``0``
|
||||||
|
to ``N-1``. Each directory represents each initial monitoring target region.
|
||||||
|
|
||||||
|
regions/<N>/
|
||||||
|
------------
|
||||||
|
|
||||||
|
In each region directory, you will find two files (``start`` and ``end``). You
|
||||||
|
can set and get the start and end addresses of the initial monitoring target
|
||||||
|
region by writing to and reading from the files, respectively.
|
||||||
|
|
||||||
|
contexts/<N>/schemes/
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
For usual DAMON-based data access aware memory management optimizations, users
|
||||||
|
would normally want the system to apply a memory management action to a memory
|
||||||
|
region of a specific access pattern. DAMON receives such formalized operation
|
||||||
|
schemes from the user and applies those to the target memory regions. Users
|
||||||
|
can get and set the schemes by reading from and writing to files under this
|
||||||
|
directory.
|
||||||
|
|
||||||
|
In the beginning, this directory has only one file, ``nr_schemes``. Writing a
|
||||||
|
number (``N``) to the file creates the number of child directories named ``0``
|
||||||
|
to ``N-1``. Each directory represents each DAMON-based operation scheme.
|
||||||
|
|
||||||
|
schemes/<N>/
|
||||||
|
------------
|
||||||
|
|
||||||
|
In each scheme directory, four directories (``access_pattern``, ``quotas``,
|
||||||
|
``watermarks``, and ``stats``) and one file (``action``) exist.
|
||||||
|
|
||||||
|
The ``action`` file is for setting and getting what action you want to apply to
|
||||||
|
memory regions having specific access pattern of the interest. The keywords
|
||||||
|
that can be written to and read from the file and their meaning are as below.
|
||||||
|
|
||||||
|
- ``willneed``: Call ``madvise()`` for the region with ``MADV_WILLNEED``
|
||||||
|
- ``cold``: Call ``madvise()`` for the region with ``MADV_COLD``
|
||||||
|
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
|
||||||
|
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
|
||||||
|
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
||||||
|
- ``stat``: Do nothing but count the statistics
|
||||||
|
|
||||||
|
schemes/<N>/access_pattern/
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
The target access pattern of each DAMON-based operation scheme is constructed
|
||||||
|
with three ranges including the size of the region in bytes, number of
|
||||||
|
monitored accesses per aggregate interval, and number of aggregated intervals
|
||||||
|
for the age of the region.
|
||||||
|
|
||||||
|
Under the ``access_pattern`` directory, three directories (``sz``,
|
||||||
|
``nr_accesses``, and ``age``) each having two files (``min`` and ``max``)
|
||||||
|
exist. You can set and get the access pattern for the given scheme by writing
|
||||||
|
to and reading from the ``min`` and ``max`` files under ``sz``,
|
||||||
|
``nr_accesses``, and ``age`` directories, respectively.
|
||||||
|
|
||||||
|
schemes/<N>/quotas/
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Optimal ``target access pattern`` for each ``action`` is workload dependent, so
|
||||||
|
not easy to find. Worse yet, setting a scheme of some action too aggressive
|
||||||
|
can cause severe overhead. To avoid such overhead, users can limit time and
|
||||||
|
size quota for each scheme. In detail, users can ask DAMON to try to use only
|
||||||
|
up to specific time (``time quota``) for applying the action, and to apply the
|
||||||
|
action to only up to specific amount (``size quota``) of memory regions having
|
||||||
|
the target access pattern within a given time interval (``reset interval``).
|
||||||
|
|
||||||
|
When the quota limit is expected to be exceeded, DAMON prioritizes found memory
|
||||||
|
regions of the ``target access pattern`` based on their size, access frequency,
|
||||||
|
and age. For personalized prioritization, users can set the weights for the
|
||||||
|
three properties.
|
||||||
|
|
||||||
|
Under ``quotas`` directory, three files (``ms``, ``bytes``,
|
||||||
|
``reset_interval_ms``) and one directory (``weights``) having three files
|
||||||
|
(``sz_permil``, ``nr_accesses_permil``, and ``age_permil``) in it exist.
|
||||||
|
|
||||||
|
You can set the ``time quota`` in milliseconds, ``size quota`` in bytes, and
|
||||||
|
``reset interval`` in milliseconds by writing the values to the three files,
|
||||||
|
respectively. You can also set the prioritization weights for size, access
|
||||||
|
frequency, and age in per-thousand unit by writing the values to the three
|
||||||
|
files under the ``weights`` directory.
|
||||||
|
|
||||||
|
schemes/<N>/watermarks/
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
To allow easy activation and deactivation of each scheme based on system
|
||||||
|
status, DAMON provides a feature called watermarks. The feature receives five
|
||||||
|
values called ``metric``, ``interval``, ``high``, ``mid``, and ``low``. The
|
||||||
|
``metric`` is the system metric such as free memory ratio that can be measured.
|
||||||
|
If the metric value of the system is higher than the value in ``high`` or lower
|
||||||
|
than ``low`` at the moment, the scheme is deactivated. If the value is lower
|
||||||
|
than ``mid``, the scheme is activated.
|
||||||
|
|
||||||
|
Under the watermarks directory, five files (``metric``, ``interval_us``,
|
||||||
|
``high``, ``mid``, and ``low``) for setting each value exist. You can set and
|
||||||
|
get the five values by writing to and reading from the files, respectively.
|
||||||
|
|
||||||
|
Keywords and meanings of those that can be written to the ``metric`` file are
|
||||||
|
as below.
|
||||||
|
|
||||||
|
- none: Ignore the watermarks
|
||||||
|
- free_mem_rate: System's free memory rate (per thousand)
|
||||||
|
|
||||||
|
The ``interval`` should be written in microseconds unit.
|
||||||
|
|
||||||
|
.. _sysfs_schemes_stats:
|
||||||
|
|
||||||
|
schemes/<N>/stats/
|
||||||
|
------------------
|
||||||
|
|
||||||
|
DAMON counts the total number and bytes of regions that each scheme is tried to
|
||||||
|
be applied, the two numbers for the regions that each scheme is successfully
|
||||||
|
applied, and the total number of the quota limit exceeds. These statistics can
|
||||||
|
be used for online analysis or tuning of the schemes.
|
||||||
|
|
||||||
|
The statistics can be retrieved by reading the files under ``stats`` directory
|
||||||
|
(``nr_tried``, ``sz_tried``, ``nr_applied``, ``sz_applied``, and
|
||||||
|
``qt_exceeds``), respectively. The files are not updated in real time, so you
|
||||||
|
should ask DAMON sysfs interface to update the content of the files for the
|
||||||
|
stats by writing a special keyword, ``update_schemes_stats`` to the relevant
|
||||||
|
``kdamonds/<N>/state`` file.
|
||||||
|
|
||||||
|
Example
|
||||||
|
~~~~~~~
|
||||||
|
|
||||||
|
Below commands apply a scheme saying "If a memory region of size in [4KiB,
|
||||||
|
8KiB] is showing accesses per aggregate interval in [0, 5] for aggregate
|
||||||
|
interval in [10, 20], page out the region. For the paging out, use only up to
|
||||||
|
10ms per second, and also don't page out more than 1GiB per second. Under the
|
||||||
|
limitation, page out memory regions having longer age first. Also, check the
|
||||||
|
free memory rate of the system every 5 seconds, start the monitoring and paging
|
||||||
|
out when the free memory rate becomes lower than 50%, but stop it if the free
|
||||||
|
memory rate becomes larger than 60%, or lower than 30%". ::
|
||||||
|
|
||||||
|
# cd <sysfs>/kernel/mm/damon/admin
|
||||||
|
# # populate directories
|
||||||
|
# echo 1 > kdamonds/nr_kdamonds; echo 1 > kdamonds/0/contexts/nr_contexts;
|
||||||
|
# echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
|
||||||
|
# cd kdamonds/0/contexts/0/schemes/0
|
||||||
|
# # set the basic access pattern and the action
|
||||||
|
# echo 4096 > access_pattern/sz/min
|
||||||
|
# echo 8192 > access_pattern/sz/max
|
||||||
|
# echo 0 > access_pattern/nr_accesses/min
|
||||||
|
# echo 5 > access_pattern/nr_accesses/max
|
||||||
|
# echo 10 > access_pattern/age/min
|
||||||
|
# echo 20 > access_pattern/age/max
|
||||||
|
# echo pageout > action
|
||||||
|
# # set quotas
|
||||||
|
# echo 10 > quotas/ms
|
||||||
|
# echo $((1024*1024*1024)) > quotas/bytes
|
||||||
|
# echo 1000 > quotas/reset_interval_ms
|
||||||
|
# # set watermark
|
||||||
|
# echo free_mem_rate > watermarks/metric
|
||||||
|
# echo 5000000 > watermarks/interval_us
|
||||||
|
# echo 600 > watermarks/high
|
||||||
|
# echo 500 > watermarks/mid
|
||||||
|
# echo 300 > watermarks/low
|
||||||
|
|
||||||
|
Please note that it's highly recommended to use user space tools like `damo
|
||||||
|
<https://github.com/awslabs/damo>`_ rather than manually reading and writing
|
||||||
|
the files as above. Above is only for an example.
|
||||||
|
|
||||||
.. _debugfs_interface:
|
.. _debugfs_interface:
|
||||||
|
|
||||||
@@ -47,7 +385,7 @@ Attributes
|
|||||||
----------
|
----------
|
||||||
|
|
||||||
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
||||||
``regions update interval``, and min/max number of monitoring target regions by
|
``update interval``, and min/max number of monitoring target regions by
|
||||||
reading from and writing to the ``attrs`` file. To know about the monitoring
|
reading from and writing to the ``attrs`` file. To know about the monitoring
|
||||||
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
|
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
|
||||||
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
||||||
@@ -108,24 +446,28 @@ In such cases, users can explicitly set the initial monitoring target regions
|
|||||||
as they want, by writing proper values to the ``init_regions`` file. Each line
|
as they want, by writing proper values to the ``init_regions`` file. Each line
|
||||||
of the input should represent one region in below form.::
|
of the input should represent one region in below form.::
|
||||||
|
|
||||||
<target id> <start address> <end address>
|
<target idx> <start address> <end address>
|
||||||
|
|
||||||
The ``target id`` should already in ``target_ids`` file, and the regions should
|
The ``target idx`` should be the index of the target in ``target_ids`` file,
|
||||||
be passed in address order. For example, below commands will set a couple of
|
starting from ``0``, and the regions should be passed in address order. For
|
||||||
address ranges, ``1-100`` and ``100-200`` as the initial monitoring target
|
example, below commands will set a couple of address ranges, ``1-100`` and
|
||||||
region of process 42, and another couple of address ranges, ``20-40`` and
|
``100-200`` as the initial monitoring target region of pid 42, which is the
|
||||||
``50-100`` as that of process 4242.::
|
first one (index ``0``) in ``target_ids``, and another couple of address
|
||||||
|
ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
|
||||||
|
(index ``1``) in ``target_ids``.::
|
||||||
|
|
||||||
# cd <debugfs>/damon
|
# cd <debugfs>/damon
|
||||||
# echo "42 1 100
|
# cat target_ids
|
||||||
42 100 200
|
42 4242
|
||||||
4242 20 40
|
# echo "0 1 100
|
||||||
4242 50 100" > init_regions
|
0 100 200
|
||||||
|
1 20 40
|
||||||
|
1 50 100" > init_regions
|
||||||
|
|
||||||
Note that this sets the initial monitoring target regions only. In case of
|
Note that this sets the initial monitoring target regions only. In case of
|
||||||
virtual memory monitoring, DAMON will automatically updates the boundary of the
|
virtual memory monitoring, DAMON will automatically updates the boundary of the
|
||||||
regions after one ``regions update interval``. Therefore, users should set the
|
regions after one ``update interval``. Therefore, users should set the
|
||||||
``regions update interval`` large enough in this case, if they don't want the
|
``update interval`` large enough in this case, if they don't want the
|
||||||
update.
|
update.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ There are four components to pagemap:
|
|||||||
* Bit 56 page exclusively mapped (since 4.2)
|
* Bit 56 page exclusively mapped (since 4.2)
|
||||||
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
|
* Bit 57 pte is uffd-wp write-protected (since 5.13) (see
|
||||||
:ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
|
:ref:`Documentation/admin-guide/mm/userfaultfd.rst <userfaultfd>`)
|
||||||
* Bits 57-60 zero
|
* Bits 58-60 zero
|
||||||
* Bit 61 page is file-page or shared-anon (since 3.5)
|
* Bit 61 page is file-page or shared-anon (since 3.5)
|
||||||
* Bit 62 page swapped
|
* Bit 62 page swapped
|
||||||
* Bit 63 page present
|
* Bit 63 page present
|
||||||
|
|||||||
@@ -130,9 +130,25 @@ attribute, e.g.::
|
|||||||
echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
|
echo 1 > /sys/module/zswap/parameters/same_filled_pages_enabled
|
||||||
|
|
||||||
When zswap same-filled page identification is disabled at runtime, it will stop
|
When zswap same-filled page identification is disabled at runtime, it will stop
|
||||||
checking for the same-value filled pages during store operation. However, the
|
checking for the same-value filled pages during store operation.
|
||||||
existing pages which are marked as same-value filled pages remain stored
|
In other words, every page will then be considered non-same-value filled.
|
||||||
unchanged in zswap until they are either loaded or invalidated.
|
However, the existing pages which are marked as same-value filled pages remain
|
||||||
|
stored unchanged in zswap until they are either loaded or invalidated.
|
||||||
|
|
||||||
|
In some circumstances it might be advantageous to make use of just the zswap
|
||||||
|
ability to efficiently store same-filled pages without enabling the whole
|
||||||
|
compressed page storage.
|
||||||
|
In this case the handling of non-same-value pages by zswap (enabled by default)
|
||||||
|
can be disabled by setting the ``non_same_filled_pages_enabled`` attribute
|
||||||
|
to 0, e.g. ``zswap.non_same_filled_pages_enabled=0``.
|
||||||
|
It can also be enabled and disabled at runtime using the sysfs
|
||||||
|
``non_same_filled_pages_enabled`` attribute, e.g.::
|
||||||
|
|
||||||
|
echo 1 > /sys/module/zswap/parameters/non_same_filled_pages_enabled
|
||||||
|
|
||||||
|
Disabling both ``zswap.same_filled_pages_enabled`` and
|
||||||
|
``zswap.non_same_filled_pages_enabled`` effectively disables accepting any new
|
||||||
|
pages by zswap.
|
||||||
|
|
||||||
To prevent zswap from shrinking pool when zswap is full and there's a high
|
To prevent zswap from shrinking pool when zswap is full and there's a high
|
||||||
pressure on swap (this will result in flipping pages in and out zswap pool
|
pressure on swap (this will result in flipping pages in and out zswap pool
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ Performance monitor support
|
|||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
hisi-pmu
|
hisi-pmu
|
||||||
|
hisi-pcie-pmu
|
||||||
imx-ddr
|
imx-ddr
|
||||||
qcom_l2_pmu
|
qcom_l2_pmu
|
||||||
qcom_l3_pmu
|
qcom_l3_pmu
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ Linux kernel. The new mechanism is based on Collaborative Processor
|
|||||||
Performance Control (CPPC) which provides finer grain frequency management
|
Performance Control (CPPC) which provides finer grain frequency management
|
||||||
than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
|
than legacy ACPI hardware P-States. Current AMD CPU/APU platforms are using
|
||||||
the ACPI P-states driver to manage CPU frequency and clocks with switching
|
the ACPI P-states driver to manage CPU frequency and clocks with switching
|
||||||
only in 3 P-states. CPPC replaces the ACPI P-states controls, allows a
|
only in 3 P-states. CPPC replaces the ACPI P-states controls and allows a
|
||||||
flexible, low-latency interface for the Linux kernel to directly
|
flexible, low-latency interface for the Linux kernel to directly
|
||||||
communicate the performance hints to hardware.
|
communicate the performance hints to hardware.
|
||||||
|
|
||||||
@@ -27,7 +27,7 @@ communicate the performance hints to hardware.
|
|||||||
``ondemand``, etc. to manage the performance hints which are provided by
|
``ondemand``, etc. to manage the performance hints which are provided by
|
||||||
CPPC hardware functionality that internally follows the hardware
|
CPPC hardware functionality that internally follows the hardware
|
||||||
specification (for details refer to AMD64 Architecture Programmer's Manual
|
specification (for details refer to AMD64 Architecture Programmer's Manual
|
||||||
Volume 2: System Programming [1]_). Currently ``amd-pstate`` supports basic
|
Volume 2: System Programming [1]_). Currently, ``amd-pstate`` supports basic
|
||||||
frequency control function according to kernel governors on some of the
|
frequency control function according to kernel governors on some of the
|
||||||
Zen2 and Zen3 processors, and we will implement more AMD specific functions
|
Zen2 and Zen3 processors, and we will implement more AMD specific functions
|
||||||
in future after we verify them on the hardware and SBIOS.
|
in future after we verify them on the hardware and SBIOS.
|
||||||
@@ -41,9 +41,9 @@ continuous, abstract, and unit-less performance value in a scale that is
|
|||||||
not tied to a specific performance state / frequency. This is an ACPI
|
not tied to a specific performance state / frequency. This is an ACPI
|
||||||
standard [2]_ which software can specify application performance goals and
|
standard [2]_ which software can specify application performance goals and
|
||||||
hints as a relative target to the infrastructure limits. AMD processors
|
hints as a relative target to the infrastructure limits. AMD processors
|
||||||
provides the low latency register model (MSR) instead of AML code
|
provide the low latency register model (MSR) instead of an AML code
|
||||||
interpreter for performance adjustments. ``amd-pstate`` will initialize a
|
interpreter for performance adjustments. ``amd-pstate`` will initialize a
|
||||||
``struct cpufreq_driver`` instance ``amd_pstate_driver`` with the callbacks
|
``struct cpufreq_driver`` instance, ``amd_pstate_driver``, with the callbacks
|
||||||
to manage each performance update behavior. ::
|
to manage each performance update behavior. ::
|
||||||
|
|
||||||
Highest Perf ------>+-----------------------+ +-----------------------+
|
Highest Perf ------>+-----------------------+ +-----------------------+
|
||||||
@@ -91,26 +91,26 @@ AMD CPPC Performance Capability
|
|||||||
Highest Performance (RO)
|
Highest Performance (RO)
|
||||||
.........................
|
.........................
|
||||||
|
|
||||||
It is the absolute maximum performance an individual processor may reach,
|
This is the absolute maximum performance an individual processor may reach,
|
||||||
assuming ideal conditions. This performance level may not be sustainable
|
assuming ideal conditions. This performance level may not be sustainable
|
||||||
for long durations and may only be achievable if other platform components
|
for long durations and may only be achievable if other platform components
|
||||||
are in a specific state; for example, it may require other processors be in
|
are in a specific state; for example, it may require other processors to be in
|
||||||
an idle state. This would be equivalent to the highest frequencies
|
an idle state. This would be equivalent to the highest frequencies
|
||||||
supported by the processor.
|
supported by the processor.
|
||||||
|
|
||||||
Nominal (Guaranteed) Performance (RO)
|
Nominal (Guaranteed) Performance (RO)
|
||||||
......................................
|
......................................
|
||||||
|
|
||||||
It is the maximum sustained performance level of the processor, assuming
|
This is the maximum sustained performance level of the processor, assuming
|
||||||
ideal operating conditions. In absence of an external constraint (power,
|
ideal operating conditions. In the absence of an external constraint (power,
|
||||||
thermal, etc.) this is the performance level the processor is expected to
|
thermal, etc.), this is the performance level the processor is expected to
|
||||||
be able to maintain continuously. All cores/processors are expected to be
|
be able to maintain continuously. All cores/processors are expected to be
|
||||||
able to sustain their nominal performance state simultaneously.
|
able to sustain their nominal performance state simultaneously.
|
||||||
|
|
||||||
Lowest non-linear Performance (RO)
|
Lowest non-linear Performance (RO)
|
||||||
...................................
|
...................................
|
||||||
|
|
||||||
It is the lowest performance level at which nonlinear power savings are
|
This is the lowest performance level at which nonlinear power savings are
|
||||||
achieved, for example, due to the combined effects of voltage and frequency
|
achieved, for example, due to the combined effects of voltage and frequency
|
||||||
scaling. Above this threshold, lower performance levels should be generally
|
scaling. Above this threshold, lower performance levels should be generally
|
||||||
more energy efficient than higher performance levels. This register
|
more energy efficient than higher performance levels. This register
|
||||||
@@ -119,7 +119,7 @@ effectively conveys the most efficient performance level to ``amd-pstate``.
|
|||||||
Lowest Performance (RO)
|
Lowest Performance (RO)
|
||||||
........................
|
........................
|
||||||
|
|
||||||
It is the absolute lowest performance level of the processor. Selecting a
|
This is the absolute lowest performance level of the processor. Selecting a
|
||||||
performance level lower than the lowest nonlinear performance level may
|
performance level lower than the lowest nonlinear performance level may
|
||||||
cause an efficiency penalty but should reduce the instantaneous power
|
cause an efficiency penalty but should reduce the instantaneous power
|
||||||
consumption of the processor.
|
consumption of the processor.
|
||||||
@@ -149,14 +149,14 @@ a relative number. This can be expressed as percentage of nominal
|
|||||||
performance (infrastructure max). Below the nominal sustained performance
|
performance (infrastructure max). Below the nominal sustained performance
|
||||||
level, desired performance expresses the average performance level of the
|
level, desired performance expresses the average performance level of the
|
||||||
processor subject to hardware. Above the nominal performance level,
|
processor subject to hardware. Above the nominal performance level,
|
||||||
processor must provide at least nominal performance requested and go higher
|
the processor must provide at least nominal performance requested and go higher
|
||||||
if current operating conditions allow.
|
if current operating conditions allow.
|
||||||
|
|
||||||
Energy Performance Preference (EPP) (RW)
|
Energy Performance Preference (EPP) (RW)
|
||||||
.........................................
|
.........................................
|
||||||
|
|
||||||
Provides a hint to the hardware if software wants to bias toward performance
|
This attribute provides a hint to the hardware if software wants to bias
|
||||||
(0x0) or energy efficiency (0xff).
|
toward performance (0x0) or energy efficiency (0xff).
|
||||||
|
|
||||||
|
|
||||||
Key Governors Support
|
Key Governors Support
|
||||||
@@ -173,35 +173,34 @@ operating frequencies supported by the hardware. Users can check the
|
|||||||
``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
|
``amd-pstate`` mainly supports ``schedutil`` and ``ondemand`` for dynamic
|
||||||
frequency control. It is to fine tune the processor configuration on
|
frequency control. It is to fine tune the processor configuration on
|
||||||
``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
|
``amd-pstate`` to the ``schedutil`` with CPU CFS scheduler. ``amd-pstate``
|
||||||
registers adjust_perf callback to implement the CPPC similar performance
|
registers the adjust_perf callback to implement performance update behavior
|
||||||
update behavior. It is initialized by ``sugov_start`` and then populate the
|
similar to CPPC. It is initialized by ``sugov_start`` and then populates the
|
||||||
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as
|
CPU's update_util_data pointer to assign ``sugov_update_single_perf`` as the
|
||||||
the utilization update callback function in CPU scheduler. CPU scheduler
|
utilization update callback function in the CPU scheduler. The CPU scheduler
|
||||||
will call ``cpufreq_update_util`` and assign the target performance
|
will call ``cpufreq_update_util`` and assign the target performance according
|
||||||
according to the ``struct sugov_cpu`` that utilization update belongs to.
|
to the ``struct sugov_cpu`` that the utilization update belongs to.
|
||||||
Then ``amd-pstate`` updates the desired performance according to the CPU
|
Then, ``amd-pstate`` updates the desired performance according to the CPU
|
||||||
scheduler assigned.
|
scheduler assigned.
|
||||||
|
|
||||||
|
|
||||||
Processor Support
|
Processor Support
|
||||||
=======================
|
=======================
|
||||||
|
|
||||||
The ``amd-pstate`` initialization will fail if the _CPC in ACPI SBIOS is
|
The ``amd-pstate`` initialization will fail if the ``_CPC`` entry in the ACPI
|
||||||
not existed at the detected processor, and it uses ``acpi_cpc_valid`` to
|
SBIOS does not exist in the detected processor. It uses ``acpi_cpc_valid``
|
||||||
check the _CPC existence. All Zen based processors support legacy ACPI
|
to check the existence of ``_CPC``. All Zen based processors support the legacy
|
||||||
hardware P-States function, so while the ``amd-pstate`` fails to be
|
ACPI hardware P-States function, so when ``amd-pstate`` fails initialization,
|
||||||
initialized, the kernel will fall back to initialize ``acpi-cpufreq``
|
the kernel will fall back to initialize the ``acpi-cpufreq`` driver.
|
||||||
driver.
|
|
||||||
|
|
||||||
There are two types of hardware implementations for ``amd-pstate``: one is
|
There are two types of hardware implementations for ``amd-pstate``: one is
|
||||||
`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
|
`Full MSR Support <perf_cap_>`_ and another is `Shared Memory Support
|
||||||
<perf_cap_>`_. It can use :c:macro:`X86_FEATURE_CPPC` feature flag (for
|
<perf_cap_>`_. It can use the :c:macro:`X86_FEATURE_CPPC` feature flag to
|
||||||
details refer to Processor Programming Reference (PPR) for AMD Family
|
indicate the different types. (For details, refer to the Processor Programming
|
||||||
19h Model 51h, Revision A1 Processors [3]_) to indicate the different
|
Reference (PPR) for AMD Family 19h Model 51h, Revision A1 Processors [3]_.)
|
||||||
types. ``amd-pstate`` is to register different ``static_call`` instances
|
``amd-pstate`` is to register different ``static_call`` instances for different
|
||||||
for different hardware implementations.
|
hardware implementations.
|
||||||
|
|
||||||
Currently, some of Zen2 and Zen3 processors support ``amd-pstate``. In the
|
Currently, some of the Zen2 and Zen3 processors support ``amd-pstate``. In the
|
||||||
future, it will be supported on more and more AMD processors.
|
future, it will be supported on more and more AMD processors.
|
||||||
|
|
||||||
Full MSR Support
|
Full MSR Support
|
||||||
@@ -210,18 +209,18 @@ Full MSR Support
|
|||||||
Some new Zen3 processors such as Cezanne provide the MSR registers directly
|
Some new Zen3 processors such as Cezanne provide the MSR registers directly
|
||||||
while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
|
while the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is set.
|
||||||
``amd-pstate`` can handle the MSR register to implement the fast switch
|
``amd-pstate`` can handle the MSR register to implement the fast switch
|
||||||
function in ``CPUFreq`` that can shrink latency of frequency control on the
|
function in ``CPUFreq`` that can reduce the latency of frequency control in
|
||||||
interrupt context. The functions with ``pstate_xxx`` prefix represent the
|
interrupt context. The functions with a ``pstate_xxx`` prefix represent the
|
||||||
operations of MSR registers.
|
operations on MSR registers.
|
||||||
|
|
||||||
Shared Memory Support
|
Shared Memory Support
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
If :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, that means the
|
If the :c:macro:`X86_FEATURE_CPPC` CPU feature flag is not set, the
|
||||||
processor supports shared memory solution. In this case, ``amd-pstate``
|
processor supports the shared memory solution. In this case, ``amd-pstate``
|
||||||
uses the ``cppc_acpi`` helper methods to implement the callback functions
|
uses the ``cppc_acpi`` helper methods to implement the callback functions
|
||||||
that defined on ``static_call``. The functions with ``cppc_xxx`` prefix
|
that are defined on ``static_call``. The functions with the ``cppc_xxx`` prefix
|
||||||
represent the operations of acpi cppc helpers for shared memory solution.
|
represent the operations of ACPI CPPC helpers for the shared memory solution.
|
||||||
|
|
||||||
|
|
||||||
AMD P-States and ACPI hardware P-States always can be supported in one
|
AMD P-States and ACPI hardware P-States always can be supported in one
|
||||||
@@ -234,7 +233,7 @@ User Space Interface in ``sysfs``
|
|||||||
==================================
|
==================================
|
||||||
|
|
||||||
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
``amd-pstate`` exposes several global attributes (files) in ``sysfs`` to
|
||||||
control its functionality at the system level. They located in the
|
control its functionality at the system level. They are located in the
|
||||||
``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
|
``/sys/devices/system/cpu/cpufreq/policyX/`` directory and affect all CPUs. ::
|
||||||
|
|
||||||
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
|
root@hr-test1:/home/ray# ls /sys/devices/system/cpu/cpufreq/policy0/*amd*
|
||||||
@@ -246,38 +245,38 @@ control its functionality at the system level. They located in the
|
|||||||
``amd_pstate_highest_perf / amd_pstate_max_freq``
|
``amd_pstate_highest_perf / amd_pstate_max_freq``
|
||||||
|
|
||||||
Maximum CPPC performance and CPU frequency that the driver is allowed to
|
Maximum CPPC performance and CPU frequency that the driver is allowed to
|
||||||
set in percent of the maximum supported CPPC performance level (the highest
|
set, in percent of the maximum supported CPPC performance level (the highest
|
||||||
performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
performance supported in `AMD CPPC Performance Capability <perf_cap_>`_).
|
||||||
In some of ASICs, the highest CPPC performance is not the one in the _CPC
|
In some ASICs, the highest CPPC performance is not the one in the ``_CPC``
|
||||||
table, so we need to expose it to sysfs. If boost is not active but
|
table, so we need to expose it to sysfs. If boost is not active, but
|
||||||
supported, this maximum frequency will be larger than the one in
|
still supported, this maximum frequency will be larger than the one in
|
||||||
``cpuinfo``.
|
``cpuinfo``.
|
||||||
This attribute is read-only.
|
This attribute is read-only.
|
||||||
|
|
||||||
``amd_pstate_lowest_nonlinear_freq``
|
``amd_pstate_lowest_nonlinear_freq``
|
||||||
|
|
||||||
The lowest non-linear CPPC CPU frequency that the driver is allowed to set
|
The lowest non-linear CPPC CPU frequency that the driver is allowed to set,
|
||||||
in percent of the maximum supported CPPC performance level (Please see the
|
in percent of the maximum supported CPPC performance level. (Please see the
|
||||||
lowest non-linear performance in `AMD CPPC Performance Capability
|
lowest non-linear performance in `AMD CPPC Performance Capability
|
||||||
<perf_cap_>`_).
|
<perf_cap_>`_.)
|
||||||
This attribute is read-only.
|
This attribute is read-only.
|
||||||
|
|
||||||
For other performance and frequency values, we can read them back from
|
Other performance and frequency values can be read back from
|
||||||
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
``/sys/devices/system/cpu/cpuX/acpi_cppc/``, see :ref:`cppc_sysfs`.
|
||||||
|
|
||||||
|
|
||||||
``amd-pstate`` vs ``acpi-cpufreq``
|
``amd-pstate`` vs ``acpi-cpufreq``
|
||||||
======================================
|
======================================
|
||||||
|
|
||||||
On majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
On the majority of AMD platforms supported by ``acpi-cpufreq``, the ACPI tables
|
||||||
provided by the platform firmware used for CPU performance scaling, but
|
provided by the platform firmware are used for CPU performance scaling, but
|
||||||
only provides 3 P-states on AMD processors.
|
only provide 3 P-states on AMD processors.
|
||||||
However, on modern AMD APU and CPU series, it provides the collaborative
|
However, on modern AMD APU and CPU series, hardware provides the Collaborative
|
||||||
processor performance control according to ACPI protocol and customize this
|
Processor Performance Control according to the ACPI protocol and customizes this
|
||||||
for AMD platforms. That is fine-grain and continuous frequency range
|
for AMD platforms. That is, fine-grained and continuous frequency ranges
|
||||||
instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
|
instead of the legacy hardware P-states. ``amd-pstate`` is the kernel
|
||||||
module which supports the new AMD P-States mechanism on most of future AMD
|
module which supports the new AMD P-States mechanism on most of the future AMD
|
||||||
platforms. The AMD P-States mechanism will be the more performance and energy
|
platforms. The AMD P-States mechanism is the more performance and energy
|
||||||
efficiency frequency management method on AMD processors.
|
efficiency frequency management method on AMD processors.
|
||||||
|
|
||||||
Kernel Module Options for ``amd-pstate``
|
Kernel Module Options for ``amd-pstate``
|
||||||
@@ -287,25 +286,25 @@ Kernel Module Options for ``amd-pstate``
|
|||||||
Use a module param (shared_mem) to enable related processors manually with
|
Use a module param (shared_mem) to enable related processors manually with
|
||||||
**amd_pstate.shared_mem=1**.
|
**amd_pstate.shared_mem=1**.
|
||||||
Due to the performance issue on the processors with `Shared Memory Support
|
Due to the performance issue on the processors with `Shared Memory Support
|
||||||
<perf_cap_>`_, so we disable it for the moment and will enable this by default
|
<perf_cap_>`_, we disable it presently and will re-enable this by default
|
||||||
once we address performance issue on this solution.
|
once we address the performance issue with this solution.
|
||||||
|
|
||||||
The way to check whether current processor is `Full MSR Support <perf_cap_>`_
|
To check whether the current processor is using `Full MSR Support <perf_cap_>`_
|
||||||
or `Shared Memory Support <perf_cap_>`_ : ::
|
or `Shared Memory Support <perf_cap_>`_ : ::
|
||||||
|
|
||||||
ray@hr-test1:~$ lscpu | grep cppc
|
ray@hr-test1:~$ lscpu | grep cppc
|
||||||
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
|
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr rdpru wbnoinvd cppc arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip pku ospke vaes vpclmulqdq rdpid overflow_recov succor smca fsrm
|
||||||
|
|
||||||
If CPU Flags have cppc, then this processor supports `Full MSR Support
|
If the CPU flags have ``cppc``, then this processor supports `Full MSR Support
|
||||||
<perf_cap_>`_. Otherwise it supports `Shared Memory Support <perf_cap_>`_.
|
<perf_cap_>`_. Otherwise, it supports `Shared Memory Support <perf_cap_>`_.
|
||||||
|
|
||||||
|
|
||||||
``cpupower`` tool support for ``amd-pstate``
|
``cpupower`` tool support for ``amd-pstate``
|
||||||
===============================================
|
===============================================
|
||||||
|
|
||||||
``amd-pstate`` is supported on ``cpupower`` tool that can be used to dump the frequency
|
``amd-pstate`` is supported by the ``cpupower`` tool, which can be used to dump
|
||||||
information. And it is in progress to support more and more operations for new
|
frequency information. Development is in progress to support more and more
|
||||||
``amd-pstate`` module with this tool. ::
|
operations for the new ``amd-pstate`` module with this tool. ::
|
||||||
|
|
||||||
root@hr-test1:/home/ray# cpupower frequency-info
|
root@hr-test1:/home/ray# cpupower frequency-info
|
||||||
analyzing CPU 0:
|
analyzing CPU 0:
|
||||||
@@ -336,10 +335,10 @@ Trace Events
|
|||||||
--------------
|
--------------
|
||||||
|
|
||||||
There are two static trace events that can be used for ``amd-pstate``
|
There are two static trace events that can be used for ``amd-pstate``
|
||||||
diagnostics. One of them is the cpu_frequency trace event generally used
|
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
|
||||||
by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
|
by ``CPUFreq``, and the other one is the ``amd_pstate_perf`` trace event
|
||||||
specific to ``amd-pstate``. The following sequence of shell commands can
|
specific to ``amd-pstate``. The following sequence of shell commands can
|
||||||
be used to enable them and see their output (if the kernel is generally
|
be used to enable them and see their output (if the kernel is
|
||||||
configured to support event tracing). ::
|
configured to support event tracing). ::
|
||||||
|
|
||||||
root@hr-test1:/home/ray# cd /sys/kernel/tracing/
|
root@hr-test1:/home/ray# cd /sys/kernel/tracing/
|
||||||
@@ -364,11 +363,37 @@ configured to support event tracing). ::
|
|||||||
<idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
|
<idle>-0 [003] d.s.. 4995.980971: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=3 changed=false fast_switch=true
|
||||||
<idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
|
<idle>-0 [011] d.s.. 4995.980996: amd_pstate_perf: amd_min_perf=85 amd_des_perf=85 amd_max_perf=166 cpu_id=11 changed=false fast_switch=true
|
||||||
|
|
||||||
The cpu_frequency trace event will be triggered either by the ``schedutil`` scaling
|
The ``cpu_frequency`` trace event will be triggered either by the ``schedutil`` scaling
|
||||||
governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
|
governor (for the policies it is attached to), or by the ``CPUFreq`` core (for the
|
||||||
policies with other scaling governors).
|
policies with other scaling governors).
|
||||||
|
|
||||||
|
|
||||||
|
Tracer Tool
|
||||||
|
-------------
|
||||||
|
|
||||||
|
``amd_pstate_trace.py`` can record and parse the ``amd-pstate`` trace log, then
|
||||||
|
generate performance plots. This utility can be used to debug and tune the
|
||||||
|
performance of the ``amd-pstate`` driver. The tracer tool needs to import intel
|
||||||
|
pstate tracer.
|
||||||
|
|
||||||
|
The tracer tool is located in ``linux/tools/power/x86/amd_pstate_tracer``. It can be
|
||||||
|
used in two ways. If a trace file is available, then directly parse the file
|
||||||
|
with the command ::
|
||||||
|
|
||||||
|
./amd_pstate_trace.py [-c cpus] -t <trace_file> -n <test_name>
|
||||||
|
|
||||||
|
Or generate a trace file with root privilege, then parse and plot with the command ::
|
||||||
|
|
||||||
|
sudo ./amd_pstate_trace.py [-c cpus] -n <test_name> -i <interval> [-m kbytes]
|
||||||
|
|
||||||
|
The test result can be found in ``results/test_name``. The following is an example
|
||||||
|
of part of the output. ::
|
||||||
|
|
||||||
|
common_cpu common_secs common_usecs min_perf des_perf max_perf freq mperf apef tsc load duration_ms sample_num elapsed_time common_comm
|
||||||
|
CPU_005 712 116384 39 49 166 0.7565 9645075 2214891 38431470 25.1 11.646 469 2.496 kworker/5:0-40
|
||||||
|
CPU_006 712 116408 39 49 166 0.6769 8950227 1839034 37192089 24.06 11.272 470 2.496 kworker/6:0-1264
|
||||||
|
|
||||||
|
|
||||||
Reference
|
Reference
|
||||||
===========
|
===========
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,60 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
.. include:: <isonum.txt>
|
||||||
|
|
||||||
|
==============================
|
||||||
|
Intel Uncore Frequency Scaling
|
||||||
|
==============================
|
||||||
|
|
||||||
|
:Copyright: |copy| 2022 Intel Corporation
|
||||||
|
|
||||||
|
:Author: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
|
||||||
|
|
||||||
|
Introduction
|
||||||
|
------------
|
||||||
|
|
||||||
|
The uncore can consume a significant amount of power in Intel's Xeon servers based
|
||||||
|
on the workload characteristics. To optimize the total power and improve overall
|
||||||
|
performance, SoCs have internal algorithms for scaling uncore frequency. These
|
||||||
|
algorithms monitor workload usage of uncore and set a desirable frequency.
|
||||||
|
|
||||||
|
It is possible that users have different expectations of uncore performance and
|
||||||
|
want to have control over it. The objective is similar to allowing users to set
|
||||||
|
the scaling min/max frequencies via cpufreq sysfs to improve CPU performance.
|
||||||
|
Users may have some latency sensitive workloads where they do not want any
|
||||||
|
change to uncore frequency. Also, users may have workloads which require
|
||||||
|
different core and uncore performance at distinct phases and they may want to
|
||||||
|
use both cpufreq and the uncore scaling interface to distribute power and
|
||||||
|
improve overall performance.
|
||||||
|
|
||||||
|
Sysfs Interface
|
||||||
|
---------------
|
||||||
|
|
||||||
|
To control uncore frequency, a sysfs interface is provided in the directory:
|
||||||
|
``/sys/devices/system/cpu/intel_uncore_frequency/``.
|
||||||
|
|
||||||
|
There is one directory for each package and die combination as the scope of
|
||||||
|
uncore scaling control is per die in multiple die/package SoCs or per
|
||||||
|
package for single die per package SoCs. The name represents the
|
||||||
|
scope of control. For example: 'package_00_die_00' is for package id 0 and
|
||||||
|
die 0.
|
||||||
|
|
||||||
|
Each package_*_die_* contains the following attributes:
|
||||||
|
|
||||||
|
``initial_max_freq_khz``
|
||||||
|
Out of reset, this attribute represents the maximum possible frequency.
|
||||||
|
This is a read-only attribute. If users adjust max_freq_khz,
|
||||||
|
they can always go back to maximum using the value from this attribute.
|
||||||
|
|
||||||
|
``initial_min_freq_khz``
|
||||||
|
Out of reset, this attribute represents the minimum possible frequency.
|
||||||
|
This is a read-only attribute. If users adjust min_freq_khz,
|
||||||
|
they can always go back to minimum using the value from this attribute.
|
||||||
|
|
||||||
|
``max_freq_khz``
|
||||||
|
This attribute is used to set the maximum uncore frequency.
|
||||||
|
|
||||||
|
``min_freq_khz``
|
||||||
|
This attribute is used to set the minimum uncore frequency.
|
||||||
|
|
||||||
|
``current_freq_khz``
|
||||||
|
This attribute is used to get the current uncore frequency.
|
||||||
@@ -15,3 +15,4 @@ Working-State Power Management
|
|||||||
cpufreq_drivers
|
cpufreq_drivers
|
||||||
intel_epb
|
intel_epb
|
||||||
intel-speed-select
|
intel-speed-select
|
||||||
|
intel_uncore_frequency_scaling
|
||||||
|
|||||||
@@ -1,14 +1,5 @@
|
|||||||
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||||
..
|
.. See the bottom of this file for additional redistribution information.
|
||||||
If you want to distribute this text under CC-BY-4.0 only, please use 'The
|
|
||||||
Linux kernel developers' for author attribution and link this as source:
|
|
||||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
|
||||||
..
|
|
||||||
Note: Only the content of this RST file as found in the Linux kernel sources
|
|
||||||
is available under CC-BY-4.0, as versions of this text that were processed
|
|
||||||
(for example by the kernel's build system) might contain content taken from
|
|
||||||
files which use a more restrictive license.
|
|
||||||
|
|
||||||
|
|
||||||
Reporting issues
|
Reporting issues
|
||||||
++++++++++++++++
|
++++++++++++++++
|
||||||
@@ -395,22 +386,16 @@ fixed as soon as possible, hence there are 'issues of high priority' that get
|
|||||||
handled slightly differently in the reporting process. Three types of cases
|
handled slightly differently in the reporting process. Three types of cases
|
||||||
qualify: regressions, security issues, and really severe problems.
|
qualify: regressions, security issues, and really severe problems.
|
||||||
|
|
||||||
You deal with a 'regression' if something that worked with an older version of
|
You deal with a regression if some application or practical use case running
|
||||||
the Linux kernel does not work with a newer one or somehow works worse with it.
|
fine with one Linux kernel works worse or not at all with a newer version
|
||||||
It thus is a regression when a WiFi driver that did a fine job with Linux 5.7
|
compiled using a similar configuration. The document
|
||||||
somehow misbehaves with 5.8 or doesn't work at all. It's also a regression if
|
Documentation/admin-guide/reporting-regressions.rst explains this in more
|
||||||
an application shows erratic behavior with a newer kernel, which might happen
|
detail. It also provides a good deal of other information about regressions you
|
||||||
due to incompatible changes in the interface between the kernel and the
|
might want to be aware of; it for example explains how to add your issue to the
|
||||||
userland (like procfs and sysfs). Significantly reduced performance or
|
list of tracked regressions, to ensure it won't fall through the cracks.
|
||||||
increased power consumption also qualify as regression. But keep in mind: the
|
|
||||||
new kernel needs to be built with a configuration that is similar to the one
|
|
||||||
from the old kernel (see below how to achieve that). That's because the kernel
|
|
||||||
developers sometimes can not avoid incompatibilities when implementing new
|
|
||||||
features; but to avoid regressions such features have to be enabled explicitly
|
|
||||||
during build time configuration.
|
|
||||||
|
|
||||||
What qualifies as a security issue is left to your judgment. Consider reading
|
What qualifies as a security issue is left to your judgment. Consider reading
|
||||||
'Documentation/admin-guide/security-bugs.rst' before proceeding, as it
|
Documentation/admin-guide/security-bugs.rst before proceeding, as it
|
||||||
provides additional details how to best handle security issues.
|
provides additional details how to best handle security issues.
|
||||||
|
|
||||||
An issue is a 'really severe problem' when something totally unacceptably bad
|
An issue is a 'really severe problem' when something totally unacceptably bad
|
||||||
@@ -517,7 +502,7 @@ line starting with 'CPU:'. It should end with 'Not tainted' if the kernel was
|
|||||||
not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
|
not tainted when it noticed the problem; it was tainted if you see 'Tainted:'
|
||||||
followed by a few spaces and some letters.
|
followed by a few spaces and some letters.
|
||||||
|
|
||||||
If your kernel is tainted, study 'Documentation/admin-guide/tainted-kernels.rst'
|
If your kernel is tainted, study Documentation/admin-guide/tainted-kernels.rst
|
||||||
to find out why. Try to eliminate the reason. Often it's caused by one of these
|
to find out why. Try to eliminate the reason. Often it's caused by one of these
|
||||||
three things:
|
three things:
|
||||||
|
|
||||||
@@ -1043,7 +1028,7 @@ down the culprit, as maintainers often won't have the time or setup at hand to
|
|||||||
reproduce it themselves.
|
reproduce it themselves.
|
||||||
|
|
||||||
To find the change there is a process called 'bisection' which the document
|
To find the change there is a process called 'bisection' which the document
|
||||||
'Documentation/admin-guide/bug-bisect.rst' describes in detail. That process
|
Documentation/admin-guide/bug-bisect.rst describes in detail. That process
|
||||||
will often require you to build about ten to twenty kernel images, trying to
|
will often require you to build about ten to twenty kernel images, trying to
|
||||||
reproduce the issue with each of them before building the next. Yes, that takes
|
reproduce the issue with each of them before building the next. Yes, that takes
|
||||||
some time, but don't worry, it works a lot quicker than most people assume.
|
some time, but don't worry, it works a lot quicker than most people assume.
|
||||||
@@ -1073,10 +1058,11 @@ When dealing with regressions make sure the issue you face is really caused by
|
|||||||
the kernel and not by something else, as outlined above already.
|
the kernel and not by something else, as outlined above already.
|
||||||
|
|
||||||
In the whole process keep in mind: an issue only qualifies as regression if the
|
In the whole process keep in mind: an issue only qualifies as regression if the
|
||||||
older and the newer kernel got built with a similar configuration. The best way
|
older and the newer kernel got built with a similar configuration. This can be
|
||||||
to archive this: copy the configuration file (``.config``) from the old working
|
achieved by using ``make olddefconfig``, as explained in more detail by
|
||||||
kernel freshly to each newer kernel version you try. Afterwards run ``make
|
Documentation/admin-guide/reporting-regressions.rst; that document also
|
||||||
olddefconfig`` to adjust it for the needs of the new version.
|
provides a good deal of other information about regressions you might want to be
|
||||||
|
aware of.
|
||||||
|
|
||||||
|
|
||||||
Write and send the report
|
Write and send the report
|
||||||
@@ -1283,7 +1269,7 @@ them when sending the report by mail. If you filed it in a bug tracker, forward
|
|||||||
the report's text to these addresses; but on top of it put a small note where
|
the report's text to these addresses; but on top of it put a small note where
|
||||||
you mention that you filed it with a link to the ticket.
|
you mention that you filed it with a link to the ticket.
|
||||||
|
|
||||||
See 'Documentation/admin-guide/security-bugs.rst' for more information.
|
See Documentation/admin-guide/security-bugs.rst for more information.
|
||||||
|
|
||||||
|
|
||||||
Duties after the report went out
|
Duties after the report went out
|
||||||
@@ -1571,7 +1557,7 @@ Once your report is out your might get asked to do a proper one, as it allows to
|
|||||||
pinpoint the exact change that causes the issue (which then can easily get
|
pinpoint the exact change that causes the issue (which then can easily get
|
||||||
reverted to fix the issue quickly). Hence consider to do a proper bisection
|
reverted to fix the issue quickly). Hence consider to do a proper bisection
|
||||||
right away if time permits. See the section 'Special care for regressions' and
|
right away if time permits. See the section 'Special care for regressions' and
|
||||||
the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
|
the document Documentation/admin-guide/bug-bisect.rst for details how to
|
||||||
perform one. In case of a successful bisection add the author of the culprit to
|
perform one. In case of a successful bisection add the author of the culprit to
|
||||||
the recipients; also CC everyone in the signed-off-by chain, which you find at
|
the recipients; also CC everyone in the signed-off-by chain, which you find at
|
||||||
the end of its commit message.
|
the end of its commit message.
|
||||||
@@ -1594,7 +1580,7 @@ Some fixes are too complex
|
|||||||
Even small and seemingly obvious code-changes sometimes introduce new and
|
Even small and seemingly obvious code-changes sometimes introduce new and
|
||||||
totally unexpected problems. The maintainers of the stable and longterm kernels
|
totally unexpected problems. The maintainers of the stable and longterm kernels
|
||||||
are very aware of that and thus only apply changes to these kernels that are
|
are very aware of that and thus only apply changes to these kernels that are
|
||||||
within rules outlined in 'Documentation/process/stable-kernel-rules.rst'.
|
within rules outlined in Documentation/process/stable-kernel-rules.rst.
|
||||||
|
|
||||||
Complex or risky changes for example do not qualify and thus only get applied
|
Complex or risky changes for example do not qualify and thus only get applied
|
||||||
to mainline. Other fixes are easy to get backported to the newest stable and
|
to mainline. Other fixes are easy to get backported to the newest stable and
|
||||||
@@ -1756,10 +1742,23 @@ art will lay some groundwork to improve the situation over time.
|
|||||||
|
|
||||||
|
|
||||||
..
|
..
|
||||||
This text is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If you
|
end-of-content
|
||||||
spot a typo or small mistake, feel free to let him know directly and he'll
|
..
|
||||||
fix it. You are free to do the same in a mostly informal way if you want
|
This document is maintained by Thorsten Leemhuis <linux@leemhuis.info>. If
|
||||||
to contribute changes to the text, but for copyright reasons please CC
|
you spot a typo or small mistake, feel free to let him know directly and
|
||||||
|
he'll fix it. You are free to do the same in a mostly informal way if you
|
||||||
|
want to contribute changes to the text, but for copyright reasons please CC
|
||||||
linux-doc@vger.kernel.org and "sign-off" your contribution as
|
linux-doc@vger.kernel.org and "sign-off" your contribution as
|
||||||
Documentation/process/submitting-patches.rst outlines in the section "Sign
|
Documentation/process/submitting-patches.rst outlines in the section "Sign
|
||||||
your work - the Developer's Certificate of Origin".
|
your work - the Developer's Certificate of Origin".
|
||||||
|
..
|
||||||
|
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||||
|
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||||
|
please use "The Linux kernel developers" for author attribution and link
|
||||||
|
this as source:
|
||||||
|
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-issues.rst
|
||||||
|
..
|
||||||
|
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||||
|
is available under CC-BY-4.0, as versions of this text that were processed
|
||||||
|
(for example by the kernel's build system) might contain content taken from
|
||||||
|
files which use a more restrictive license.
|
||||||
|
|||||||
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
451
Documentation/admin-guide/reporting-regressions.rst
Normal file
@@ -0,0 +1,451 @@
|
|||||||
|
.. SPDX-License-Identifier: (GPL-2.0+ OR CC-BY-4.0)
|
||||||
|
.. [see the bottom of this file for redistribution information]
|
||||||
|
|
||||||
|
Reporting regressions
|
||||||
|
+++++++++++++++++++++
|
||||||
|
|
||||||
|
"*We don't cause regressions*" is the first rule of Linux kernel development;
|
||||||
|
Linux founder and lead developer Linus Torvalds established it himself and
|
||||||
|
ensures it's obeyed.
|
||||||
|
|
||||||
|
This document describes what the rule means for users and how the Linux kernel's
|
||||||
|
development model ensures all reported regressions are addressed; aspects relevant
|
||||||
|
for kernel developers are left to Documentation/process/handling-regressions.rst.
|
||||||
|
|
||||||
|
|
||||||
|
The important bits (aka "TL;DR")
|
||||||
|
================================
|
||||||
|
|
||||||
|
#. It's a regression if something running fine with one Linux kernel works worse
|
||||||
|
or not at all with a newer version. Note, the newer kernel has to be compiled
|
||||||
|
using a similar configuration; the detailed explanations below describe this
|
||||||
|
and other fine print in more detail.
|
||||||
|
|
||||||
|
#. Report your issue as outlined in Documentation/admin-guide/reporting-issues.rst,
|
||||||
|
it already covers all aspects important for regressions, which are repeated
|
||||||
|
below for convenience. Two of them are important: start your report's subject
|
||||||
|
with "[REGRESSION]" and CC or forward it to `the regression mailing list
|
||||||
|
<https://lore.kernel.org/regressions/>`_ (regressions@lists.linux.dev).
|
||||||
|
|
||||||
|
#. Optional, but recommended: when sending or forwarding your report, make the
|
||||||
|
Linux kernel regression tracking bot "regzbot" track the issue by specifying
|
||||||
|
when the regression started like this::
|
||||||
|
|
||||||
|
#regzbot introduced: v5.13..v5.14-rc1
|
||||||
|
|
||||||
|
|
||||||
|
All the details on Linux kernel regressions relevant for users
|
||||||
|
==============================================================
|
||||||
|
|
||||||
|
|
||||||
|
The important basics
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
|
||||||
|
What is a "regression" and what is the "no regressions rule"?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
It's a regression if some application or practical use case running fine with
|
||||||
|
one Linux kernel works worse or not at all with a newer version compiled using a
|
||||||
|
similar configuration. The "no regressions rule" forbids this to take place; if
|
||||||
|
it happens by accident, developers that caused it are expected to quickly fix
|
||||||
|
the issue.
|
||||||
|
|
||||||
|
It thus is a regression when a WiFi driver from Linux 5.13 works fine, but with
|
||||||
|
5.14 doesn't work at all, works significantly slower, or misbehaves somehow.
|
||||||
|
It's also a regression if a perfectly working application suddenly shows erratic
|
||||||
|
behavior with a newer kernel version; such issues can be caused by changes in
|
||||||
|
procfs, sysfs, or one of the many other interfaces Linux provides to userland
|
||||||
|
software. But keep in mind, as mentioned earlier: 5.14 in this example needs to
|
||||||
|
be built from a configuration similar to the one from 5.13. This can be achieved
|
||||||
|
using ``make olddefconfig``, as explained in more detail below.
|
||||||
|
|
||||||
|
Note the "practical use case" in the first sentence of this section: developers
|
||||||
|
despite the "no regressions" rule are free to change any aspect of the kernel
|
||||||
|
and even APIs or ABIs to userland, as long as no existing application or use
|
||||||
|
case breaks.
|
||||||
|
|
||||||
|
Also be aware the "no regressions" rule covers only interfaces the kernel
|
||||||
|
provides to the userland. It thus does not apply to kernel-internal interfaces
|
||||||
|
like the module API, which some externally developed drivers use to hook into
|
||||||
|
the kernel.
|
||||||
|
|
||||||
|
How do I report a regression?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Just report the issue as outlined in
|
||||||
|
Documentation/admin-guide/reporting-issues.rst, it already describes the
|
||||||
|
important points. The following aspects outlined there are especially relevant
|
||||||
|
for regressions:
|
||||||
|
|
||||||
|
* When checking for existing reports to join, also search the `archives of the
|
||||||
|
Linux regressions mailing list <https://lore.kernel.org/regressions/>`_ and
|
||||||
|
`regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||||
|
|
||||||
|
* Start your report's subject with "[REGRESSION]".
|
||||||
|
|
||||||
|
* In your report, clearly mention the last kernel version that worked fine and
|
||||||
|
the first broken one. Ideally try to find the exact change causing the
|
||||||
|
regression using a bisection, as explained below in more detail.
|
||||||
|
|
||||||
|
* Remember to let the Linux regressions mailing list
|
||||||
|
(regressions@lists.linux.dev) know about your report:
|
||||||
|
|
||||||
|
* If you report the regression by mail, CC the regressions list.
|
||||||
|
|
||||||
|
* If you report your regression to some bug tracker, forward the submitted
|
||||||
|
report by mail to the regressions list while CCing the maintainer and the
|
||||||
|
mailing list for the subsystem in question.
|
||||||
|
|
||||||
|
If it's a regression within a stable or longterm series (e.g.
|
||||||
|
v5.15.3..v5.15.5), remember to CC the `Linux stable mailing list
|
||||||
|
<https://lore.kernel.org/stable/>`_ (stable@vger.kernel.org).
|
||||||
|
|
||||||
|
In case you performed a successful bisection, add everyone to the CC the
|
||||||
|
culprit's commit message mentions in lines starting with "Signed-off-by:".
|
||||||
|
|
||||||
|
When CCing or forwarding your report to the list, consider directly telling the
|
||||||
|
aforementioned Linux kernel regression tracking bot about your report. To do
|
||||||
|
that, include a paragraph like this in your mail::
|
||||||
|
|
||||||
|
#regzbot introduced: v5.13..v5.14-rc1
|
||||||
|
|
||||||
|
Regzbot will then consider your mail a report for a regression introduced in the
|
||||||
|
specified version range. In above case Linux v5.13 still worked fine and Linux
|
||||||
|
v5.14-rc1 was the first version where you encountered the issue. If you
|
||||||
|
performed a bisection to find the commit that caused the regression, specify the
|
||||||
|
culprit's commit-id instead::
|
||||||
|
|
||||||
|
#regzbot introduced: 1f2e3d4c5d
|
||||||
|
|
||||||
|
Placing such a "regzbot command" is in your interest, as it will ensure the
|
||||||
|
report won't fall through the cracks unnoticed. If you omit this, the Linux
|
||||||
|
kernel's regressions tracker will take care of telling regzbot about your
|
||||||
|
regression, as long as you send a copy to the regressions mailing list. But the
|
||||||
|
regression tracker is just one human who sometimes has to rest or occasionally
|
||||||
|
might even enjoy some time away from computers (as crazy as that might sound).
|
||||||
|
Relying on this person thus will result in an unnecessary delay before the
|
||||||
|
regression becomes mentioned `on the list of tracked and unresolved Linux
|
||||||
|
kernel regressions <https://linux-regtracking.leemhuis.info/regzbot/>`_ and the
|
||||||
|
weekly regression reports sent by regzbot. Such delays can result in Linus
|
||||||
|
Torvalds being unaware of important regressions when deciding between "continue
|
||||||
|
development or call this finished and release the final?".
|
||||||
|
|
||||||
|
Are really all regressions fixed?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Nearly all of them are, as long as the change causing the regression (the
|
||||||
|
"culprit commit") is reliably identified. Some regressions can be fixed without
|
||||||
|
this, but often it's required.
|
||||||
|
|
||||||
|
Who needs to find the root cause of a regression?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Developers of the affected code area should try to locate the culprit on their
|
||||||
|
own. But for them that's often impossible to do with reasonable effort, as quite
|
||||||
|
a lot of issues only occur in a particular environment outside the developer's
|
||||||
|
reach -- for example, a specific hardware platform, firmware, Linux distro,
|
||||||
|
system's configuration, or application. That's why in the end it's often up to
|
||||||
|
the reporter to locate the culprit commit; sometimes users might even need to
|
||||||
|
run additional tests afterwards to pinpoint the exact root cause. Developers
|
||||||
|
should offer advice and reasonably help where they can, to make this process
|
||||||
|
relatively easy and achievable for typical users.
|
||||||
|
|
||||||
|
How can I find the culprit?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Perform a bisection, as roughly outlined in
|
||||||
|
Documentation/admin-guide/reporting-issues.rst and described in more detail by
|
||||||
|
Documentation/admin-guide/bug-bisect.rst. It might sound like a lot of work, but
|
||||||
|
in many cases it finds the culprit relatively quickly. If it's hard or
|
||||||
|
time-consuming to reliably reproduce the issue, consider teaming up with other
|
||||||
|
affected users to narrow down the search range together.
|
||||||
|
|
||||||
|
Who can I ask for advice when it comes to regressions?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Send a mail to the regressions mailing list (regressions@lists.linux.dev) while
|
||||||
|
CCing the Linux kernel's regression tracker (regressions@leemhuis.info); if the
|
||||||
|
issue might better be dealt with in private, feel free to omit the list.
|
||||||
|
|
||||||
|
|
||||||
|
Additional details about regressions
|
||||||
|
------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
What is the goal of the "no regressions rule"?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Users should feel safe when updating kernel versions and not have to worry
|
||||||
|
something might break. This is in the interest of the kernel developers to make
|
||||||
|
updating attractive: they don't want users to stay on stable or longterm Linux
|
||||||
|
series that are either abandoned or more than one and a half years old. That's
|
||||||
|
in everybody's interest, as `those series might have known bugs, security
|
||||||
|
issues, or other problematic aspects already fixed in later versions
|
||||||
|
<http://www.kroah.com/log/blog/2018/08/24/what-stable-kernel-should-i-use/>`_.
|
||||||
|
Additionally, the kernel developers want to make it simple and appealing for
|
||||||
|
users to test the latest pre-release or regular release. That's also in
|
||||||
|
everybody's interest, as it's a lot easier to track down and fix problems, if
|
||||||
|
they are reported shortly after being introduced.
|
||||||
|
|
||||||
|
Is the "no regressions" rule really adhered in practice?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
It's taken really seriously, as can be seen by many mailing list posts from
|
||||||
|
Linux creator and lead developer Linus Torvalds, some of which are quoted in
|
||||||
|
Documentation/process/handling-regressions.rst.
|
||||||
|
|
||||||
|
Exceptions to this rule are extremely rare; in the past developers almost always
|
||||||
|
turned out to be wrong when they assumed a particular situation was warranting
|
||||||
|
an exception.
|
||||||
|
|
||||||
|
Who ensures the "no regressions" is actually followed?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
The subsystem maintainers should take care of that; they are watched and
|
||||||
|
supported by the tree maintainers -- e.g. Linus Torvalds for mainline and
|
||||||
|
Greg Kroah-Hartman et al. for various stable/longterm series.
|
||||||
|
|
||||||
|
All of them are helped by people trying to ensure no regression report falls
|
||||||
|
through the cracks. One of them is Thorsten Leemhuis, who's currently acting as
|
||||||
|
the Linux kernel's "regressions tracker"; to facilitate this work he relies on
|
||||||
|
regzbot, the Linux kernel regression tracking bot. That's why you want to bring
|
||||||
|
your report on the radar of these people by CCing or forwarding each report to
|
||||||
|
the regressions mailing list, ideally with a "regzbot command" in your mail to
|
||||||
|
get it tracked immediately.
|
||||||
|
|
||||||
|
How quickly are regressions normally fixed?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Developers should fix any reported regression as quickly as possible, to provide
|
||||||
|
affected users with a solution in a timely manner and prevent more users from
|
||||||
|
running into the issue; nevertheless developers need to take enough time and
|
||||||
|
care to ensure regression fixes do not cause additional damage.
|
||||||
|
|
||||||
|
The answer thus depends on various factors like the impact of a regression, its
|
||||||
|
age, or the Linux series in which it occurs. In the end though, most regressions
|
||||||
|
should be fixed within two weeks.
|
||||||
|
|
||||||
|
Is it a regression, if the issue can be avoided by updating some software?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Almost always: yes. If a developer tells you otherwise, ask the regression
|
||||||
|
tracker for advice as outlined above.
|
||||||
|
|
||||||
|
Is it a regression, if a newer kernel works slower or consumes more energy?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Yes, but the difference has to be significant. A five percent slow-down in a
|
||||||
|
micro-benchmark thus is unlikely to qualify as regression, unless it also
|
||||||
|
influences the results of a broad benchmark by more than one percent. If in
|
||||||
|
doubt, ask for advice.
|
||||||
|
|
||||||
|
Is it a regression, if an external kernel module breaks when updating Linux?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
No, as the "no regression" rule is about interfaces and services the Linux
|
||||||
|
kernel provides to the userland. It thus does not cover building or running
|
||||||
|
externally developed kernel modules, as they run in kernel-space and hook into
|
||||||
|
the kernel using internal interfaces occasionally changed.
|
||||||
|
|
||||||
|
How are regressions handled that are caused by security fixes?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
In extremely rare situations security issues can't be fixed without causing
|
||||||
|
regressions; those fixes are given priority, as they are the lesser evil in the end.
|
||||||
|
Luckily this middling almost always can be avoided, as key developers for the
|
||||||
|
affected area and often Linus Torvalds himself try very hard to fix security
|
||||||
|
issues without causing regressions.
|
||||||
|
|
||||||
|
If you nevertheless face such a case, check the mailing list archives if people
|
||||||
|
tried their best to avoid the regression. If not, report it; if in doubt, ask
|
||||||
|
for advice as outlined above.
|
||||||
|
|
||||||
|
What happens if fixing a regression is impossible without causing another?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Sadly these things happen, but luckily not very often; if they occur, expert
|
||||||
|
developers of the affected code area should look into the issue to find a fix
|
||||||
|
that avoids regressions or at least their impact. If you run into such a
|
||||||
|
situation, do what was outlined already for regressions caused by security
|
||||||
|
fixes: check earlier discussions if people already tried their best and ask for
|
||||||
|
advice if in doubt.
|
||||||
|
|
||||||
|
A quick note while at it: these situations could be avoided, if people would
|
||||||
|
regularly give mainline pre-releases (say v5.15-rc1 or -rc3) from each
|
||||||
|
development cycle a test run. This is best explained by imagining a change
|
||||||
|
integrated between Linux v5.14 and v5.15-rc1 which causes a regression, but at
|
||||||
|
the same time is a hard requirement for some other improvement applied for
|
||||||
|
5.15-rc1. All these changes often can simply be reverted and the regression thus
|
||||||
|
solved, if someone finds and reports it before 5.15 is released. A few days or
|
||||||
|
weeks later this solution can become impossible, as some software might have
|
||||||
|
started to rely on aspects introduced by one of the follow-up changes: reverting
|
||||||
|
all changes would then cause a regression for users of said software and thus is
|
||||||
|
out of the question.
|
||||||
|
|
||||||
|
Is it a regression, if some feature I relied on was removed months ago?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
It is, but often it's hard to fix such regressions due to the aspects outlined
|
||||||
|
in the previous section. It hence needs to be dealt with on a case-by-case
|
||||||
|
basis. This is another reason why it's in everybody's interest to regularly test
|
||||||
|
mainline pre-releases.
|
||||||
|
|
||||||
|
Does the "no regression" rule apply if I seem to be the only affected person?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
It does, but only for practical usage: the Linux developers want to be free to
|
||||||
|
remove support for hardware that can only be found in attics and museums these days.
|
||||||
|
|
||||||
|
Note, sometimes regressions can't be avoided to make progress -- and the latter
|
||||||
|
is needed to prevent Linux from stagnation. Hence, if only very few users seem
|
||||||
|
to be affected by a regression, it for the greater good might be in their and
|
||||||
|
everyone else's interest to let things pass. Especially if there is an
|
||||||
|
easy way to circumvent the regression somehow, for example by updating some
|
||||||
|
software or using a kernel parameter created just for this purpose.
|
||||||
|
|
||||||
|
Does the regression rule apply for code in the staging tree as well?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Not according to the `help text for the configuration option covering all
|
||||||
|
staging code <https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/staging/Kconfig>`_,
|
||||||
|
which since its early days states::
|
||||||
|
|
||||||
|
Please note that these drivers are under heavy development, may or
|
||||||
|
may not work, and may contain userspace interfaces that most likely
|
||||||
|
will be changed in the near future.
|
||||||
|
|
||||||
|
The staging developers nevertheless often adhere to the "no regressions" rule,
|
||||||
|
but sometimes bend it to make progress. That's for example why some users had to
|
||||||
|
deal with (often negligible) regressions when a WiFi driver from the staging
|
||||||
|
tree was replaced by a totally different one written from scratch.
|
||||||
|
|
||||||
|
Why do later versions have to be "compiled with a similar configuration"?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Because the Linux kernel developers sometimes integrate changes known to cause
|
||||||
|
regressions, but make them optional and disable them in the kernel's default
|
||||||
|
configuration. This trick allows progress, as the "no regressions" rule
|
||||||
|
otherwise would lead to stagnation.
|
||||||
|
|
||||||
|
Consider for example a new security feature blocking access to some kernel
|
||||||
|
interfaces often abused by malware, which at the same time are required to run a
|
||||||
|
few rarely used applications. The outlined approach makes both camps happy:
|
||||||
|
people using these applications can leave the new security feature off, while
|
||||||
|
everyone else can enable it without running into trouble.
|
||||||
|
|
||||||
|
How to create a configuration similar to the one of an older kernel?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Start your machine with a known-good kernel and configure the newer Linux
|
||||||
|
version with ``make olddefconfig``. This makes the kernel's build scripts pick
|
||||||
|
up the configuration file (the ".config" file) from the running kernel as base
|
||||||
|
for the new one you are about to compile; afterwards they set all new
|
||||||
|
configuration options to their default value, which should disable new features
|
||||||
|
that might cause regressions.
|
||||||
|
|
||||||
|
Can I report a regression I found with pre-compiled vanilla kernels?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
You need to ensure the newer kernel was compiled with a similar configuration
|
||||||
|
file as the older one (see above), as those that built them might have enabled
|
||||||
|
some known-to-be incompatible feature for the newer kernel. If in doubt, report
|
||||||
|
the matter to the kernel's provider and ask for advice.
|
||||||
|
|
||||||
|
|
||||||
|
More about regression tracking with "regzbot"
|
||||||
|
---------------------------------------------
|
||||||
|
|
||||||
|
What is regression tracking and why should I care about it?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Rules like "no regressions" need someone to ensure they are followed, otherwise
|
||||||
|
they are broken either accidentally or on purpose. History has shown this to be
|
||||||
|
true for Linux kernel development as well. That's why Thorsten Leemhuis, the
|
||||||
|
Linux Kernel's regression tracker, and some people try to ensure all regressions
|
||||||
|
are fixed by keeping an eye on them until they are resolved. None of them are
|
||||||
|
paid for this, that's why the work is done on a best effort basis.
|
||||||
|
|
||||||
|
Why and how are Linux kernel regressions tracked using a bot?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Tracking regressions completely manually has proven to be quite hard due to the
|
||||||
|
distributed and loosely structured nature of the Linux kernel development process.
|
||||||
|
That's why the Linux kernel's regression tracker developed regzbot to facilitate
|
||||||
|
the work, with the long term goal to automate regression tracking as much as
|
||||||
|
possible for everyone involved.
|
||||||
|
|
||||||
|
Regzbot works by watching for replies to reports of tracked regressions.
|
||||||
|
Additionally, it's looking out for posted or committed patches referencing such
|
||||||
|
reports with "Link:" tags; replies to such patch postings are tracked as well.
|
||||||
|
Combined, this data provides good insights into the current state of the fixing
|
||||||
|
process.
|
||||||
|
|
||||||
|
How to see which regressions regzbot tracks currently?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
Check out `regzbot's web-interface <https://linux-regtracking.leemhuis.info/regzbot/>`_.
|
||||||
|
|
||||||
|
What kind of issues are supposed to be tracked by regzbot?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
The bot is meant to track regressions, hence please don't involve regzbot for
|
||||||
|
regular issues. But it's okay for the Linux kernel's regression tracker if you
|
||||||
|
involve regzbot to track severe issues, like reports about hangs, corrupted
|
||||||
|
data, or internal errors (Panic, Oops, BUG(), warning, ...).
|
||||||
|
|
||||||
|
How to change aspects of a tracked regression?
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
By using a 'regzbot command' in a direct or indirect reply to the mail with the
|
||||||
|
report. The easiest way to do that: find the report in your "Sent" folder or the
|
||||||
|
mailing list archive and reply to it using your mailer's "Reply-all" function.
|
||||||
|
In that mail, use one of the following commands in a stand-alone paragraph (IOW:
|
||||||
|
use blank lines to separate one or multiple of these commands from the rest of
|
||||||
|
the mail's text).
|
||||||
|
|
||||||
|
* Update when the regression started to happen, for example after performing a
|
||||||
|
bisection::
|
||||||
|
|
||||||
|
#regzbot introduced: 1f2e3d4c5d
|
||||||
|
|
||||||
|
* Set or update the title::
|
||||||
|
|
||||||
|
#regzbot title: foo
|
||||||
|
|
||||||
|
* Monitor a discussion or bugzilla.kernel.org ticket where additional aspects of
|
||||||
|
the issue or a fix are discussed::
|
||||||
|
|
||||||
|
#regzbot monitor: https://lore.kernel.org/r/30th.anniversary.repost@klaava.Helsinki.FI/
|
||||||
|
#regzbot monitor: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||||
|
|
||||||
|
* Point to a place with further details of interest, like a mailing list post
|
||||||
|
or a ticket in a bug tracker that are slightly related, but about a different
|
||||||
|
topic::
|
||||||
|
|
||||||
|
#regzbot link: https://bugzilla.kernel.org/show_bug.cgi?id=123456789
|
||||||
|
|
||||||
|
* Mark a regression as invalid::
|
||||||
|
|
||||||
|
#regzbot invalid: wasn't a regression, problem has always existed
|
||||||
|
|
||||||
|
Regzbot supports a few other commands primarily used by developers or people
|
||||||
|
tracking regressions. They and more details about the aforementioned regzbot
|
||||||
|
commands can be found in the `getting started guide
|
||||||
|
<https://gitlab.com/knurd42/regzbot/-/blob/main/docs/getting_started.md>`_ and
|
||||||
|
the `reference documentation <https://gitlab.com/knurd42/regzbot/-/blob/main/docs/reference.md>`_
|
||||||
|
for regzbot.
|
||||||
|
|
||||||
|
..
|
||||||
|
end-of-content
|
||||||
|
..
|
||||||
|
This text is available under GPL-2.0+ or CC-BY-4.0, as stated at the top
|
||||||
|
of the file. If you want to distribute this text under CC-BY-4.0 only,
|
||||||
|
please use "The Linux kernel developers" for author attribution and link
|
||||||
|
this as source:
|
||||||
|
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/Documentation/admin-guide/reporting-regressions.rst
|
||||||
|
..
|
||||||
|
Note: Only the content of this RST file as found in the Linux kernel sources
|
||||||
|
is available under CC-BY-4.0, as versions of this text that were processed
|
||||||
|
(for example by the kernel's build system) might contain content taken from
|
||||||
|
files which use a more restrictive license.
|
||||||
@@ -595,65 +595,33 @@ Documentation/admin-guide/kernel-parameters.rst).
|
|||||||
numa_balancing
|
numa_balancing
|
||||||
==============
|
==============
|
||||||
|
|
||||||
Enables/disables automatic page fault based NUMA memory
|
Enables/disables and configures automatic page fault based NUMA memory
|
||||||
balancing. Memory is moved automatically to nodes
|
balancing. Memory is moved automatically to nodes that access it often.
|
||||||
that access it often.
|
The value to set can be the result of ORing the following:
|
||||||
|
|
||||||
Enables/disables automatic NUMA memory balancing. On NUMA machines, there
|
= =================================
|
||||||
is a performance penalty if remote memory is accessed by a CPU. When this
|
0 NUMA_BALANCING_DISABLED
|
||||||
feature is enabled the kernel samples what task thread is accessing memory
|
1 NUMA_BALANCING_NORMAL
|
||||||
by periodically unmapping pages and later trapping a page fault. At the
|
2 NUMA_BALANCING_MEMORY_TIERING
|
||||||
time of the page fault, it is determined if the data being accessed should
|
= =================================
|
||||||
be migrated to a local memory node.
|
|
||||||
|
Or NUMA_BALANCING_NORMAL to optimize page placement among different
|
||||||
|
NUMA nodes to reduce remote accessing. On NUMA machines, there is a
|
||||||
|
performance penalty if remote memory is accessed by a CPU. When this
|
||||||
|
feature is enabled the kernel samples what task thread is accessing
|
||||||
|
memory by periodically unmapping pages and later trapping a page
|
||||||
|
fault. At the time of the page fault, it is determined if the data
|
||||||
|
being accessed should be migrated to a local memory node.
|
||||||
|
|
||||||
The unmapping of pages and trapping faults incur additional overhead that
|
The unmapping of pages and trapping faults incur additional overhead that
|
||||||
ideally is offset by improved memory locality but there is no universal
|
ideally is offset by improved memory locality but there is no universal
|
||||||
guarantee. If the target workload is already bound to NUMA nodes then this
|
guarantee. If the target workload is already bound to NUMA nodes then this
|
||||||
feature should be disabled. Otherwise, if the system overhead from the
|
feature should be disabled.
|
||||||
feature is too high then the rate the kernel samples for NUMA hinting
|
|
||||||
faults may be controlled by the `numa_balancing_scan_period_min_ms,
|
|
||||||
numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms,
|
|
||||||
numa_balancing_scan_size_mb`_, and numa_balancing_settle_count sysctls.
|
|
||||||
|
|
||||||
|
|
||||||
numa_balancing_scan_period_min_ms, numa_balancing_scan_delay_ms, numa_balancing_scan_period_max_ms, numa_balancing_scan_size_mb
|
|
||||||
===============================================================================================================================
|
|
||||||
|
|
||||||
|
|
||||||
Automatic NUMA balancing scans tasks address space and unmaps pages to
|
|
||||||
detect if pages are properly placed or if the data should be migrated to a
|
|
||||||
memory node local to where the task is running. Every "scan delay" the task
|
|
||||||
scans the next "scan size" number of pages in its address space. When the
|
|
||||||
end of the address space is reached the scanner restarts from the beginning.
|
|
||||||
|
|
||||||
In combination, the "scan delay" and "scan size" determine the scan rate.
|
|
||||||
When "scan delay" decreases, the scan rate increases. The scan delay and
|
|
||||||
hence the scan rate of every task is adaptive and depends on historical
|
|
||||||
behaviour. If pages are properly placed then the scan delay increases,
|
|
||||||
otherwise the scan delay decreases. The "scan size" is not adaptive but
|
|
||||||
the higher the "scan size", the higher the scan rate.
|
|
||||||
|
|
||||||
Higher scan rates incur higher system overhead as page faults must be
|
|
||||||
trapped and potentially data must be migrated. However, the higher the scan
|
|
||||||
rate, the more quickly a tasks memory is migrated to a local node if the
|
|
||||||
workload pattern changes and minimises performance impact due to remote
|
|
||||||
memory accesses. These sysctls control the thresholds for scan delays and
|
|
||||||
the number of pages scanned.
|
|
||||||
|
|
||||||
``numa_balancing_scan_period_min_ms`` is the minimum time in milliseconds to
|
|
||||||
scan a tasks virtual memory. It effectively controls the maximum scanning
|
|
||||||
rate for each task.
|
|
||||||
|
|
||||||
``numa_balancing_scan_delay_ms`` is the starting "scan delay" used for a task
|
|
||||||
when it initially forks.
|
|
||||||
|
|
||||||
``numa_balancing_scan_period_max_ms`` is the maximum time in milliseconds to
|
|
||||||
scan a tasks virtual memory. It effectively controls the minimum scanning
|
|
||||||
rate for each task.
|
|
||||||
|
|
||||||
``numa_balancing_scan_size_mb`` is how many megabytes worth of pages are
|
|
||||||
scanned for a given scan.
|
|
||||||
|
|
||||||
|
Or NUMA_BALANCING_MEMORY_TIERING to optimize page placement among
|
||||||
|
different types of memory (represented as different NUMA nodes) to
|
||||||
|
place the hot pages in the fast memory. This is implemented based on
|
||||||
|
unmapping and page fault too.
|
||||||
|
|
||||||
oops_all_cpu_backtrace
|
oops_all_cpu_backtrace
|
||||||
======================
|
======================
|
||||||
@@ -795,6 +763,8 @@ bit 1 print system memory info
|
|||||||
bit 2 print timer info
|
bit 2 print timer info
|
||||||
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
|
bit 3 print locks info if ``CONFIG_LOCKDEP`` is on
|
||||||
bit 4 print ftrace buffer
|
bit 4 print ftrace buffer
|
||||||
|
bit 5 print all printk messages in buffer
|
||||||
|
bit 6 print all CPUs backtrace (if available in the arch)
|
||||||
===== ============================================
|
===== ============================================
|
||||||
|
|
||||||
So for example to print tasks and memory info on panic, user can::
|
So for example to print tasks and memory info on panic, user can::
|
||||||
@@ -1029,23 +999,17 @@ This is a directory, with the following entries:
|
|||||||
* ``poolsize``: the entropy pool size, in bits;
|
* ``poolsize``: the entropy pool size, in bits;
|
||||||
|
|
||||||
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
|
* ``urandom_min_reseed_secs``: obsolete (used to determine the minimum
|
||||||
number of seconds between urandom pool reseeding).
|
number of seconds between urandom pool reseeding). This file is
|
||||||
|
writable for compatibility purposes, but writing to it has no effect
|
||||||
|
on any RNG behavior.
|
||||||
|
|
||||||
* ``uuid``: a UUID generated every time this is retrieved (this can
|
* ``uuid``: a UUID generated every time this is retrieved (this can
|
||||||
thus be used to generate UUIDs at will);
|
thus be used to generate UUIDs at will);
|
||||||
|
|
||||||
* ``write_wakeup_threshold``: when the entropy count drops below this
|
* ``write_wakeup_threshold``: when the entropy count drops below this
|
||||||
(as a number of bits), processes waiting to write to ``/dev/random``
|
(as a number of bits), processes waiting to write to ``/dev/random``
|
||||||
are woken up.
|
are woken up. This file is writable for compatibility purposes, but
|
||||||
|
writing to it has no effect on any RNG behavior.
|
||||||
If ``drivers/char/random.c`` is built with ``ADD_INTERRUPT_BENCH``
|
|
||||||
defined, these additional entries are present:
|
|
||||||
|
|
||||||
* ``add_interrupt_avg_cycles``: the average number of cycles between
|
|
||||||
interrupts used to feed the pool;
|
|
||||||
|
|
||||||
* ``add_interrupt_avg_deviation``: the standard deviation seen on the
|
|
||||||
number of cycles between interrupts used to feed the pool.
|
|
||||||
|
|
||||||
|
|
||||||
randomize_va_space
|
randomize_va_space
|
||||||
|
|||||||
@@ -365,6 +365,15 @@ new netns has been created.
|
|||||||
|
|
||||||
Default : 0 (for compatibility reasons)
|
Default : 0 (for compatibility reasons)
|
||||||
|
|
||||||
|
txrehash
|
||||||
|
--------
|
||||||
|
|
||||||
|
Controls default hash rethink behaviour on listening socket when SO_TXREHASH
|
||||||
|
option is set to SOCK_TXREHASH_DEFAULT (i.e. not overridden by setsockopt).
|
||||||
|
|
||||||
|
If set to 1 (default), hash rethink is performed on listening socket.
|
||||||
|
If set to 0, hash rethink is not performed.
|
||||||
|
|
||||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ This document is based on the ARM booting document by Russell King and
|
|||||||
is relevant to all public releases of the AArch64 Linux kernel.
|
is relevant to all public releases of the AArch64 Linux kernel.
|
||||||
|
|
||||||
The AArch64 exception model is made up of a number of exception levels
|
The AArch64 exception model is made up of a number of exception levels
|
||||||
(EL0 - EL3), with EL0 and EL1 having a secure and a non-secure
|
(EL0 - EL3), with EL0, EL1 and EL2 having a secure and a non-secure
|
||||||
counterpart. EL2 is the hypervisor level and exists only in non-secure
|
counterpart. EL2 is the hypervisor level, EL3 is the highest priority
|
||||||
mode. EL3 is the highest priority level and exists only in secure mode.
|
level and exists only in secure mode. Both are architecturally optional.
|
||||||
|
|
||||||
For the purposes of this document, we will use the term `boot loader`
|
For the purposes of this document, we will use the term `boot loader`
|
||||||
simply to define all software that executes on the CPU(s) before control
|
simply to define all software that executes on the CPU(s) before control
|
||||||
@@ -167,8 +167,8 @@ Before jumping into the kernel, the following conditions must be met:
|
|||||||
|
|
||||||
All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
|
All forms of interrupts must be masked in PSTATE.DAIF (Debug, SError,
|
||||||
IRQ and FIQ).
|
IRQ and FIQ).
|
||||||
The CPU must be in either EL2 (RECOMMENDED in order to have access to
|
The CPU must be in non-secure state, either in EL2 (RECOMMENDED in order
|
||||||
the virtualisation extensions) or non-secure EL1.
|
to have access to the virtualisation extensions), or in EL1.
|
||||||
|
|
||||||
- Caches, MMUs
|
- Caches, MMUs
|
||||||
|
|
||||||
|
|||||||
@@ -259,6 +259,11 @@ HWCAP2_RPRES
|
|||||||
|
|
||||||
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
||||||
|
|
||||||
|
HWCAP2_MTE3
|
||||||
|
|
||||||
|
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
||||||
|
by Documentation/arm64/memory-tagging-extension.rst.
|
||||||
|
|
||||||
4. Unused AT_HWCAP bits
|
4. Unused AT_HWCAP bits
|
||||||
-----------------------
|
-----------------------
|
||||||
|
|
||||||
|
|||||||
@@ -76,6 +76,9 @@ configurable behaviours:
|
|||||||
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
|
with ``.si_code = SEGV_MTEAERR`` and ``.si_addr = 0`` (the faulting
|
||||||
address is unknown).
|
address is unknown).
|
||||||
|
|
||||||
|
- *Asymmetric* - Reads are handled as for synchronous mode while writes
|
||||||
|
are handled as for asynchronous mode.
|
||||||
|
|
||||||
The user can select the above modes, per thread, using the
|
The user can select the above modes, per thread, using the
|
||||||
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
|
``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call where ``flags``
|
||||||
contains any number of the following values in the ``PR_MTE_TCF_MASK``
|
contains any number of the following values in the ``PR_MTE_TCF_MASK``
|
||||||
@@ -91,8 +94,9 @@ mode is specified, the program will run in that mode. If multiple
|
|||||||
modes are specified, the mode is selected as described in the "Per-CPU
|
modes are specified, the mode is selected as described in the "Per-CPU
|
||||||
preferred tag checking modes" section below.
|
preferred tag checking modes" section below.
|
||||||
|
|
||||||
The current tag check fault mode can be read using the
|
The current tag check fault configuration can be read using the
|
||||||
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call.
|
``prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)`` system call. If
|
||||||
|
multiple modes were requested then all will be reported.
|
||||||
|
|
||||||
Tag checking can also be disabled for a user thread by setting the
|
Tag checking can also be disabled for a user thread by setting the
|
||||||
``PSTATE.TCO`` bit with ``MSR TCO, #1``.
|
``PSTATE.TCO`` bit with ``MSR TCO, #1``.
|
||||||
@@ -139,18 +143,25 @@ tag checking mode as the CPU's preferred tag checking mode.
|
|||||||
|
|
||||||
The preferred tag checking mode for each CPU is controlled by
|
The preferred tag checking mode for each CPU is controlled by
|
||||||
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
|
``/sys/devices/system/cpu/cpu<N>/mte_tcf_preferred``, to which a
|
||||||
privileged user may write the value ``async`` or ``sync``. The default
|
privileged user may write the value ``async``, ``sync`` or ``asymm``. The
|
||||||
preferred mode for each CPU is ``async``.
|
default preferred mode for each CPU is ``async``.
|
||||||
|
|
||||||
To allow a program to potentially run in the CPU's preferred tag
|
To allow a program to potentially run in the CPU's preferred tag
|
||||||
checking mode, the user program may set multiple tag check fault mode
|
checking mode, the user program may set multiple tag check fault mode
|
||||||
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
|
bits in the ``flags`` argument to the ``prctl(PR_SET_TAGGED_ADDR_CTRL,
|
||||||
flags, 0, 0, 0)`` system call. If the CPU's preferred tag checking
|
flags, 0, 0, 0)`` system call. If both synchronous and asynchronous
|
||||||
mode is in the task's set of provided tag checking modes (this will
|
modes are requested then asymmetric mode may also be selected by the
|
||||||
always be the case at present because the kernel only supports two
|
kernel. If the CPU's preferred tag checking mode is in the task's set
|
||||||
tag checking modes, but future kernels may support more modes), that
|
of provided tag checking modes, that mode will be selected. Otherwise,
|
||||||
mode will be selected. Otherwise, one of the modes in the task's mode
|
one of the modes will be selected by the kernel
|
||||||
set will be selected in a currently unspecified manner.
|
from the task's mode set using the preference order:
|
||||||
|
|
||||||
|
1. Asynchronous
|
||||||
|
2. Asymmetric
|
||||||
|
3. Synchronous
|
||||||
|
|
||||||
|
Note that there is no way for userspace to request multiple modes and
|
||||||
|
also disable asymmetric mode.
|
||||||
|
|
||||||
Initial process state
|
Initial process state
|
||||||
---------------------
|
---------------------
|
||||||
@@ -213,6 +224,29 @@ address ABI control and MTE configuration of a process as per the
|
|||||||
Documentation/arm64/tagged-address-abi.rst and above. The corresponding
|
Documentation/arm64/tagged-address-abi.rst and above. The corresponding
|
||||||
``regset`` is 1 element of 8 bytes (``sizeof(long)``).
|
``regset`` is 1 element of 8 bytes (``sizeof(long)``).
|
||||||
|
|
||||||
|
Core dump support
|
||||||
|
-----------------
|
||||||
|
|
||||||
|
The allocation tags for user memory mapped with ``PROT_MTE`` are dumped
|
||||||
|
in the core file as additional ``PT_ARM_MEMTAG_MTE`` segments. The
|
||||||
|
program header for such a segment is defined as:
|
||||||
|
|
||||||
|
:``p_type``: ``PT_ARM_MEMTAG_MTE``
|
||||||
|
:``p_flags``: 0
|
||||||
|
:``p_offset``: segment file offset
|
||||||
|
:``p_vaddr``: segment virtual address, same as the corresponding
|
||||||
|
``PT_LOAD`` segment
|
||||||
|
:``p_paddr``: 0
|
||||||
|
:``p_filesz``: segment size in file, calculated as ``p_memsz / 32``
|
||||||
|
(two 4-bit tags cover 32 bytes of memory)
|
||||||
|
:``p_memsz``: segment size in memory, same as the corresponding
|
||||||
|
``PT_LOAD`` segment
|
||||||
|
:``p_align``: 0
|
||||||
|
|
||||||
|
The tags are stored in the core file at ``p_offset`` as two 4-bit tags
|
||||||
|
in a byte. With the tag granule of 16 bytes, a 4K page requires 128
|
||||||
|
bytes in the core file.
|
||||||
|
|
||||||
Example of correct usage
|
Example of correct usage
|
||||||
========================
|
========================
|
||||||
|
|
||||||
|
|||||||
@@ -100,6 +100,8 @@ stable kernels.
|
|||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
|
| ARM | Cortex-A510 | #2051678 | ARM64_ERRATUM_2051678 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
|
| ARM | Cortex-A510 | #2077057 | ARM64_ERRATUM_2077057 |
|
||||||
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
|
| ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
|
| ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 |
|
||||||
@@ -134,7 +136,7 @@ stable kernels.
|
|||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
|
| Cavium | ThunderX ITS | #23144 | CAVIUM_ERRATUM_23144 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 |
|
| Cavium | ThunderX GICv3 | #23154,38545 | CAVIUM_ERRATUM_23154 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| Cavium | ThunderX GICv3 | #38539 | N/A |
|
| Cavium | ThunderX GICv3 | #38539 | N/A |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
|
|||||||
@@ -130,14 +130,13 @@ denoting a range of code via ``SYM_*_START/END`` annotations.
|
|||||||
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
|
In fact, this kind of annotation corresponds to the now deprecated ``ENTRY``
|
||||||
and ``ENDPROC`` macros.
|
and ``ENDPROC`` macros.
|
||||||
|
|
||||||
* ``SYM_FUNC_START_ALIAS`` and ``SYM_FUNC_START_LOCAL_ALIAS`` serve for those
|
* ``SYM_FUNC_ALIAS``, ``SYM_FUNC_ALIAS_LOCAL``, and ``SYM_FUNC_ALIAS_WEAK`` can
|
||||||
who decided to have two or more names for one function. The typical use is::
|
be used to define multiple names for a function. The typical use is::
|
||||||
|
|
||||||
SYM_FUNC_START_ALIAS(__memset)
|
SYM_FUNC_START(__memset)
|
||||||
SYM_FUNC_START(memset)
|
|
||||||
... asm insns ...
|
... asm insns ...
|
||||||
SYM_FUNC_END(memset)
|
SYM_FUNC_END(__memset)
|
||||||
SYM_FUNC_END_ALIAS(__memset)
|
SYM_FUNC_ALIAS(memset, __memset)
|
||||||
|
|
||||||
In this example, one can call ``__memset`` or ``memset`` with the same
|
In this example, one can call ``__memset`` or ``memset`` with the same
|
||||||
result, except the debug information for the instructions is generated to
|
result, except the debug information for the instructions is generated to
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -7,4 +7,4 @@ This file documents the sysfs file ``block/<disk>/capability``.
|
|||||||
``capability`` is a bitfield, printed in hexadecimal, indicating which
|
``capability`` is a bitfield, printed in hexadecimal, indicating which
|
||||||
capabilities a specific block device supports:
|
capabilities a specific block device supports:
|
||||||
|
|
||||||
.. kernel-doc:: include/linux/genhd.h
|
.. kernel-doc:: include/linux/blkdev.h
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ Block
|
|||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
bfq-iosched
|
bfq-iosched
|
||||||
biodoc
|
|
||||||
biovecs
|
biovecs
|
||||||
blk-mq
|
blk-mq
|
||||||
capability
|
capability
|
||||||
|
|||||||
@@ -658,7 +658,7 @@ when:
|
|||||||
|
|
||||||
.. Links
|
.. Links
|
||||||
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
|
.. _Documentation/process/: https://www.kernel.org/doc/html/latest/process/
|
||||||
.. _netdev-FAQ: ../networking/netdev-FAQ.rst
|
.. _netdev-FAQ: Documentation/process/maintainer-netdev.rst
|
||||||
.. _selftests:
|
.. _selftests:
|
||||||
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
|
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/testing/selftests/bpf/
|
||||||
.. _Documentation/dev-tools/kselftest.rst:
|
.. _Documentation/dev-tools/kselftest.rst:
|
||||||
|
|||||||
117
Documentation/bpf/bpf_prog_run.rst
Normal file
117
Documentation/bpf/bpf_prog_run.rst
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===================================
|
||||||
|
Running BPF programs from userspace
|
||||||
|
===================================
|
||||||
|
|
||||||
|
This document describes the ``BPF_PROG_RUN`` facility for running BPF programs
|
||||||
|
from userspace.
|
||||||
|
|
||||||
|
.. contents::
|
||||||
|
:local:
|
||||||
|
:depth: 2
|
||||||
|
|
||||||
|
|
||||||
|
Overview
|
||||||
|
--------
|
||||||
|
|
||||||
|
The ``BPF_PROG_RUN`` command can be used through the ``bpf()`` syscall to
|
||||||
|
execute a BPF program in the kernel and return the results to userspace. This
|
||||||
|
can be used to unit test BPF programs against user-supplied context objects, and
|
||||||
|
as a way to explicitly execute programs in the kernel for their side effects. The
|
||||||
|
command was previously named ``BPF_PROG_TEST_RUN``, and both constants continue
|
||||||
|
to be defined in the UAPI header, aliased to the same value.
|
||||||
|
|
||||||
|
The ``BPF_PROG_RUN`` command can be used to execute BPF programs of the
|
||||||
|
following types:
|
||||||
|
|
||||||
|
- ``BPF_PROG_TYPE_SOCKET_FILTER``
|
||||||
|
- ``BPF_PROG_TYPE_SCHED_CLS``
|
||||||
|
- ``BPF_PROG_TYPE_SCHED_ACT``
|
||||||
|
- ``BPF_PROG_TYPE_XDP``
|
||||||
|
- ``BPF_PROG_TYPE_SK_LOOKUP``
|
||||||
|
- ``BPF_PROG_TYPE_CGROUP_SKB``
|
||||||
|
- ``BPF_PROG_TYPE_LWT_IN``
|
||||||
|
- ``BPF_PROG_TYPE_LWT_OUT``
|
||||||
|
- ``BPF_PROG_TYPE_LWT_XMIT``
|
||||||
|
- ``BPF_PROG_TYPE_LWT_SEG6LOCAL``
|
||||||
|
- ``BPF_PROG_TYPE_FLOW_DISSECTOR``
|
||||||
|
- ``BPF_PROG_TYPE_STRUCT_OPS``
|
||||||
|
- ``BPF_PROG_TYPE_RAW_TRACEPOINT``
|
||||||
|
- ``BPF_PROG_TYPE_SYSCALL``
|
||||||
|
|
||||||
|
When using the ``BPF_PROG_RUN`` command, userspace supplies an input context
|
||||||
|
object and (for program types operating on network packets) a buffer containing
|
||||||
|
the packet data that the BPF program will operate on. The kernel will then
|
||||||
|
execute the program and return the results to userspace. Note that programs will
|
||||||
|
not have any side effects while being run in this mode; in particular, packets
|
||||||
|
will not actually be redirected or dropped, the program return code will just be
|
||||||
|
returned to userspace. A separate mode for live execution of XDP programs is
|
||||||
|
provided, documented separately below.
|
||||||
|
|
||||||
|
Running XDP programs in "live frame mode"
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
The ``BPF_PROG_RUN`` command has a separate mode for running live XDP programs,
|
||||||
|
which can be used to execute XDP programs in a way where packets will actually
|
||||||
|
be processed by the kernel after the execution of the XDP program as if they
|
||||||
|
arrived on a physical interface. This mode is activated by setting the
|
||||||
|
``BPF_F_TEST_XDP_LIVE_FRAMES`` flag when supplying an XDP program to
|
||||||
|
``BPF_PROG_RUN``.
|
||||||
|
|
||||||
|
The live packet mode is optimised for high performance execution of the supplied
|
||||||
|
XDP program many times (suitable for, e.g., running as a traffic generator),
|
||||||
|
which means the semantics are not quite as straight-forward as the regular test
|
||||||
|
run mode. Specifically:
|
||||||
|
|
||||||
|
- When executing an XDP program in live frame mode, the result of the execution
|
||||||
|
will not be returned to userspace; instead, the kernel will perform the
|
||||||
|
operation indicated by the program's return code (drop the packet, redirect
|
||||||
|
it, etc). For this reason, setting the ``data_out`` or ``ctx_out`` attributes
|
||||||
|
in the syscall parameters when running in this mode will be rejected. In
|
||||||
|
addition, not all failures will be reported back to userspace directly;
|
||||||
|
specifically, only fatal errors in setup or during execution (like memory
|
||||||
|
allocation errors) will halt execution and return an error. If an error occurs
|
||||||
|
in packet processing, like a failure to redirect to a given interface,
|
||||||
|
execution will continue with the next repetition; these errors can be detected
|
||||||
|
via the same trace points as for regular XDP programs.
|
||||||
|
|
||||||
|
- Userspace can supply an ifindex as part of the context object, just like in
|
||||||
|
the regular (non-live) mode. The XDP program will be executed as though the
|
||||||
|
packet arrived on this interface; i.e., the ``ingress_ifindex`` of the context
|
||||||
|
object will point to that interface. Furthermore, if the XDP program returns
|
||||||
|
``XDP_PASS``, the packet will be injected into the kernel networking stack as
|
||||||
|
though it arrived on that ifindex, and if it returns ``XDP_TX``, the packet
|
||||||
|
will be transmitted *out* of that same interface. Do note, though, that
|
||||||
|
because the program execution is not happening in driver context, an
|
||||||
|
``XDP_TX`` is actually turned into the same action as an ``XDP_REDIRECT`` to
|
||||||
|
that same interface (i.e., it will only work if the driver has support for the
|
||||||
|
``ndo_xdp_xmit`` driver op).
|
||||||
|
|
||||||
|
- When running the program with multiple repetitions, the execution will happen
|
||||||
|
in batches. The batch size defaults to 64 packets (which is the same as the
|
||||||
|
maximum NAPI receive batch size), but can be specified by userspace through
|
||||||
|
the ``batch_size`` parameter, up to a maximum of 256 packets. For each batch,
|
||||||
|
the kernel executes the XDP program repeatedly, each invocation getting a
|
||||||
|
separate copy of the packet data. For each repetition, if the program drops
|
||||||
|
the packet, the data page is immediately recycled (see below). Otherwise, the
|
||||||
|
packet is buffered until the end of the batch, at which point all packets
|
||||||
|
buffered this way during the batch are transmitted at once.
|
||||||
|
|
||||||
|
- When setting up the test run, the kernel will initialise a pool of memory
|
||||||
|
pages of the same size as the batch size. Each memory page will be initialised
|
||||||
|
with the initial packet data supplied by userspace at ``BPF_PROG_RUN``
|
||||||
|
invocation. When possible, the pages will be recycled on future program
|
||||||
|
invocations, to improve performance. Pages will generally be recycled a full
|
||||||
|
batch at a time, except when a packet is dropped (by return code or because
|
||||||
|
of, say, a redirection error), in which case that page will be recycled
|
||||||
|
immediately. If a packet ends up being passed to the regular networking stack
|
||||||
|
(because the XDP program returns ``XDP_PASS``, or because it ends up being
|
||||||
|
redirected to an interface that injects it into the stack), the page will be
|
||||||
|
released and a new one will be allocated when the pool is empty.
|
||||||
|
|
||||||
|
When recycling, the page content is not rewritten; only the packet boundary
|
||||||
|
pointers (``data``, ``data_end`` and ``data_meta``) in the context object will
|
||||||
|
be reset to the original values. This means that if a program rewrites the
|
||||||
|
packet contents, it has to be prepared to see either the original content or
|
||||||
|
the modified version on subsequent invocations.
|
||||||
@@ -503,6 +503,19 @@ valid index (starting from 0) pointing to a member or an argument.
|
|||||||
* ``info.vlen``: 0
|
* ``info.vlen``: 0
|
||||||
* ``type``: the type with ``btf_type_tag`` attribute
|
* ``type``: the type with ``btf_type_tag`` attribute
|
||||||
|
|
||||||
|
Currently, ``BTF_KIND_TYPE_TAG`` is only emitted for pointer types.
|
||||||
|
It has the following btf type chain:
|
||||||
|
::
|
||||||
|
|
||||||
|
ptr -> [type_tag]*
|
||||||
|
-> [const | volatile | restrict | typedef]*
|
||||||
|
-> base_type
|
||||||
|
|
||||||
|
Basically, a pointer type points to zero or more
|
||||||
|
type_tag, then zero or more const/volatile/restrict/typedef
|
||||||
|
and finally the base type. The base type is one of
|
||||||
|
int, ptr, array, struct, union, enum, func_proto and float types.
|
||||||
|
|
||||||
3. BTF Kernel API
|
3. BTF Kernel API
|
||||||
=================
|
=================
|
||||||
|
|
||||||
@@ -565,18 +578,15 @@ A map can be created with ``btf_fd`` and specified key/value type id.::
|
|||||||
In libbpf, the map can be defined with extra annotation like below:
|
In libbpf, the map can be defined with extra annotation like below:
|
||||||
::
|
::
|
||||||
|
|
||||||
struct bpf_map_def SEC("maps") btf_map = {
|
struct {
|
||||||
.type = BPF_MAP_TYPE_ARRAY,
|
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||||
.key_size = sizeof(int),
|
__type(key, int);
|
||||||
.value_size = sizeof(struct ipv_counts),
|
__type(value, struct ipv_counts);
|
||||||
.max_entries = 4,
|
__uint(max_entries, 4);
|
||||||
};
|
} btf_map SEC(".maps");
|
||||||
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
|
|
||||||
|
|
||||||
Here, the parameters for macro BPF_ANNOTATE_KV_PAIR are map name, key and
|
During ELF parsing, libbpf is able to extract key/value type_id's and assign
|
||||||
value types for the map. During ELF parsing, libbpf is able to extract
|
them to BPF_MAP_CREATE attributes automatically.
|
||||||
key/value type_id's and assign them to BPF_MAP_CREATE attributes
|
|
||||||
automatically.
|
|
||||||
|
|
||||||
.. _BPF_Prog_Load:
|
.. _BPF_Prog_Load:
|
||||||
|
|
||||||
@@ -824,13 +834,12 @@ structure has bitfields. For example, for the following map,::
|
|||||||
___A b1:4;
|
___A b1:4;
|
||||||
enum A b2:4;
|
enum A b2:4;
|
||||||
};
|
};
|
||||||
struct bpf_map_def SEC("maps") tmpmap = {
|
struct {
|
||||||
.type = BPF_MAP_TYPE_ARRAY,
|
__uint(type, BPF_MAP_TYPE_ARRAY);
|
||||||
.key_size = sizeof(__u32),
|
__type(key, int);
|
||||||
.value_size = sizeof(struct tmp_t),
|
__type(value, struct tmp_t);
|
||||||
.max_entries = 1,
|
__uint(max_entries, 1);
|
||||||
};
|
} tmpmap SEC(".maps");
|
||||||
BPF_ANNOTATE_KV_PAIR(tmpmap, int, struct tmp_t);
|
|
||||||
|
|
||||||
bpftool is able to pretty print like below:
|
bpftool is able to pretty print like below:
|
||||||
::
|
::
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture.
|
|||||||
helpers
|
helpers
|
||||||
programs
|
programs
|
||||||
maps
|
maps
|
||||||
|
bpf_prog_run
|
||||||
classic_vs_extended.rst
|
classic_vs_extended.rst
|
||||||
bpf_licensing
|
bpf_licensing
|
||||||
test_debug
|
test_debug
|
||||||
|
|||||||
@@ -22,7 +22,13 @@ necessary across calls.
|
|||||||
Instruction encoding
|
Instruction encoding
|
||||||
====================
|
====================
|
||||||
|
|
||||||
eBPF uses 64-bit instructions with the following encoding:
|
eBPF has two instruction encodings:
|
||||||
|
|
||||||
|
* the basic instruction encoding, which uses 64 bits to encode an instruction
|
||||||
|
* the wide instruction encoding, which appends a second 64-bit immediate value
|
||||||
|
(imm64) after the basic instruction for a total of 128 bits.
|
||||||
|
|
||||||
|
The basic instruction encoding looks as follows:
|
||||||
|
|
||||||
============= ======= =============== ==================== ============
|
============= ======= =============== ==================== ============
|
||||||
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
|
32 bits (MSB) 16 bits 4 bits 4 bits 8 bits (LSB)
|
||||||
@@ -82,9 +88,9 @@ BPF_ALU uses 32-bit wide operands while BPF_ALU64 uses 64-bit wide operands for
|
|||||||
otherwise identical operations.
|
otherwise identical operations.
|
||||||
The code field encodes the operation as below:
|
The code field encodes the operation as below:
|
||||||
|
|
||||||
======== ===== ==========================
|
======== ===== =================================================
|
||||||
code value description
|
code value description
|
||||||
======== ===== ==========================
|
======== ===== =================================================
|
||||||
BPF_ADD 0x00 dst += src
|
BPF_ADD 0x00 dst += src
|
||||||
BPF_SUB 0x10 dst -= src
|
BPF_SUB 0x10 dst -= src
|
||||||
BPF_MUL 0x20 dst \*= src
|
BPF_MUL 0x20 dst \*= src
|
||||||
@@ -98,8 +104,8 @@ The code field encodes the operation as below:
|
|||||||
BPF_XOR 0xa0 dst ^= src
|
BPF_XOR 0xa0 dst ^= src
|
||||||
BPF_MOV 0xb0 dst = src
|
BPF_MOV 0xb0 dst = src
|
||||||
BPF_ARSH 0xc0 sign extending shift right
|
BPF_ARSH 0xc0 sign extending shift right
|
||||||
BPF_END 0xd0 endianness conversion
|
BPF_END 0xd0 byte swap operations (see separate section below)
|
||||||
======== ===== ==========================
|
======== ===== =================================================
|
||||||
|
|
||||||
BPF_ADD | BPF_X | BPF_ALU means::
|
BPF_ADD | BPF_X | BPF_ALU means::
|
||||||
|
|
||||||
@@ -118,6 +124,42 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
|
|||||||
src_reg = src_reg ^ imm32
|
src_reg = src_reg ^ imm32
|
||||||
|
|
||||||
|
|
||||||
|
Byte swap instructions
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
|
||||||
|
code field of ``BPF_END``.
|
||||||
|
|
||||||
|
The byte swap instructions operate on the destination register
|
||||||
|
only and do not use a separate source register or immediate value.
|
||||||
|
|
||||||
|
The 1-bit source operand field in the opcode is used to select what byte
|
||||||
|
order the operation converts from or to:
|
||||||
|
|
||||||
|
========= ===== =================================================
|
||||||
|
source value description
|
||||||
|
========= ===== =================================================
|
||||||
|
BPF_TO_LE 0x00 convert between host byte order and little endian
|
||||||
|
BPF_TO_BE 0x08 convert between host byte order and big endian
|
||||||
|
========= ===== =================================================
|
||||||
|
|
||||||
|
The imm field encodes the width of the swap operations. The following widths
|
||||||
|
are supported: 16, 32 and 64.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
|
||||||
|
``BPF_ALU | BPF_TO_LE | BPF_END`` with imm = 16 means::
|
||||||
|
|
||||||
|
dst_reg = htole16(dst_reg)
|
||||||
|
|
||||||
|
``BPF_ALU | BPF_TO_BE | BPF_END`` with imm = 64 means::
|
||||||
|
|
||||||
|
dst_reg = htobe64(dst_reg)
|
||||||
|
|
||||||
|
``BPF_FROM_LE`` and ``BPF_FROM_BE`` exist as aliases for ``BPF_TO_LE`` and
|
||||||
|
``BPF_TO_BE`` respectively.
|
||||||
|
|
||||||
|
|
||||||
Jump instructions
|
Jump instructions
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
@@ -176,63 +218,96 @@ The mode modifier is one of:
|
|||||||
============= ===== ====================================
|
============= ===== ====================================
|
||||||
mode modifier value description
|
mode modifier value description
|
||||||
============= ===== ====================================
|
============= ===== ====================================
|
||||||
BPF_IMM 0x00 used for 64-bit mov
|
BPF_IMM 0x00 64-bit immediate instructions
|
||||||
BPF_ABS 0x20 legacy BPF packet access
|
BPF_ABS 0x20 legacy BPF packet access (absolute)
|
||||||
BPF_IND 0x40 legacy BPF packet access
|
BPF_IND 0x40 legacy BPF packet access (indirect)
|
||||||
BPF_MEM 0x60 all normal load and store operations
|
BPF_MEM 0x60 regular load and store operations
|
||||||
BPF_ATOMIC 0xc0 atomic operations
|
BPF_ATOMIC 0xc0 atomic operations
|
||||||
============= ===== ====================================
|
============= ===== ====================================
|
||||||
|
|
||||||
BPF_MEM | <size> | BPF_STX means::
|
|
||||||
|
Regular load and store operations
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
The ``BPF_MEM`` mode modifier is used to encode regular load and store
|
||||||
|
instructions that transfer data between a register and memory.
|
||||||
|
|
||||||
|
``BPF_MEM | <size> | BPF_STX`` means::
|
||||||
|
|
||||||
*(size *) (dst_reg + off) = src_reg
|
*(size *) (dst_reg + off) = src_reg
|
||||||
|
|
||||||
BPF_MEM | <size> | BPF_ST means::
|
``BPF_MEM | <size> | BPF_ST`` means::
|
||||||
|
|
||||||
*(size *) (dst_reg + off) = imm32
|
*(size *) (dst_reg + off) = imm32
|
||||||
|
|
||||||
BPF_MEM | <size> | BPF_LDX means::
|
``BPF_MEM | <size> | BPF_LDX`` means::
|
||||||
|
|
||||||
dst_reg = *(size *) (src_reg + off)
|
dst_reg = *(size *) (src_reg + off)
|
||||||
|
|
||||||
Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW.
|
Where size is one of: ``BPF_B``, ``BPF_H``, ``BPF_W``, or ``BPF_DW``.
|
||||||
|
|
||||||
Atomic operations
|
Atomic operations
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
eBPF includes atomic operations, which use the immediate field for extra
|
Atomic operations are operations that operate on memory and can not be
|
||||||
encoding::
|
interrupted or corrupted by other access to the same memory region
|
||||||
|
by other eBPF programs or means outside of this specification.
|
||||||
|
|
||||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_W | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
|
All atomic operations supported by eBPF are encoded as store operations
|
||||||
.imm = BPF_ADD, .code = BPF_ATOMIC | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
|
that use the ``BPF_ATOMIC`` mode modifier as follows:
|
||||||
|
|
||||||
The basic atomic operations supported are::
|
* ``BPF_ATOMIC | BPF_W | BPF_STX`` for 32-bit operations
|
||||||
|
* ``BPF_ATOMIC | BPF_DW | BPF_STX`` for 64-bit operations
|
||||||
|
* 8-bit and 16-bit wide atomic operations are not supported.
|
||||||
|
|
||||||
BPF_ADD
|
The imm field is used to encode the actual atomic operation.
|
||||||
BPF_AND
|
Simple atomic operations use a subset of the values defined to encode
|
||||||
BPF_OR
|
arithmetic operations in the imm field to encode the atomic operation:
|
||||||
BPF_XOR
|
|
||||||
|
|
||||||
Each having equivalent semantics with the ``BPF_ADD`` example, that is: the
|
======== ===== ===========
|
||||||
memory location addresed by ``dst_reg + off`` is atomically modified, with
|
imm value description
|
||||||
``src_reg`` as the other operand. If the ``BPF_FETCH`` flag is set in the
|
======== ===== ===========
|
||||||
immediate, then these operations also overwrite ``src_reg`` with the
|
BPF_ADD 0x00 atomic add
|
||||||
value that was in memory before it was modified.
|
BPF_OR 0x40 atomic or
|
||||||
|
BPF_AND 0x50 atomic and
|
||||||
|
BPF_XOR 0xa0 atomic xor
|
||||||
|
======== ===== ===========
|
||||||
|
|
||||||
The more special operations are::
|
|
||||||
|
|
||||||
BPF_XCHG
|
``BPF_ATOMIC | BPF_W | BPF_STX`` with imm = BPF_ADD means::
|
||||||
|
|
||||||
This atomically exchanges ``src_reg`` with the value addressed by ``dst_reg +
|
*(u32 *)(dst_reg + off16) += src_reg
|
||||||
off``. ::
|
|
||||||
|
|
||||||
BPF_CMPXCHG
|
``BPF_ATOMIC | BPF_DW | BPF_STX`` with imm = BPF_ADD means::
|
||||||
|
|
||||||
This atomically compares the value addressed by ``dst_reg + off`` with
|
*(u64 *)(dst_reg + off16) += src_reg
|
||||||
``R0``. If they match it is replaced with ``src_reg``. In either case, the
|
|
||||||
value that was there before is zero-extended and loaded back to ``R0``.
|
|
||||||
|
|
||||||
Note that 1 and 2 byte atomic operations are not supported.
|
``BPF_XADD`` is a deprecated name for ``BPF_ATOMIC | BPF_ADD``.
|
||||||
|
|
||||||
|
In addition to the simple atomic operations, there also is a modifier and
|
||||||
|
two complex atomic operations:
|
||||||
|
|
||||||
|
=========== ================ ===========================
|
||||||
|
imm value description
|
||||||
|
=========== ================ ===========================
|
||||||
|
BPF_FETCH 0x01 modifier: return old value
|
||||||
|
BPF_XCHG 0xe0 | BPF_FETCH atomic exchange
|
||||||
|
BPF_CMPXCHG 0xf0 | BPF_FETCH atomic compare and exchange
|
||||||
|
=========== ================ ===========================
|
||||||
|
|
||||||
|
The ``BPF_FETCH`` modifier is optional for simple atomic operations, and
|
||||||
|
always set for the complex atomic operations. If the ``BPF_FETCH`` flag
|
||||||
|
is set, then the operation also overwrites ``src_reg`` with the value that
|
||||||
|
was in memory before it was modified.
|
||||||
|
|
||||||
|
The ``BPF_XCHG`` operation atomically exchanges ``src_reg`` with the value
|
||||||
|
addressed by ``dst_reg + off``.
|
||||||
|
|
||||||
|
The ``BPF_CMPXCHG`` operation atomically compares the value addressed by
|
||||||
|
``dst_reg + off`` with ``R0``. If they match, the value addressed by
|
||||||
|
``dst_reg + off`` is replaced with ``src_reg``. In either case, the
|
||||||
|
value that was at ``dst_reg + off`` before the operation is zero-extended
|
||||||
|
and loaded back to ``R0``.
|
||||||
|
|
||||||
Clang can generate atomic instructions by default when ``-mcpu=v3`` is
|
Clang can generate atomic instructions by default when ``-mcpu=v3`` is
|
||||||
enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
|
enabled. If a lower version for ``-mcpu`` is set, the only atomic instruction
|
||||||
@@ -240,40 +315,52 @@ Clang can generate is ``BPF_ADD`` *without* ``BPF_FETCH``. If you need to enable
|
|||||||
the atomics features, while keeping a lower ``-mcpu`` version, you can use
|
the atomics features, while keeping a lower ``-mcpu`` version, you can use
|
||||||
``-Xclang -target-feature -Xclang +alu32``.
|
``-Xclang -target-feature -Xclang +alu32``.
|
||||||
|
|
||||||
You may encounter ``BPF_XADD`` - this is a legacy name for ``BPF_ATOMIC``,
|
64-bit immediate instructions
|
||||||
referring to the exclusive-add operation encoded when the immediate field is
|
-----------------------------
|
||||||
zero.
|
|
||||||
|
|
||||||
16-byte instructions
|
Instructions with the ``BPF_IMM`` mode modifier use the wide instruction
|
||||||
--------------------
|
encoding for an extra imm64 value.
|
||||||
|
|
||||||
eBPF has one 16-byte instruction: ``BPF_LD | BPF_DW | BPF_IMM`` which consists
|
There is currently only one such instruction.
|
||||||
of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
|
|
||||||
instruction that loads 64-bit immediate value into a dst_reg.
|
|
||||||
|
|
||||||
Packet access instructions
|
``BPF_LD | BPF_DW | BPF_IMM`` means::
|
||||||
--------------------------
|
|
||||||
|
|
||||||
eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
|
dst_reg = imm64
|
||||||
(BPF_IND | <size> | BPF_LD) which are used to access packet data.
|
|
||||||
|
|
||||||
They had to be carried over from classic BPF to have strong performance of
|
|
||||||
socket filters running in eBPF interpreter. These instructions can only
|
|
||||||
be used when interpreter context is a pointer to ``struct sk_buff`` and
|
|
||||||
have seven implicit operands. Register R6 is an implicit input that must
|
|
||||||
contain pointer to sk_buff. Register R0 is an implicit output which contains
|
|
||||||
the data fetched from the packet. Registers R1-R5 are scratch registers
|
|
||||||
and must not be used to store the data across BPF_ABS | BPF_LD or
|
|
||||||
BPF_IND | BPF_LD instructions.
|
|
||||||
|
|
||||||
These instructions have implicit program exit condition as well. When
|
Legacy BPF Packet access instructions
|
||||||
eBPF program is trying to access the data beyond the packet boundary,
|
-------------------------------------
|
||||||
the interpreter will abort the execution of the program. JIT compilers
|
|
||||||
therefore must preserve this property. src_reg and imm32 fields are
|
|
||||||
explicit inputs to these instructions.
|
|
||||||
|
|
||||||
For example, BPF_IND | BPF_W | BPF_LD means::
|
eBPF has special instructions for access to packet data that have been
|
||||||
|
carried over from classic BPF to retain the performance of legacy socket
|
||||||
|
filters running in the eBPF interpreter.
|
||||||
|
|
||||||
|
The instructions come in two forms: ``BPF_ABS | <size> | BPF_LD`` and
|
||||||
|
``BPF_IND | <size> | BPF_LD``.
|
||||||
|
|
||||||
|
These instructions are used to access packet data and can only be used when
|
||||||
|
the program context is a pointer to a networking packet. ``BPF_ABS``
|
||||||
|
accesses packet data at an absolute offset specified by the immediate data
|
||||||
|
and ``BPF_IND`` accesses packet data at an offset that includes the value of
|
||||||
|
a register in addition to the immediate data.
|
||||||
|
|
||||||
|
These instructions have seven implicit operands:
|
||||||
|
|
||||||
|
* Register R6 is an implicit input that must contain a pointer to a
|
||||||
|
struct sk_buff.
|
||||||
|
* Register R0 is an implicit output which contains the data fetched from
|
||||||
|
the packet.
|
||||||
|
* Registers R1-R5 are scratch registers that are clobbered after a call to
|
||||||
|
``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
|
||||||
|
|
||||||
|
These instructions have an implicit program exit condition as well. When an
|
||||||
|
eBPF program is trying to access the data beyond the packet boundary, the
|
||||||
|
program execution will be aborted.
|
||||||
|
|
||||||
|
``BPF_ABS | BPF_W | BPF_LD`` means::
|
||||||
|
|
||||||
|
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + imm32))
|
||||||
|
|
||||||
|
``BPF_IND | BPF_W | BPF_LD`` means::
|
||||||
|
|
||||||
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
|
R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
|
||||||
|
|
||||||
and R1 - R5 are clobbered.
|
|
||||||
|
|||||||
@@ -329,7 +329,7 @@ Program with unreachable instructions::
|
|||||||
BPF_EXIT_INSN(),
|
BPF_EXIT_INSN(),
|
||||||
};
|
};
|
||||||
|
|
||||||
Error:
|
Error::
|
||||||
|
|
||||||
unreachable insn 1
|
unreachable insn 1
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ Getting started quick
|
|||||||
- Compile and install kernel and modules, reboot.
|
- Compile and install kernel and modules, reboot.
|
||||||
|
|
||||||
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
|
- You need the udftools package (pktsetup, mkudffs, cdrwtool).
|
||||||
Download from http://sourceforge.net/projects/linux-udf/
|
Download from https://github.com/pali/udftools
|
||||||
|
|
||||||
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
|
- Grab a new CD-RW disc and format it (assuming CD-RW is hdc, substitute
|
||||||
as appropriate)::
|
as appropriate)::
|
||||||
@@ -102,7 +102,7 @@ Using the pktcdvd sysfs interface
|
|||||||
|
|
||||||
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
|
Since Linux 2.6.20, the pktcdvd module has a sysfs interface
|
||||||
and can be controlled by it. For example the "pktcdvd" tool uses
|
and can be controlled by it. For example the "pktcdvd" tool uses
|
||||||
this interface. (see http://tom.ist-im-web.de/download/pktcdvd )
|
this interface. (see http://tom.ist-im-web.de/linux/software/pktcdvd )
|
||||||
|
|
||||||
"pktcdvd" works similar to "pktsetup", e.g.::
|
"pktcdvd" works similar to "pktsetup", e.g.::
|
||||||
|
|
||||||
|
|||||||
@@ -409,135 +409,25 @@ latex_elements = {
|
|||||||
|
|
||||||
# Additional stuff for the LaTeX preamble.
|
# Additional stuff for the LaTeX preamble.
|
||||||
'preamble': '''
|
'preamble': '''
|
||||||
% Prevent column squeezing of tabulary.
|
|
||||||
\\setlength{\\tymin}{20em}
|
|
||||||
% Use some font with UTF-8 support with XeLaTeX
|
% Use some font with UTF-8 support with XeLaTeX
|
||||||
\\usepackage{fontspec}
|
\\usepackage{fontspec}
|
||||||
\\setsansfont{DejaVu Sans}
|
\\setsansfont{DejaVu Sans}
|
||||||
\\setromanfont{DejaVu Serif}
|
\\setromanfont{DejaVu Serif}
|
||||||
\\setmonofont{DejaVu Sans Mono}
|
\\setmonofont{DejaVu Sans Mono}
|
||||||
% Adjust \\headheight for fancyhdr
|
|
||||||
\\addtolength{\\headheight}{1.6pt}
|
|
||||||
\\addtolength{\\topmargin}{-1.6pt}
|
|
||||||
''',
|
''',
|
||||||
}
|
}
|
||||||
|
|
||||||
# Translations have Asian (CJK) characters which are only displayed if
|
|
||||||
# xeCJK is used
|
|
||||||
|
|
||||||
latex_elements['preamble'] += '''
|
|
||||||
\\IfFontExistsTF{Noto Sans CJK SC}{
|
|
||||||
% This is needed for translations
|
|
||||||
\\usepackage{xeCJK}
|
|
||||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
|
||||||
\\setCJKmainfont{Noto Serif CJK SC}[AutoFakeSlant]
|
|
||||||
}{
|
|
||||||
\\setCJKmainfont{Noto Sans CJK SC}[AutoFakeSlant]
|
|
||||||
}
|
|
||||||
\\setCJKsansfont{Noto Sans CJK SC}[AutoFakeSlant]
|
|
||||||
\\setCJKmonofont{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
|
||||||
% CJK Language-specific font choices
|
|
||||||
\\IfFontExistsTF{Noto Serif CJK SC}{
|
|
||||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Serif CJK SC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Serif CJK SC}[AutoFakeSlant]
|
|
||||||
}{
|
|
||||||
\\newCJKfontfamily[SCmain]\\scmain{Noto Sans CJK SC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[SCserif]\\scserif{Noto Sans CJK SC}[AutoFakeSlant]
|
|
||||||
}
|
|
||||||
\\newCJKfontfamily[SCsans]\\scsans{Noto Sans CJK SC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[SCmono]\\scmono{Noto Sans Mono CJK SC}[AutoFakeSlant]
|
|
||||||
\\IfFontExistsTF{Noto Serif CJK TC}{
|
|
||||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Serif CJK TC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Serif CJK TC}[AutoFakeSlant]
|
|
||||||
}{
|
|
||||||
\\newCJKfontfamily[TCmain]\\tcmain{Noto Sans CJK TC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[TCserif]\\tcserif{Noto Sans CJK TC}[AutoFakeSlant]
|
|
||||||
}
|
|
||||||
\\newCJKfontfamily[TCsans]\\tcsans{Noto Sans CJK TC}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[TCmono]\\tcmono{Noto Sans Mono CJK TC}[AutoFakeSlant]
|
|
||||||
\\IfFontExistsTF{Noto Serif CJK KR}{
|
|
||||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Serif CJK KR}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Serif CJK KR}[AutoFakeSlant]
|
|
||||||
}{
|
|
||||||
\\newCJKfontfamily[KRmain]\\krmain{Noto Sans CJK KR}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[KRserif]\\krserif{Noto Sans CJK KR}[AutoFakeSlant]
|
|
||||||
}
|
|
||||||
\\newCJKfontfamily[KRsans]\\krsans{Noto Sans CJK KR}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[KRmono]\\krmono{Noto Sans Mono CJK KR}[AutoFakeSlant]
|
|
||||||
\\IfFontExistsTF{Noto Serif CJK JP}{
|
|
||||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Serif CJK JP}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Serif CJK JP}[AutoFakeSlant]
|
|
||||||
}{
|
|
||||||
\\newCJKfontfamily[JPmain]\\jpmain{Noto Sans CJK JP}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[JPserif]\\jpserif{Noto Sans CJK JP}[AutoFakeSlant]
|
|
||||||
}
|
|
||||||
\\newCJKfontfamily[JPsans]\\jpsans{Noto Sans CJK JP}[AutoFakeSlant]
|
|
||||||
\\newCJKfontfamily[JPmono]\\jpmono{Noto Sans Mono CJK JP}[AutoFakeSlant]
|
|
||||||
% Dummy commands for Sphinx < 2.3 (no 'extrapackages' support)
|
|
||||||
\\providecommand{\\onehalfspacing}{}
|
|
||||||
\\providecommand{\\singlespacing}{}
|
|
||||||
% Define custom macros to on/off CJK
|
|
||||||
\\newcommand{\\kerneldocCJKon}{\\makexeCJKactive\\onehalfspacing}
|
|
||||||
\\newcommand{\\kerneldocCJKoff}{\\makexeCJKinactive\\singlespacing}
|
|
||||||
\\newcommand{\\kerneldocBeginSC}{%
|
|
||||||
\\begingroup%
|
|
||||||
\\scmain%
|
|
||||||
}
|
|
||||||
\\newcommand{\\kerneldocEndSC}{\\endgroup}
|
|
||||||
\\newcommand{\\kerneldocBeginTC}{%
|
|
||||||
\\begingroup%
|
|
||||||
\\tcmain%
|
|
||||||
\\renewcommand{\\CJKrmdefault}{TCserif}%
|
|
||||||
\\renewcommand{\\CJKsfdefault}{TCsans}%
|
|
||||||
\\renewcommand{\\CJKttdefault}{TCmono}%
|
|
||||||
}
|
|
||||||
\\newcommand{\\kerneldocEndTC}{\\endgroup}
|
|
||||||
\\newcommand{\\kerneldocBeginKR}{%
|
|
||||||
\\begingroup%
|
|
||||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
|
||||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
|
||||||
\\krmain%
|
|
||||||
\\renewcommand{\\CJKrmdefault}{KRserif}%
|
|
||||||
\\renewcommand{\\CJKsfdefault}{KRsans}%
|
|
||||||
\\renewcommand{\\CJKttdefault}{KRmono}%
|
|
||||||
\\xeCJKsetup{CJKspace = true} % For inter-phrase space
|
|
||||||
}
|
|
||||||
\\newcommand{\\kerneldocEndKR}{\\endgroup}
|
|
||||||
\\newcommand{\\kerneldocBeginJP}{%
|
|
||||||
\\begingroup%
|
|
||||||
\\xeCJKDeclareCharClass{HalfLeft}{`“,`‘}%
|
|
||||||
\\xeCJKDeclareCharClass{HalfRight}{`”,`’}%
|
|
||||||
\\jpmain%
|
|
||||||
\\renewcommand{\\CJKrmdefault}{JPserif}%
|
|
||||||
\\renewcommand{\\CJKsfdefault}{JPsans}%
|
|
||||||
\\renewcommand{\\CJKttdefault}{JPmono}%
|
|
||||||
}
|
|
||||||
\\newcommand{\\kerneldocEndJP}{\\endgroup}
|
|
||||||
% Single spacing in literal blocks
|
|
||||||
\\fvset{baselinestretch=1}
|
|
||||||
% To customize \\sphinxtableofcontents
|
|
||||||
\\usepackage{etoolbox}
|
|
||||||
% Inactivate CJK after tableofcontents
|
|
||||||
\\apptocmd{\\sphinxtableofcontents}{\\kerneldocCJKoff}{}{}
|
|
||||||
}{ % No CJK font found
|
|
||||||
% Custom macros to on/off CJK (Dummy)
|
|
||||||
\\newcommand{\\kerneldocCJKon}{}
|
|
||||||
\\newcommand{\\kerneldocCJKoff}{}
|
|
||||||
\\newcommand{\\kerneldocBeginSC}{}
|
|
||||||
\\newcommand{\\kerneldocEndSC}{}
|
|
||||||
\\newcommand{\\kerneldocBeginTC}{}
|
|
||||||
\\newcommand{\\kerneldocEndTC}{}
|
|
||||||
\\newcommand{\\kerneldocBeginKR}{}
|
|
||||||
\\newcommand{\\kerneldocEndKR}{}
|
|
||||||
\\newcommand{\\kerneldocBeginJP}{}
|
|
||||||
\\newcommand{\\kerneldocEndJP}{}
|
|
||||||
}
|
|
||||||
'''
|
|
||||||
|
|
||||||
# Fix reference escape troubles with Sphinx 1.4.x
|
# Fix reference escape troubles with Sphinx 1.4.x
|
||||||
if major == 1:
|
if major == 1:
|
||||||
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
|
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
|
||||||
|
|
||||||
|
|
||||||
|
# Load kerneldoc specific LaTeX settings
|
||||||
|
latex_elements['preamble'] += '''
|
||||||
|
% Load kerneldoc specific LaTeX settings
|
||||||
|
\\input{kerneldoc-preamble.sty}
|
||||||
|
'''
|
||||||
|
|
||||||
# With Sphinx 1.6, it is possible to change the Bg color directly
|
# With Sphinx 1.6, it is possible to change the Bg color directly
|
||||||
# by using:
|
# by using:
|
||||||
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
|
# \definecolor{sphinxnoteBgColor}{RGB}{204,255,255}
|
||||||
@@ -599,6 +489,11 @@ for fn in os.listdir('.'):
|
|||||||
# If false, no module index is generated.
|
# If false, no module index is generated.
|
||||||
#latex_domain_indices = True
|
#latex_domain_indices = True
|
||||||
|
|
||||||
|
# Additional LaTeX stuff to be copied to build directory
|
||||||
|
latex_additional_files = [
|
||||||
|
'sphinx/kerneldoc-preamble.sty',
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
# -- Options for manual page output ---------------------------------------
|
# -- Options for manual page output ---------------------------------------
|
||||||
|
|
||||||
|
|||||||
279
Documentation/core-api/entry.rst
Normal file
279
Documentation/core-api/entry.rst
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
Entry/exit handling for exceptions, interrupts, syscalls and KVM
|
||||||
|
================================================================
|
||||||
|
|
||||||
|
All transitions between execution domains require state updates which are
|
||||||
|
subject to strict ordering constraints. State updates are required for the
|
||||||
|
following:
|
||||||
|
|
||||||
|
* Lockdep
|
||||||
|
* RCU / Context tracking
|
||||||
|
* Preemption counter
|
||||||
|
* Tracing
|
||||||
|
* Time accounting
|
||||||
|
|
||||||
|
The update order depends on the transition type and is explained below in
|
||||||
|
the transition type sections: `Syscalls`_, `KVM`_, `Interrupts and regular
|
||||||
|
exceptions`_, `NMI and NMI-like exceptions`_.
|
||||||
|
|
||||||
|
Non-instrumentable code - noinstr
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
Most instrumentation facilities depend on RCU, so instrumentation is prohibited
|
||||||
|
for entry code before RCU starts watching and exit code after RCU stops
|
||||||
|
watching. In addition, many architectures must save and restore register state,
|
||||||
|
which means that (for example) a breakpoint in the breakpoint entry code would
|
||||||
|
overwrite the debug registers of the initial breakpoint.
|
||||||
|
|
||||||
|
Such code must be marked with the 'noinstr' attribute, placing that code into a
|
||||||
|
special section inaccessible to instrumentation and debug facilities. Some
|
||||||
|
functions are partially instrumentable, which is handled by marking them
|
||||||
|
noinstr and using instrumentation_begin() and instrumentation_end() to flag the
|
||||||
|
instrumentable ranges of code:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
noinstr void entry(void)
|
||||||
|
{
|
||||||
|
handle_entry(); // <-- must be 'noinstr' or '__always_inline'
|
||||||
|
...
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
handle_context(); // <-- instrumentable code
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
...
|
||||||
|
handle_exit(); // <-- must be 'noinstr' or '__always_inline'
|
||||||
|
}
|
||||||
|
|
||||||
|
This allows verification of the 'noinstr' restrictions via objtool on
|
||||||
|
supported architectures.
|
||||||
|
|
||||||
|
Invoking non-instrumentable functions from instrumentable context has no
|
||||||
|
restrictions and is useful to protect e.g. state switching which would
|
||||||
|
cause malfunction if instrumented.
|
||||||
|
|
||||||
|
All non-instrumentable entry/exit code sections before and after the RCU
|
||||||
|
state transitions must run with interrupts disabled.
|
||||||
|
|
||||||
|
Syscalls
|
||||||
|
--------
|
||||||
|
|
||||||
|
Syscall-entry code starts in assembly code and calls out into low-level C code
|
||||||
|
after establishing low-level architecture-specific state and stack frames. This
|
||||||
|
low-level C code must not be instrumented. A typical syscall handling function
|
||||||
|
invoked from low-level assembly code looks like this:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
noinstr void syscall(struct pt_regs *regs, int nr)
|
||||||
|
{
|
||||||
|
arch_syscall_enter(regs);
|
||||||
|
nr = syscall_enter_from_user_mode(regs, nr);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
if (!invoke_syscall(regs, nr) && nr != -1)
|
||||||
|
result_reg(regs) = __sys_ni_syscall(regs);
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
syscall_exit_to_user_mode(regs);
|
||||||
|
}
|
||||||
|
|
||||||
|
syscall_enter_from_user_mode() first invokes enter_from_user_mode() which
|
||||||
|
establishes state in the following order:
|
||||||
|
|
||||||
|
* Lockdep
|
||||||
|
* RCU / Context tracking
|
||||||
|
* Tracing
|
||||||
|
|
||||||
|
and then invokes the various entry work functions like ptrace, seccomp, audit,
|
||||||
|
syscall tracing, etc. After all that is done, the instrumentable invoke_syscall
|
||||||
|
function can be invoked. The instrumentable code section then ends, after which
|
||||||
|
syscall_exit_to_user_mode() is invoked.
|
||||||
|
|
||||||
|
syscall_exit_to_user_mode() handles all work which needs to be done before
|
||||||
|
returning to user space like tracing, audit, signals, task work etc. After
|
||||||
|
that it invokes exit_to_user_mode() which again handles the state
|
||||||
|
transition in the reverse order:
|
||||||
|
|
||||||
|
* Tracing
|
||||||
|
* RCU / Context tracking
|
||||||
|
* Lockdep
|
||||||
|
|
||||||
|
syscall_enter_from_user_mode() and syscall_exit_to_user_mode() are also
|
||||||
|
available as fine grained subfunctions in cases where the architecture code
|
||||||
|
has to do extra work between the various steps. In such cases it has to
|
||||||
|
ensure that enter_from_user_mode() is called first on entry and
|
||||||
|
exit_to_user_mode() is called last on exit.
|
||||||
|
|
||||||
|
Do not nest syscalls. Nested syscalls will cause RCU and/or context tracking
|
||||||
|
to print a warning.
|
||||||
|
|
||||||
|
KVM
|
||||||
|
---
|
||||||
|
|
||||||
|
Entering or exiting guest mode is very similar to syscalls. From the host
|
||||||
|
kernel point of view the CPU goes off into user space when entering the
|
||||||
|
guest and returns to the kernel on exit.
|
||||||
|
|
||||||
|
kvm_guest_enter_irqoff() is a KVM-specific variant of exit_to_user_mode()
|
||||||
|
and kvm_guest_exit_irqoff() is the KVM variant of enter_from_user_mode().
|
||||||
|
The state operations have the same ordering.
|
||||||
|
|
||||||
|
Task work handling is done separately for guest at the boundary of the
|
||||||
|
vcpu_run() loop via xfer_to_guest_mode_handle_work() which is a subset of
|
||||||
|
the work handled on return to user space.
|
||||||
|
|
||||||
|
Do not nest KVM entry/exit transitions because doing so is nonsensical.
|
||||||
|
|
||||||
|
Interrupts and regular exceptions
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
Interrupt entry and exit handling is slightly more complex than syscalls
|
||||||
|
and KVM transitions.
|
||||||
|
|
||||||
|
If an interrupt is raised while the CPU executes in user space, the entry
|
||||||
|
and exit handling is exactly the same as for syscalls.
|
||||||
|
|
||||||
|
If the interrupt is raised while the CPU executes in kernel space the entry and
|
||||||
|
exit handling is slightly different. RCU state is only updated when the
|
||||||
|
interrupt is raised in the context of the CPU's idle task. Otherwise, RCU will
|
||||||
|
already be watching. Lockdep and tracing have to be updated unconditionally.
|
||||||
|
|
||||||
|
irqentry_enter() and irqentry_exit() provide the implementation for this.
|
||||||
|
|
||||||
|
The architecture-specific part looks similar to syscall handling:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
noinstr void interrupt(struct pt_regs *regs, int nr)
|
||||||
|
{
|
||||||
|
arch_interrupt_enter(regs);
|
||||||
|
state = irqentry_enter(regs);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
|
||||||
|
irq_enter_rcu();
|
||||||
|
invoke_irq_handler(regs, nr);
|
||||||
|
irq_exit_rcu();
|
||||||
|
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
irqentry_exit(regs, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
Note that the invocation of the actual interrupt handler is within an
|
||||||
|
irq_enter_rcu() and irq_exit_rcu() pair.
|
||||||
|
|
||||||
|
irq_enter_rcu() updates the preemption count which makes in_hardirq()
|
||||||
|
return true, handles NOHZ tick state and interrupt time accounting. This
|
||||||
|
means that up to the point where irq_enter_rcu() is invoked in_hardirq()
|
||||||
|
returns false.
|
||||||
|
|
||||||
|
irq_exit_rcu() handles interrupt time accounting, undoes the preemption
|
||||||
|
count update and eventually handles soft interrupts and NOHZ tick state.
|
||||||
|
|
||||||
|
In theory, the preemption count could be updated in irqentry_enter(). In
|
||||||
|
practice, deferring this update to irq_enter_rcu() allows the preemption-count
|
||||||
|
code to be traced, while also maintaining symmetry with irq_exit_rcu() and
|
||||||
|
irqentry_exit(), which are described in the next paragraph. The only downside
|
||||||
|
is that the early entry code up to irq_enter_rcu() must be aware that the
|
||||||
|
preemption count has not yet been updated with the HARDIRQ_OFFSET state.
|
||||||
|
|
||||||
|
Note that irq_exit_rcu() must remove HARDIRQ_OFFSET from the preemption count
|
||||||
|
before it handles soft interrupts, whose handlers must run in BH context rather
|
||||||
|
than irq-disabled context. In addition, irqentry_exit() might schedule, which
|
||||||
|
also requires that HARDIRQ_OFFSET has been removed from the preemption count.
|
||||||
|
|
||||||
|
Even though interrupt handlers are expected to run with local interrupts
|
||||||
|
disabled, interrupt nesting is common from an entry/exit perspective. For
|
||||||
|
example, softirq handling happens within an irqentry_{enter,exit}() block with
|
||||||
|
local interrupts enabled. Also, although uncommon, nothing prevents an
|
||||||
|
interrupt handler from re-enabling interrupts.
|
||||||
|
|
||||||
|
Interrupt entry/exit code doesn't strictly need to handle reentrancy, since it
|
||||||
|
runs with local interrupts disabled. But NMIs can happen anytime, and a lot of
|
||||||
|
the entry code is shared between the two.
|
||||||
|
|
||||||
|
NMI and NMI-like exceptions
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
NMIs and NMI-like exceptions (machine checks, double faults, debug
|
||||||
|
interrupts, etc.) can hit any context and must be extra careful with
|
||||||
|
the state.
|
||||||
|
|
||||||
|
State changes for debug exceptions and machine-check exceptions depend on
|
||||||
|
whether these exceptions happened in user-space (breakpoints or watchpoints) or
|
||||||
|
in kernel mode (code patching). From user-space, they are treated like
|
||||||
|
interrupts, while from kernel mode they are treated like NMIs.
|
||||||
|
|
||||||
|
NMIs and other NMI-like exceptions handle state transitions without
|
||||||
|
distinguishing between user-mode and kernel-mode origin.
|
||||||
|
|
||||||
|
The state update on entry is handled in irqentry_nmi_enter() which updates
|
||||||
|
state in the following order:
|
||||||
|
|
||||||
|
* Preemption counter
|
||||||
|
* Lockdep
|
||||||
|
* RCU / Context tracking
|
||||||
|
* Tracing
|
||||||
|
|
||||||
|
The exit counterpart irqentry_nmi_exit() does the reverse operation in the
|
||||||
|
reverse order.
|
||||||
|
|
||||||
|
Note that the update of the preemption counter has to be the first
|
||||||
|
operation on enter and the last operation on exit. The reason is that both
|
||||||
|
lockdep and RCU rely on in_nmi() returning true in this case. The
|
||||||
|
preemption count modification in the NMI entry/exit case must not be
|
||||||
|
traced.
|
||||||
|
|
||||||
|
Architecture-specific code looks like this:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
noinstr void nmi(struct pt_regs *regs)
|
||||||
|
{
|
||||||
|
arch_nmi_enter(regs);
|
||||||
|
state = irqentry_nmi_enter(regs);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
nmi_handler(regs);
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
irqentry_nmi_exit(regs, state);
|
||||||
|
}
|
||||||
|
|
||||||
|
and for e.g. a debug exception it can look like this:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
noinstr void debug(struct pt_regs *regs)
|
||||||
|
{
|
||||||
|
arch_nmi_enter(regs);
|
||||||
|
|
||||||
|
debug_regs = save_debug_regs();
|
||||||
|
|
||||||
|
if (user_mode(regs)) {
|
||||||
|
state = irqentry_enter(regs);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
user_mode_debug_handler(regs, debug_regs);
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
irqentry_exit(regs, state);
|
||||||
|
} else {
|
||||||
|
state = irqentry_nmi_enter(regs);
|
||||||
|
|
||||||
|
instrumentation_begin();
|
||||||
|
kernel_mode_debug_handler(regs, debug_regs);
|
||||||
|
instrumentation_end();
|
||||||
|
|
||||||
|
irqentry_nmi_exit(regs, state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
There is no combined irqentry_nmi_if_kernel() function available as the
|
||||||
|
above cannot be handled in an exception-agnostic way.
|
||||||
|
|
||||||
|
NMIs can happen in any context. For example, an NMI-like exception can be triggered
|
||||||
|
while handling an NMI. So NMI entry code has to be reentrant and state updates
|
||||||
|
need to handle nesting.
|
||||||
@@ -44,6 +44,14 @@ Library functionality that is used throughout the kernel.
|
|||||||
timekeeping
|
timekeeping
|
||||||
errseq
|
errseq
|
||||||
|
|
||||||
|
Low level entry and exit
|
||||||
|
========================
|
||||||
|
|
||||||
|
.. toctree::
|
||||||
|
:maxdepth: 1
|
||||||
|
|
||||||
|
entry
|
||||||
|
|
||||||
Concurrency primitives
|
Concurrency primitives
|
||||||
======================
|
======================
|
||||||
|
|
||||||
|
|||||||
@@ -58,15 +58,30 @@ Virtually Contiguous Mappings
|
|||||||
File Mapping and Page Cache
|
File Mapping and Page Cache
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
.. kernel-doc:: mm/readahead.c
|
Filemap
|
||||||
:export:
|
-------
|
||||||
|
|
||||||
.. kernel-doc:: mm/filemap.c
|
.. kernel-doc:: mm/filemap.c
|
||||||
:export:
|
:export:
|
||||||
|
|
||||||
|
Readahead
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. kernel-doc:: mm/readahead.c
|
||||||
|
:doc: Readahead Overview
|
||||||
|
|
||||||
|
.. kernel-doc:: mm/readahead.c
|
||||||
|
:export:
|
||||||
|
|
||||||
|
Writeback
|
||||||
|
---------
|
||||||
|
|
||||||
.. kernel-doc:: mm/page-writeback.c
|
.. kernel-doc:: mm/page-writeback.c
|
||||||
:export:
|
:export:
|
||||||
|
|
||||||
|
Truncate
|
||||||
|
--------
|
||||||
|
|
||||||
.. kernel-doc:: mm/truncate.c
|
.. kernel-doc:: mm/truncate.c
|
||||||
:export:
|
:export:
|
||||||
|
|
||||||
|
|||||||
@@ -55,18 +55,18 @@ flags the caller provides. The caller is required to pass in a non-null struct
|
|||||||
pages* array, and the function then pins pages by incrementing each by a special
|
pages* array, and the function then pins pages by incrementing each by a special
|
||||||
value: GUP_PIN_COUNTING_BIAS.
|
value: GUP_PIN_COUNTING_BIAS.
|
||||||
|
|
||||||
For huge pages (and in fact, any compound page of more than 2 pages), the
|
For compound pages, the GUP_PIN_COUNTING_BIAS scheme is not used. Instead,
|
||||||
GUP_PIN_COUNTING_BIAS scheme is not used. Instead, an exact form of pin counting
|
an exact form of pin counting is achieved, by using the 2nd struct page
|
||||||
is achieved, by using the 3rd struct page in the compound page. A new struct
|
in the compound page. A new struct page field, compound_pincount, has
|
||||||
page field, hpage_pinned_refcount, has been added in order to support this.
|
been added in order to support this.
|
||||||
|
|
||||||
This approach for compound pages avoids the counting upper limit problems that
|
This approach for compound pages avoids the counting upper limit problems that
|
||||||
are discussed below. Those limitations would have been aggravated severely by
|
are discussed below. Those limitations would have been aggravated severely by
|
||||||
huge pages, because each tail page adds a refcount to the head page. And in
|
huge pages, because each tail page adds a refcount to the head page. And in
|
||||||
fact, testing revealed that, without a separate hpage_pinned_refcount field,
|
fact, testing revealed that, without a separate compound_pincount field,
|
||||||
page overflows were seen in some huge page stress tests.
|
page overflows were seen in some huge page stress tests.
|
||||||
|
|
||||||
This also means that huge pages and compound pages (of order > 1) do not suffer
|
This also means that huge pages and compound pages do not suffer
|
||||||
from the false positives problem that is mentioned below.::
|
from the false positives problem that is mentioned below.::
|
||||||
|
|
||||||
Function
|
Function
|
||||||
@@ -264,9 +264,9 @@ place.)
|
|||||||
Other diagnostics
|
Other diagnostics
|
||||||
=================
|
=================
|
||||||
|
|
||||||
dump_page() has been enhanced slightly, to handle these new counting fields, and
|
dump_page() has been enhanced slightly, to handle these new counting
|
||||||
to better report on compound pages in general. Specifically, for compound pages
|
fields, and to better report on compound pages in general. Specifically,
|
||||||
with order > 1, the exact (hpage_pinned_refcount) pincount is reported.
|
for compound pages, the exact (compound_pincount) pincount is reported.
|
||||||
|
|
||||||
References
|
References
|
||||||
==========
|
==========
|
||||||
|
|||||||
@@ -315,11 +315,15 @@ indeed the normal API is implemented in terms of the advanced API. The
|
|||||||
advanced API is only available to modules with a GPL-compatible license.
|
advanced API is only available to modules with a GPL-compatible license.
|
||||||
|
|
||||||
The advanced API is based around the xa_state. This is an opaque data
|
The advanced API is based around the xa_state. This is an opaque data
|
||||||
structure which you declare on the stack using the XA_STATE()
|
structure which you declare on the stack using the XA_STATE() macro.
|
||||||
macro. This macro initialises the xa_state ready to start walking
|
This macro initialises the xa_state ready to start walking around the
|
||||||
around the XArray. It is used as a cursor to maintain the position
|
XArray. It is used as a cursor to maintain the position in the XArray
|
||||||
in the XArray and let you compose various operations together without
|
and let you compose various operations together without having to restart
|
||||||
having to restart from the top every time.
|
from the top every time. The contents of the xa_state are protected by
|
||||||
|
the rcu_read_lock() or the xas_lock(). If you need to drop whichever of
|
||||||
|
those locks is protecting your state and tree, you must call xas_pause()
|
||||||
|
so that future calls do not rely on the parts of the state which were
|
||||||
|
left unprotected.
|
||||||
|
|
||||||
The xa_state is also used to store errors. You can call
|
The xa_state is also used to store errors. You can call
|
||||||
xas_error() to retrieve the error. All operations check whether
|
xas_error() to retrieve the error. All operations check whether
|
||||||
|
|||||||
@@ -75,6 +75,9 @@ And optionally
|
|||||||
.resume - A pointer to a per-policy resume function which is called
|
.resume - A pointer to a per-policy resume function which is called
|
||||||
with interrupts disabled and _before_ the governor is started again.
|
with interrupts disabled and _before_ the governor is started again.
|
||||||
|
|
||||||
|
.ready - A pointer to a per-policy ready function which is called after
|
||||||
|
the policy is fully initialized.
|
||||||
|
|
||||||
.attr - A pointer to a NULL-terminated list of "struct freq_attr" which
|
.attr - A pointer to a NULL-terminated list of "struct freq_attr" which
|
||||||
allow to export values to sysfs.
|
allow to export values to sysfs.
|
||||||
|
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ Software tag-based KASAN mode is only supported in Clang.
|
|||||||
|
|
||||||
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
The hardware KASAN mode (#3) relies on hardware to perform the checks but
|
||||||
still requires a compiler version that supports memory tagging instructions.
|
still requires a compiler version that supports memory tagging instructions.
|
||||||
This mode is supported in GCC 10+ and Clang 11+.
|
This mode is supported in GCC 10+ and Clang 12+.
|
||||||
|
|
||||||
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
Both software KASAN modes work with SLUB and SLAB memory allocators,
|
||||||
while the hardware tag-based KASAN currently only supports SLUB.
|
while the hardware tag-based KASAN currently only supports SLUB.
|
||||||
@@ -206,6 +206,9 @@ additional boot parameters that allow disabling KASAN or controlling features:
|
|||||||
Asymmetric mode: a bad access is detected synchronously on reads and
|
Asymmetric mode: a bad access is detected synchronously on reads and
|
||||||
asynchronously on writes.
|
asynchronously on writes.
|
||||||
|
|
||||||
|
- ``kasan.vmalloc=off`` or ``=on`` disables or enables tagging of vmalloc
|
||||||
|
allocations (default: ``on``).
|
||||||
|
|
||||||
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
|
- ``kasan.stacktrace=off`` or ``=on`` disables or enables alloc and free stack
|
||||||
traces collection (default: ``on``).
|
traces collection (default: ``on``).
|
||||||
|
|
||||||
@@ -279,8 +282,8 @@ Software tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
|||||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||||
reserved to tag freed memory regions.
|
reserved to tag freed memory regions.
|
||||||
|
|
||||||
Software tag-based KASAN currently only supports tagging of slab and page_alloc
|
Software tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||||
memory.
|
and vmalloc memory.
|
||||||
|
|
||||||
Hardware tag-based KASAN
|
Hardware tag-based KASAN
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
@@ -303,8 +306,8 @@ Hardware tag-based KASAN uses 0xFF as a match-all pointer tag (accesses through
|
|||||||
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
pointers with the 0xFF pointer tag are not checked). The value 0xFE is currently
|
||||||
reserved to tag freed memory regions.
|
reserved to tag freed memory regions.
|
||||||
|
|
||||||
Hardware tag-based KASAN currently only supports tagging of slab and page_alloc
|
Hardware tag-based KASAN currently only supports tagging of slab, page_alloc,
|
||||||
memory.
|
and VM_ALLOC-based vmalloc memory.
|
||||||
|
|
||||||
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
If the hardware does not support MTE (pre ARMv8.5), hardware tag-based KASAN
|
||||||
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
will not be enabled. In this case, all KASAN boot parameters are ignored.
|
||||||
@@ -319,6 +322,8 @@ checking gets disabled.
|
|||||||
Shadow memory
|
Shadow memory
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
|
The contents of this section are only applicable to software KASAN modes.
|
||||||
|
|
||||||
The kernel maps memory in several different parts of the address space.
|
The kernel maps memory in several different parts of the address space.
|
||||||
The range of kernel virtual addresses is large: there is not enough real
|
The range of kernel virtual addresses is large: there is not enough real
|
||||||
memory to support a real shadow region for every address that could be
|
memory to support a real shadow region for every address that could be
|
||||||
@@ -349,7 +354,7 @@ CONFIG_KASAN_VMALLOC
|
|||||||
|
|
||||||
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
With ``CONFIG_KASAN_VMALLOC``, KASAN can cover vmalloc space at the
|
||||||
cost of greater memory usage. Currently, this is supported on x86,
|
cost of greater memory usage. Currently, this is supported on x86,
|
||||||
riscv, s390, and powerpc.
|
arm64, riscv, s390, and powerpc.
|
||||||
|
|
||||||
This works by hooking into vmalloc and vmap and dynamically
|
This works by hooking into vmalloc and vmap and dynamically
|
||||||
allocating real shadow memory to back the mappings.
|
allocating real shadow memory to back the mappings.
|
||||||
|
|||||||
@@ -41,6 +41,18 @@ guarded by KFENCE. The default is configurable via the Kconfig option
|
|||||||
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
|
``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
|
||||||
disables KFENCE.
|
disables KFENCE.
|
||||||
|
|
||||||
|
The sample interval controls a timer that sets up KFENCE allocations. By
|
||||||
|
default, to keep the real sample interval predictable, the normal timer also
|
||||||
|
causes CPU wake-ups when the system is completely idle. This may be undesirable
|
||||||
|
on power-constrained systems. The boot parameter ``kfence.deferrable=1``
|
||||||
|
instead switches to a "deferrable" timer which does not force CPU wake-ups on
|
||||||
|
idle systems, at the risk of unpredictable sample intervals. The default is
|
||||||
|
configurable via the Kconfig option ``CONFIG_KFENCE_DEFERRABLE``.
|
||||||
|
|
||||||
|
.. warning::
|
||||||
|
The KUnit test suite is very likely to fail when using a deferrable timer
|
||||||
|
since it currently causes very unpredictable sample intervals.
|
||||||
|
|
||||||
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
|
The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
|
||||||
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
|
further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
|
||||||
255), the number of available guarded objects can be controlled. Each object
|
255), the number of available guarded objects can be controlled. Each object
|
||||||
|
|||||||
@@ -7,6 +7,14 @@ directory. These are intended to be small tests to exercise individual code
|
|||||||
paths in the kernel. Tests are intended to be run after building, installing
|
paths in the kernel. Tests are intended to be run after building, installing
|
||||||
and booting a kernel.
|
and booting a kernel.
|
||||||
|
|
||||||
|
Kselftest from mainline can be run on older stable kernels. Running tests
|
||||||
|
from mainline offers the best coverage. Several test rings run mainline
|
||||||
|
kselftest suite on stable releases. The reason is that when a new test
|
||||||
|
gets added to test existing code to regression test a bug, we should be
|
||||||
|
able to run that test on an older kernel. Hence, it is important to keep
|
||||||
|
code that can still test an older kernel and make sure it skips the test
|
||||||
|
gracefully on newer releases.
|
||||||
|
|
||||||
You can find additional information on Kselftest framework, how to
|
You can find additional information on Kselftest framework, how to
|
||||||
write new tests using the framework on Kselftest wiki:
|
write new tests using the framework on Kselftest wiki:
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
.. SPDX-License-Identifier: GPL-2.0
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
========================================
|
===================================================
|
||||||
The Kernel Test Anything Protocol (KTAP)
|
The Kernel Test Anything Protocol (KTAP), version 1
|
||||||
========================================
|
===================================================
|
||||||
|
|
||||||
TAP, or the Test Anything Protocol is a format for specifying test results used
|
TAP, or the Test Anything Protocol is a format for specifying test results used
|
||||||
by a number of projects. Its website and specification are found at this `link
|
by a number of projects. Its website and specification are found at this `link
|
||||||
@@ -68,7 +68,7 @@ Test case result lines
|
|||||||
Test case result lines indicate the final status of a test.
|
Test case result lines indicate the final status of a test.
|
||||||
They are required and must have the format:
|
They are required and must have the format:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
|
<result> <number> [<description>][ # [<directive>] [<diagnostic data>]]
|
||||||
|
|
||||||
@@ -117,32 +117,32 @@ separator.
|
|||||||
|
|
||||||
Example result lines include:
|
Example result lines include:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
ok 1 test_case_name
|
ok 1 test_case_name
|
||||||
|
|
||||||
The test "test_case_name" passed.
|
The test "test_case_name" passed.
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
not ok 1 test_case_name
|
not ok 1 test_case_name
|
||||||
|
|
||||||
The test "test_case_name" failed.
|
The test "test_case_name" failed.
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
ok 1 test # SKIP necessary dependency unavailable
|
ok 1 test # SKIP necessary dependency unavailable
|
||||||
|
|
||||||
The test "test" was SKIPPED with the diagnostic message "necessary dependency
|
The test "test" was SKIPPED with the diagnostic message "necessary dependency
|
||||||
unavailable".
|
unavailable".
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
not ok 1 test # TIMEOUT 30 seconds
|
not ok 1 test # TIMEOUT 30 seconds
|
||||||
|
|
||||||
The test "test" timed out, with diagnostic data "30 seconds".
|
The test "test" timed out, with diagnostic data "30 seconds".
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
ok 5 check return code # rcode=0
|
ok 5 check return code # rcode=0
|
||||||
|
|
||||||
@@ -174,6 +174,13 @@ There may be lines within KTAP output that do not follow the format of one of
|
|||||||
the four formats for lines described above. This is allowed, however, they will
|
the four formats for lines described above. This is allowed, however, they will
|
||||||
not influence the status of the tests.
|
not influence the status of the tests.
|
||||||
|
|
||||||
|
This is an important difference from TAP. Kernel tests may print messages
|
||||||
|
to the system console or a log file. Both of these destinations may contain
|
||||||
|
messages either from unrelated kernel or userspace activity, or kernel
|
||||||
|
messages from non-test code that is invoked by the test. The kernel code
|
||||||
|
invoked by the test likely is not aware that a test is in progress and
|
||||||
|
thus cannot print the message as a diagnostic message.
|
||||||
|
|
||||||
Nested tests
|
Nested tests
|
||||||
------------
|
------------
|
||||||
|
|
||||||
@@ -186,13 +193,16 @@ starting with another KTAP version line and test plan, and end with the overall
|
|||||||
result. If one of the subtests fail, for example, the parent test should also
|
result. If one of the subtests fail, for example, the parent test should also
|
||||||
fail.
|
fail.
|
||||||
|
|
||||||
Additionally, all result lines in a subtest should be indented. One level of
|
Additionally, all lines in a subtest should be indented. One level of
|
||||||
indentation is two spaces: " ". The indentation should begin at the version
|
indentation is two spaces: " ". The indentation should begin at the version
|
||||||
line and should end before the parent test's result line.
|
line and should end before the parent test's result line.
|
||||||
|
|
||||||
|
"Unknown lines" are not considered to be lines in a subtest and thus are
|
||||||
|
allowed to be either indented or not indented.
|
||||||
|
|
||||||
An example of a test with two nested subtests:
|
An example of a test with two nested subtests:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
KTAP version 1
|
KTAP version 1
|
||||||
1..1
|
1..1
|
||||||
@@ -205,7 +215,7 @@ An example of a test with two nested subtests:
|
|||||||
|
|
||||||
An example format with multiple levels of nested testing:
|
An example format with multiple levels of nested testing:
|
||||||
|
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
KTAP version 1
|
KTAP version 1
|
||||||
1..2
|
1..2
|
||||||
@@ -224,10 +234,15 @@ An example format with multiple levels of nested testing:
|
|||||||
Major differences between TAP and KTAP
|
Major differences between TAP and KTAP
|
||||||
--------------------------------------
|
--------------------------------------
|
||||||
|
|
||||||
Note the major differences between the TAP and KTAP specification:
|
================================================== ========= ===============
|
||||||
- yaml and json are not recommended in diagnostic messages
|
Feature TAP KTAP
|
||||||
- TODO directive not recognized
|
================================================== ========= ===============
|
||||||
- KTAP allows for an arbitrary number of tests to be nested
|
yaml and json in diagnostic message ok not recommended
|
||||||
|
TODO directive ok not recognized
|
||||||
|
allows an arbitrary number of tests to be nested no yes
|
||||||
|
"Unknown lines" are in category of "Anything else" yes no
|
||||||
|
"Unknown lines" are incorrect allowed
|
||||||
|
================================================== ========= ===============
|
||||||
|
|
||||||
The TAP14 specification does permit nested tests, but instead of using another
|
The TAP14 specification does permit nested tests, but instead of using another
|
||||||
nested version line, uses a line of the form
|
nested version line, uses a line of the form
|
||||||
@@ -235,7 +250,7 @@ nested version line, uses a line of the form
|
|||||||
|
|
||||||
Example KTAP output
|
Example KTAP output
|
||||||
--------------------
|
--------------------
|
||||||
.. code-block::
|
.. code-block:: none
|
||||||
|
|
||||||
KTAP version 1
|
KTAP version 1
|
||||||
1..1
|
1..1
|
||||||
|
|||||||
@@ -26,10 +26,7 @@ The fundamental unit in KUnit is the test case. The KUnit test cases are
|
|||||||
grouped into KUnit suites. A KUnit test case is a function with type
|
grouped into KUnit suites. A KUnit test case is a function with type
|
||||||
signature ``void (*)(struct kunit *test)``.
|
signature ``void (*)(struct kunit *test)``.
|
||||||
These test case functions are wrapped in a struct called
|
These test case functions are wrapped in a struct called
|
||||||
``struct kunit_case``. For code, see:
|
struct kunit_case.
|
||||||
|
|
||||||
.. kernel-doc:: include/kunit/test.h
|
|
||||||
:identifiers: kunit_case
|
|
||||||
|
|
||||||
.. note::
|
.. note::
|
||||||
``generate_params`` is optional for non-parameterized tests.
|
``generate_params`` is optional for non-parameterized tests.
|
||||||
@@ -152,18 +149,12 @@ Parameterized Tests
|
|||||||
Each KUnit parameterized test is associated with a collection of
|
Each KUnit parameterized test is associated with a collection of
|
||||||
parameters. The test is invoked multiple times, once for each parameter
|
parameters. The test is invoked multiple times, once for each parameter
|
||||||
value and the parameter is stored in the ``param_value`` field.
|
value and the parameter is stored in the ``param_value`` field.
|
||||||
The test case includes a ``KUNIT_CASE_PARAM()`` macro that accepts a
|
The test case includes a KUNIT_CASE_PARAM() macro that accepts a
|
||||||
generator function.
|
generator function.
|
||||||
The generator function is passed the previous parameter and returns the next
|
The generator function is passed the previous parameter and returns the next
|
||||||
parameter. It also provides a macro to generate common-case generators based on
|
parameter. It also provides a macro to generate common-case generators based on
|
||||||
arrays.
|
arrays.
|
||||||
|
|
||||||
For code, see:
|
|
||||||
|
|
||||||
.. kernel-doc:: include/kunit/test.h
|
|
||||||
:identifiers: KUNIT_ARRAY_PARAM
|
|
||||||
|
|
||||||
|
|
||||||
kunit_tool (Command Line Test Harness)
|
kunit_tool (Command Line Test Harness)
|
||||||
======================================
|
======================================
|
||||||
|
|
||||||
|
|||||||
@@ -242,7 +242,7 @@ example:
|
|||||||
|
|
||||||
int rectangle_area(struct shape *this)
|
int rectangle_area(struct shape *this)
|
||||||
{
|
{
|
||||||
struct rectangle *self = container_of(this, struct shape, parent);
|
struct rectangle *self = container_of(this, struct rectangle, parent);
|
||||||
|
|
||||||
return self->length * self->width;
|
return self->length * self->width;
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -100,3 +100,5 @@ have already built it.
|
|||||||
|
|
||||||
The optional make variable CF can be used to pass arguments to sparse. The
|
The optional make variable CF can be used to pass arguments to sparse. The
|
||||||
build system passes -Wbitwise to sparse automatically.
|
build system passes -Wbitwise to sparse automatically.
|
||||||
|
|
||||||
|
Note that sparse defines the __CHECKER__ preprocessor symbol.
|
||||||
|
|||||||
@@ -3,9 +3,10 @@ DT_DOC_CHECKER ?= dt-doc-validate
|
|||||||
DT_EXTRACT_EX ?= dt-extract-example
|
DT_EXTRACT_EX ?= dt-extract-example
|
||||||
DT_MK_SCHEMA ?= dt-mk-schema
|
DT_MK_SCHEMA ?= dt-mk-schema
|
||||||
|
|
||||||
DT_SCHEMA_LINT = $(shell which yamllint)
|
DT_SCHEMA_LINT = $(shell which yamllint || \
|
||||||
|
echo "warning: python package 'yamllint' not installed, skipping" >&2)
|
||||||
|
|
||||||
DT_SCHEMA_MIN_VERSION = 2021.2.1
|
DT_SCHEMA_MIN_VERSION = 2022.3
|
||||||
|
|
||||||
PHONY += check_dtschema_version
|
PHONY += check_dtschema_version
|
||||||
check_dtschema_version:
|
check_dtschema_version:
|
||||||
@@ -24,18 +25,11 @@ quiet_cmd_extract_ex = DTEX $@
|
|||||||
$(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
|
$(obj)/%.example.dts: $(src)/%.yaml check_dtschema_version FORCE
|
||||||
$(call if_changed,extract_ex)
|
$(call if_changed,extract_ex)
|
||||||
|
|
||||||
# Use full schemas when checking %.example.dts
|
|
||||||
DT_TMP_SCHEMA := $(obj)/processed-schema-examples.json
|
|
||||||
|
|
||||||
find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
|
find_all_cmd = find $(srctree)/$(src) \( -name '*.yaml' ! \
|
||||||
-name 'processed-schema*' ! \
|
-name 'processed-schema*' \)
|
||||||
-name '*.example.dt.yaml' \)
|
|
||||||
|
|
||||||
ifeq ($(DT_SCHEMA_FILES),)
|
find_cmd = $(find_all_cmd) | grep -F "$(DT_SCHEMA_FILES)"
|
||||||
find_cmd = $(find_all_cmd)
|
CHK_DT_DOCS := $(shell $(find_cmd))
|
||||||
else
|
|
||||||
find_cmd = echo $(addprefix $(srctree)/, $(DT_SCHEMA_FILES))
|
|
||||||
endif
|
|
||||||
|
|
||||||
quiet_cmd_yamllint = LINT $(src)
|
quiet_cmd_yamllint = LINT $(src)
|
||||||
cmd_yamllint = ($(find_cmd) | \
|
cmd_yamllint = ($(find_cmd) | \
|
||||||
@@ -72,35 +66,14 @@ override DTC_FLAGS := \
|
|||||||
# Disable undocumented compatible checks until warning free
|
# Disable undocumented compatible checks until warning free
|
||||||
override DT_CHECKER_FLAGS ?=
|
override DT_CHECKER_FLAGS ?=
|
||||||
|
|
||||||
$(obj)/processed-schema-examples.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
$(obj)/processed-schema.json: $(DT_DOCS) $(src)/.yamllint check_dtschema_version FORCE
|
||||||
$(call if_changed_rule,chkdt)
|
$(call if_changed_rule,chkdt)
|
||||||
|
|
||||||
ifeq ($(DT_SCHEMA_FILES),)
|
always-y += processed-schema.json
|
||||||
|
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dts, $(CHK_DT_DOCS))
|
||||||
# Unless DT_SCHEMA_FILES is specified, use the full schema for dtbs_check too.
|
always-$(CHECK_DT_BINDING) += $(patsubst $(srctree)/$(src)/%.yaml,%.example.dtb, $(CHK_DT_DOCS))
|
||||||
# Just copy processed-schema-examples.json
|
|
||||||
|
|
||||||
$(obj)/processed-schema.json: $(obj)/processed-schema-examples.json FORCE
|
|
||||||
$(call if_changed,copy)
|
|
||||||
|
|
||||||
DT_SCHEMA_FILES = $(DT_DOCS)
|
|
||||||
|
|
||||||
else
|
|
||||||
|
|
||||||
# If DT_SCHEMA_FILES is specified, use it for processed-schema.json
|
|
||||||
|
|
||||||
$(obj)/processed-schema.json: DT_MK_SCHEMA_FLAGS := -u
|
|
||||||
$(obj)/processed-schema.json: $(DT_SCHEMA_FILES) check_dtschema_version FORCE
|
|
||||||
$(call if_changed,mk_schema)
|
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
always-$(CHECK_DT_BINDING) += processed-schema-examples.json
|
|
||||||
always-$(CHECK_DTBS) += processed-schema.json
|
|
||||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
|
|
||||||
always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
|
|
||||||
|
|
||||||
# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
|
# Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
|
||||||
# build artifacts here before they are processed by scripts/Makefile.clean
|
# build artifacts here before they are processed by scripts/Makefile.clean
|
||||||
clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
|
clean-files = $(shell find $(obj) \( -name '*.example.dts' -o \
|
||||||
-name '*.example.dt.yaml' \) -delete 2>/dev/null)
|
-name '*.example.dtb' \) -delete 2>/dev/null)
|
||||||
|
|||||||
28
Documentation/devicetree/bindings/arm/airoha.yaml
Normal file
28
Documentation/devicetree/bindings/arm/airoha.yaml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||||
|
%YAML 1.2
|
||||||
|
---
|
||||||
|
$id: http://devicetree.org/schemas/arm/airoha.yaml#
|
||||||
|
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||||
|
|
||||||
|
title: Airoha SoC based Platforms Device Tree Bindings
|
||||||
|
|
||||||
|
maintainers:
|
||||||
|
- Felix Fietkau <nbd@nbd.name>
|
||||||
|
- John Crispin <john@phrozen.org>
|
||||||
|
|
||||||
|
description:
|
||||||
|
Boards with an Airoha SoC shall have the following properties.
|
||||||
|
|
||||||
|
properties:
|
||||||
|
$nodename:
|
||||||
|
const: '/'
|
||||||
|
compatible:
|
||||||
|
oneOf:
|
||||||
|
- items:
|
||||||
|
- enum:
|
||||||
|
- airoha,en7523-evb
|
||||||
|
- const: airoha,en7523
|
||||||
|
|
||||||
|
additionalProperties: true
|
||||||
|
|
||||||
|
...
|
||||||
@@ -13,11 +13,45 @@ properties:
|
|||||||
$nodename:
|
$nodename:
|
||||||
const: "/"
|
const: "/"
|
||||||
compatible:
|
compatible:
|
||||||
|
oneOf:
|
||||||
|
- description: Arria 5 boards
|
||||||
items:
|
items:
|
||||||
- enum:
|
- enum:
|
||||||
- altr,socfpga-cyclone5
|
- altr,socfpga-arria5-socdk
|
||||||
- altr,socfpga-arria5
|
- const: altr,socfpga-arria5
|
||||||
- altr,socfpga-arria10
|
- const: altr,socfpga
|
||||||
|
|
||||||
|
- description: Arria 10 boards
|
||||||
|
items:
|
||||||
|
- enum:
|
||||||
|
- altr,socfpga-arria10-socdk
|
||||||
|
- enclustra,mercury-aa1
|
||||||
|
- const: altr,socfpga-arria10
|
||||||
|
- const: altr,socfpga
|
||||||
|
|
||||||
|
- description: Cyclone 5 boards
|
||||||
|
items:
|
||||||
|
- enum:
|
||||||
|
- altr,socfpga-cyclone5-socdk
|
||||||
|
- denx,mcvevk
|
||||||
|
- ebv,socrates
|
||||||
|
- macnica,sodia
|
||||||
|
- novtech,chameleon96
|
||||||
|
- samtec,vining
|
||||||
|
- terasic,de0-atlas
|
||||||
|
- terasic,socfpga-cyclone5-sockit
|
||||||
|
- const: altr,socfpga-cyclone5
|
||||||
|
- const: altr,socfpga
|
||||||
|
|
||||||
|
- description: Stratix 10 boards
|
||||||
|
items:
|
||||||
|
- enum:
|
||||||
|
- altr,socfpga-stratix10-socdk
|
||||||
|
- const: altr,socfpga-stratix10
|
||||||
|
|
||||||
|
- description: SoCFPGA VT
|
||||||
|
items:
|
||||||
|
- const: altr,socfpga-vt
|
||||||
- const: altr,socfpga
|
- const: altr,socfpga
|
||||||
|
|
||||||
additionalProperties: true
|
additionalProperties: true
|
||||||
|
|||||||
@@ -108,6 +108,7 @@ properties:
|
|||||||
- amlogic,p230
|
- amlogic,p230
|
||||||
- amlogic,p231
|
- amlogic,p231
|
||||||
- libretech,aml-s905d-pc
|
- libretech,aml-s905d-pc
|
||||||
|
- osmc,vero4k-plus
|
||||||
- phicomm,n1
|
- phicomm,n1
|
||||||
- smartlabs,sml5442tw
|
- smartlabs,sml5442tw
|
||||||
- videostrong,gxl-kii-pro
|
- videostrong,gxl-kii-pro
|
||||||
@@ -170,9 +171,14 @@ properties:
|
|||||||
- description: Boards with the Amlogic Meson SM1 S905X3/D3/Y3 SoC
|
- description: Boards with the Amlogic Meson SM1 S905X3/D3/Y3 SoC
|
||||||
items:
|
items:
|
||||||
- enum:
|
- enum:
|
||||||
|
- amediatech,x96-air
|
||||||
|
- amediatech,x96-air-gbit
|
||||||
- bananapi,bpi-m5
|
- bananapi,bpi-m5
|
||||||
|
- cyx,a95xf3-air
|
||||||
|
- cyx,a95xf3-air-gbit
|
||||||
- hardkernel,odroid-c4
|
- hardkernel,odroid-c4
|
||||||
- hardkernel,odroid-hc4
|
- hardkernel,odroid-hc4
|
||||||
|
- haochuangyi,h96-max
|
||||||
- khadas,vim3l
|
- khadas,vim3l
|
||||||
- seirobotics,sei610
|
- seirobotics,sei610
|
||||||
- const: amlogic,sm1
|
- const: amlogic,sm1
|
||||||
@@ -183,6 +189,12 @@ properties:
|
|||||||
- amlogic,ad401
|
- amlogic,ad401
|
||||||
- const: amlogic,a1
|
- const: amlogic,a1
|
||||||
|
|
||||||
|
- description: Boards with the Amlogic Meson S4 S805X2 SoC
|
||||||
|
items:
|
||||||
|
- enum:
|
||||||
|
- amlogic,aq222
|
||||||
|
- const: amlogic,s4
|
||||||
|
|
||||||
additionalProperties: true
|
additionalProperties: true
|
||||||
|
|
||||||
...
|
...
|
||||||
|
|||||||
@@ -42,7 +42,7 @@ patternProperties:
|
|||||||
description:
|
description:
|
||||||
The individual power management domains within this controller
|
The individual power management domains within this controller
|
||||||
type: object
|
type: object
|
||||||
$ref: /power/apple,pmgr-pwrstate.yaml#
|
$ref: /schemas/power/apple,pmgr-pwrstate.yaml#
|
||||||
|
|
||||||
required:
|
required:
|
||||||
- compatible
|
- compatible
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user