Merge tag 'v6.0-rc6' into locking/core, to refresh the branch
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
@@ -516,6 +516,7 @@ ForEachMacros:
|
|||||||
- 'of_property_for_each_string'
|
- 'of_property_for_each_string'
|
||||||
- 'of_property_for_each_u32'
|
- 'of_property_for_each_u32'
|
||||||
- 'pci_bus_for_each_resource'
|
- 'pci_bus_for_each_resource'
|
||||||
|
- 'pci_doe_for_each_off'
|
||||||
- 'pcl_for_each_chunk'
|
- 'pcl_for_each_chunk'
|
||||||
- 'pcl_for_each_segment'
|
- 'pcl_for_each_segment'
|
||||||
- 'pcm_for_each_format'
|
- 'pcm_for_each_format'
|
||||||
|
|||||||
@@ -1,2 +1,4 @@
|
|||||||
|
Alan Cox <alan@lxorguk.ukuu.org.uk>
|
||||||
|
Alan Cox <root@hraefn.swansea.linux.org.uk>
|
||||||
Christoph Hellwig <hch@lst.de>
|
Christoph Hellwig <hch@lst.de>
|
||||||
Marc Gonzalez <marc.w.gonzalez@free.fr>
|
Marc Gonzalez <marc.w.gonzalez@free.fr>
|
||||||
|
|||||||
10
.mailmap
10
.mailmap
@@ -78,6 +78,7 @@ Boris Brezillon <bbrezillon@kernel.org> <b.brezillon.dev@gmail.com>
|
|||||||
Boris Brezillon <bbrezillon@kernel.org> <b.brezillon@overkiz.com>
|
Boris Brezillon <bbrezillon@kernel.org> <b.brezillon@overkiz.com>
|
||||||
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
|
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@bootlin.com>
|
||||||
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
Boris Brezillon <bbrezillon@kernel.org> <boris.brezillon@free-electrons.com>
|
||||||
|
Brendan Higgins <brendan.higgins@linux.dev> <brendanhiggins@google.com>
|
||||||
Brian Avery <b.avery@hp.com>
|
Brian Avery <b.avery@hp.com>
|
||||||
Brian King <brking@us.ibm.com>
|
Brian King <brking@us.ibm.com>
|
||||||
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
|
||||||
@@ -97,8 +98,7 @@ Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
|
|||||||
Christian Marangi <ansuelsmth@gmail.com>
|
Christian Marangi <ansuelsmth@gmail.com>
|
||||||
Christophe Ricard <christophe.ricard@gmail.com>
|
Christophe Ricard <christophe.ricard@gmail.com>
|
||||||
Christoph Hellwig <hch@lst.de>
|
Christoph Hellwig <hch@lst.de>
|
||||||
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
|
Colin Ian King <colin.i.king@gmail.com> <colin.king@canonical.com>
|
||||||
Colin Ian King <colin.king@intel.com> <colin.i.king@gmail.com>
|
|
||||||
Corey Minyard <minyard@acm.org>
|
Corey Minyard <minyard@acm.org>
|
||||||
Damian Hobson-Garcia <dhobsong@igel.co.jp>
|
Damian Hobson-Garcia <dhobsong@igel.co.jp>
|
||||||
Daniel Borkmann <daniel@iogearbox.net> <danborkmann@googlemail.com>
|
Daniel Borkmann <daniel@iogearbox.net> <danborkmann@googlemail.com>
|
||||||
@@ -149,6 +149,8 @@ Greg Kroah-Hartman <gregkh@suse.de>
|
|||||||
Greg Kroah-Hartman <greg@kroah.com>
|
Greg Kroah-Hartman <greg@kroah.com>
|
||||||
Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
|
Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
|
||||||
Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com>
|
Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com>
|
||||||
|
Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@linux.vnet.ibm.com>
|
||||||
|
Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@canonical.com>
|
||||||
Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com>
|
Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com>
|
||||||
Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
|
Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
|
||||||
Gustavo Padovan <gustavo@las.ic.unicamp.br>
|
Gustavo Padovan <gustavo@las.ic.unicamp.br>
|
||||||
@@ -230,7 +232,7 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
|
|||||||
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
|
||||||
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
|
||||||
Kenneth W Chen <kenneth.w.chen@intel.com>
|
Kenneth W Chen <kenneth.w.chen@intel.com>
|
||||||
Kirill Tkhai <kirill.tkhai@openvz.org> <ktkhai@virtuozzo.com>
|
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
|
||||||
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
|
||||||
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
|
||||||
Koushik <raghavendra.koushik@neterion.com>
|
Koushik <raghavendra.koushik@neterion.com>
|
||||||
@@ -252,6 +254,7 @@ Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
|
|||||||
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
|
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
|
||||||
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
|
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
|
||||||
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
|
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
|
||||||
|
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
|
||||||
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
|
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
|
||||||
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
|
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
|
||||||
Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
|
Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
|
||||||
@@ -312,6 +315,7 @@ Morten Welinder <welinder@troll.com>
|
|||||||
Mythri P K <mythripk@ti.com>
|
Mythri P K <mythripk@ti.com>
|
||||||
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
|
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
|
||||||
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
|
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
|
||||||
|
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
|
||||||
Nguyen Anh Quynh <aquynh@gmail.com>
|
Nguyen Anh Quynh <aquynh@gmail.com>
|
||||||
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
|
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
|
||||||
Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk>
|
Nicholas Piggin <npiggin@gmail.com> <npiggin@kernel.dk>
|
||||||
|
|||||||
4
CREDITS
4
CREDITS
@@ -3495,6 +3495,10 @@ D: wd33c93 SCSI driver (linux-m68k)
|
|||||||
S: San Jose, California
|
S: San Jose, California
|
||||||
S: USA
|
S: USA
|
||||||
|
|
||||||
|
N: Joonyoung Shim
|
||||||
|
E: y0922.shim@samsung.com
|
||||||
|
D: Samsung Exynos DRM drivers
|
||||||
|
|
||||||
N: Robert Siemer
|
N: Robert Siemer
|
||||||
E: Robert.Siemer@gmx.de
|
E: Robert.Siemer@gmx.de
|
||||||
P: 2048/C99A4289 2F DC 17 2E 56 62 01 C8 3D F2 AC 09 F2 E5 DD EE
|
P: 2048/C99A4289 2F DC 17 2E 56 62 01 C8 3D F2 AC 09 F2 E5 DD EE
|
||||||
|
|||||||
@@ -260,6 +260,15 @@ Description:
|
|||||||
for discards, and don't read this file.
|
for discards, and don't read this file.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/block/<disk>/queue/dma_alignment
|
||||||
|
Date: May 2022
|
||||||
|
Contact: linux-block@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Reports the alignment that user space addresses must have to be
|
||||||
|
used for raw block device access with O_DIRECT and other driver
|
||||||
|
specific passthrough mechanisms.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/block/<disk>/queue/fua
|
What: /sys/block/<disk>/queue/fua
|
||||||
Date: May 2018
|
Date: May 2018
|
||||||
Contact: linux-block@vger.kernel.org
|
Contact: linux-block@vger.kernel.org
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_health
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_health
|
||||||
Date: June 2018
|
Date: June 2018
|
||||||
KernelVersion: 4.19
|
KernelVersion: 4.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file shows ASIC health status. The possible values are:
|
Description: This file shows ASIC health status. The possible values are:
|
||||||
0 - health failed, 2 - health OK, 3 - ASIC in booting state.
|
0 - health failed, 2 - health OK, 3 - ASIC in booting state.
|
||||||
|
|
||||||
@@ -11,7 +11,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld1_version
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_version
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld2_version
|
||||||
Date: June 2018
|
Date: June 2018
|
||||||
KernelVersion: 4.19
|
KernelVersion: 4.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show with which CPLD versions have been burned
|
Description: These files show with which CPLD versions have been burned
|
||||||
on carrier and switch boards.
|
on carrier and switch boards.
|
||||||
|
|
||||||
@@ -20,7 +20,7 @@ Description: These files show with which CPLD versions have been burned
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/fan_dir
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/fan_dir
|
||||||
Date: December 2018
|
Date: December 2018
|
||||||
KernelVersion: 5.0
|
KernelVersion: 5.0
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file shows the system fans direction:
|
Description: This file shows the system fans direction:
|
||||||
forward direction - relevant bit is set 0;
|
forward direction - relevant bit is set 0;
|
||||||
reversed direction - relevant bit is set 1.
|
reversed direction - relevant bit is set 1.
|
||||||
@@ -30,7 +30,7 @@ Description: This file shows the system fans direction:
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version
|
||||||
Date: November 2018
|
Date: November 2018
|
||||||
KernelVersion: 5.0
|
KernelVersion: 5.0
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show with which CPLD versions have been burned
|
Description: These files show with which CPLD versions have been burned
|
||||||
on LED or Gearbox board.
|
on LED or Gearbox board.
|
||||||
|
|
||||||
@@ -39,7 +39,7 @@ Description: These files show with which CPLD versions have been burned
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/jtag_enable
|
||||||
Date: November 2018
|
Date: November 2018
|
||||||
KernelVersion: 5.0
|
KernelVersion: 5.0
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files enable and disable the access to the JTAG domain.
|
Description: These files enable and disable the access to the JTAG domain.
|
||||||
By default access to the JTAG domain is disabled.
|
By default access to the JTAG domain is disabled.
|
||||||
|
|
||||||
@@ -48,7 +48,7 @@ Description: These files enable and disable the access to the JTAG domain.
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/select_iio
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/select_iio
|
||||||
Date: June 2018
|
Date: June 2018
|
||||||
KernelVersion: 4.19
|
KernelVersion: 4.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file allows iio devices selection.
|
Description: This file allows iio devices selection.
|
||||||
|
|
||||||
Attribute select_iio can be written with 0 or with 1. It
|
Attribute select_iio can be written with 0 or with 1. It
|
||||||
@@ -62,7 +62,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/psu1_on
|
|||||||
/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pwr_down
|
/sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pwr_down
|
||||||
Date: June 2018
|
Date: June 2018
|
||||||
KernelVersion: 4.19
|
KernelVersion: 4.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files allow asserting system power cycling, switching
|
Description: These files allow asserting system power cycling, switching
|
||||||
power supply units on and off and system's main power domain
|
power supply units on and off and system's main power domain
|
||||||
shutdown.
|
shutdown.
|
||||||
@@ -89,7 +89,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_short_pb
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_reset
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_reset
|
||||||
Date: June 2018
|
Date: June 2018
|
||||||
KernelVersion: 4.19
|
KernelVersion: 4.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show the system reset cause, as following: power
|
Description: These files show the system reset cause, as following: power
|
||||||
auxiliary outage or power refresh, ASIC thermal shutdown, halt,
|
auxiliary outage or power refresh, ASIC thermal shutdown, halt,
|
||||||
hotswap, watchdog, firmware reset, long press power button,
|
hotswap, watchdog, firmware reset, long press power button,
|
||||||
@@ -106,7 +106,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_system
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_voltmon_upgrade_fail
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_voltmon_upgrade_fail
|
||||||
Date: November 2018
|
Date: November 2018
|
||||||
KernelVersion: 5.0
|
KernelVersion: 5.0
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show the system reset cause, as following: ComEx
|
Description: These files show the system reset cause, as following: ComEx
|
||||||
power fail, reset from ComEx, system platform reset, reset
|
power fail, reset from ComEx, system platform reset, reset
|
||||||
due to voltage monitor devices upgrade failure,
|
due to voltage monitor devices upgrade failure,
|
||||||
@@ -119,7 +119,7 @@ Description: These files show the system reset cause, as following: ComEx
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version
|
||||||
Date: November 2018
|
Date: November 2018
|
||||||
KernelVersion: 5.0
|
KernelVersion: 5.0
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show with which CPLD versions have been burned
|
Description: These files show with which CPLD versions have been burned
|
||||||
on LED board.
|
on LED board.
|
||||||
|
|
||||||
@@ -133,7 +133,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sff_wd
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_swb_wd
|
||||||
Date: June 2019
|
Date: June 2019
|
||||||
KernelVersion: 5.3
|
KernelVersion: 5.3
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show the system reset cause, as following:
|
Description: These files show the system reset cause, as following:
|
||||||
COMEX thermal shutdown; watchdog power off or reset was derived
|
COMEX thermal shutdown; watchdog power off or reset was derived
|
||||||
by one of the next components: COMEX, switch board or by Small Form
|
by one of the next components: COMEX, switch board or by Small Form
|
||||||
@@ -148,7 +148,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config1
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config2
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config2
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show system static topology identification
|
Description: These files show system static topology identification
|
||||||
like system's static I2C topology, number and type of FPGA
|
like system's static I2C topology, number and type of FPGA
|
||||||
devices within the system and so on.
|
devices within the system and so on.
|
||||||
@@ -161,7 +161,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_soc
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_pwr_off
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/reset_sw_pwr_off
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show the system reset causes, as following: reset
|
Description: These files show the system reset causes, as following: reset
|
||||||
due to AC power failure, reset invoked from software by
|
due to AC power failure, reset invoked from software by
|
||||||
assertion reset signal through CPLD. reset caused by signal
|
assertion reset signal through CPLD. reset caused by signal
|
||||||
@@ -173,7 +173,7 @@ Description: These files show the system reset causes, as following: reset
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pcie_asic_reset_dis
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/pcie_asic_reset_dis
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file allows to retain ASIC up during PCIe root complex
|
Description: This file allows to retain ASIC up during PCIe root complex
|
||||||
reset, when attribute is set 1.
|
reset, when attribute is set 1.
|
||||||
|
|
||||||
@@ -182,7 +182,7 @@ Description: This file allows to retain ASIC up during PCIe root complex
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/vpd_wp
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/vpd_wp
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file allows to overwrite system VPD hardware write
|
Description: This file allows to overwrite system VPD hardware write
|
||||||
protection when attribute is set 1.
|
protection when attribute is set 1.
|
||||||
|
|
||||||
@@ -191,7 +191,7 @@ Description: This file allows to overwrite system VPD hardware write
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/voltreg_update_status
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/voltreg_update_status
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file exposes the configuration update status of burnable
|
Description: This file exposes the configuration update status of burnable
|
||||||
voltage regulator devices. The status values are as following:
|
voltage regulator devices. The status values are as following:
|
||||||
0 - OK; 1 - CRC failure; 2 = I2C failure; 3 - in progress.
|
0 - OK; 1 - CRC failure; 2 = I2C failure; 3 - in progress.
|
||||||
@@ -201,7 +201,7 @@ Description: This file exposes the configuration update status of burnable
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ufm_version
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/ufm_version
|
||||||
Date: January 2020
|
Date: January 2020
|
||||||
KernelVersion: 5.6
|
KernelVersion: 5.6
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file exposes the firmware version of burnable voltage
|
Description: This file exposes the firmware version of burnable voltage
|
||||||
regulator devices.
|
regulator devices.
|
||||||
|
|
||||||
@@ -217,7 +217,7 @@ What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld3_version_min
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version_min
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/cpld4_version_min
|
||||||
Date: July 2020
|
Date: July 2020
|
||||||
KernelVersion: 5.9
|
KernelVersion: 5.9
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: These files show with which CPLD part numbers and minor
|
Description: These files show with which CPLD part numbers and minor
|
||||||
versions have been burned CPLD devices equipped on a
|
versions have been burned CPLD devices equipped on a
|
||||||
system.
|
system.
|
||||||
@@ -471,7 +471,7 @@ Description: These files provide the maximum powered required for line card
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/phy_reset
|
||||||
Date: May 2022
|
Date: May 2022
|
||||||
KernelVersion: 5.19
|
KernelVersion: 5.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file allows to reset PHY 88E1548 when attribute is set 0
|
Description: This file allows to reset PHY 88E1548 when attribute is set 0
|
||||||
due to some abnormal PHY behavior.
|
due to some abnormal PHY behavior.
|
||||||
Expected behavior:
|
Expected behavior:
|
||||||
@@ -483,7 +483,7 @@ Description: This file allows to reset PHY 88E1548 when attribute is set 0
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/mac_reset
|
||||||
Date: May 2022
|
Date: May 2022
|
||||||
KernelVersion: 5.19
|
KernelVersion: 5.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file allows to reset ASIC MT52132 when attribute is set 0
|
Description: This file allows to reset ASIC MT52132 when attribute is set 0
|
||||||
due to some abnormal ASIC behavior.
|
due to some abnormal ASIC behavior.
|
||||||
Expected behavior:
|
Expected behavior:
|
||||||
@@ -495,7 +495,7 @@ Description: This file allows to reset ASIC MT52132 when attribute is set 0
|
|||||||
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/qsfp_pwr_good
|
||||||
Date: May 2022
|
Date: May 2022
|
||||||
KernelVersion: 5.19
|
KernelVersion: 5.19
|
||||||
Contact: Vadim Pasternak <vadimpmellanox.com>
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
Description: This file shows QSFP ports power status. The value is set to 0
|
Description: This file shows QSFP ports power status. The value is set to 0
|
||||||
when any of the QSFP ports is plugged. The value is set to 1 when
|
when any of the QSFP ports is plugged. The value is set to 1 when
|
||||||
there are no QSFP ports plugged.
|
there are no QSFP ports plugged.
|
||||||
@@ -503,3 +503,42 @@ Description: This file shows QSFP ports power status. The value is set to 0
|
|||||||
0 - Power good, 1 - Not power good.
|
0 - Power good, 1 - Not power good.
|
||||||
|
|
||||||
The files are read only.
|
The files are read only.
|
||||||
|
|
||||||
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic2_health
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
|
Description: This file shows 2-nd ASIC health status. The possible values are:
|
||||||
|
0 - health failed, 2 - health OK, 3 - ASIC in booting state.
|
||||||
|
|
||||||
|
The file is read only.
|
||||||
|
|
||||||
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic_reset
|
||||||
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/asic2_reset
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
|
Description: These files allow to reset each of ASICs by writing 1.
|
||||||
|
|
||||||
|
The files are write only.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/comm_chnl_ready
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
|
Description: This file is used to indicate to the remote end (for example BMC) that system
|
||||||
|
host CPU is ready for sending telemetry data to remote end.
|
||||||
|
For indication the file should be written 1.
|
||||||
|
|
||||||
|
The file is write only.
|
||||||
|
|
||||||
|
What: /sys/devices/platform/mlxplat/mlxreg-io/hwmon/hwmon*/config3
|
||||||
|
Date: January 2020
|
||||||
|
KernelVersion: 5.6
|
||||||
|
Contact: Vadim Pasternak <vadimp@nvidia.com>
|
||||||
|
Description: The file indicates COME module hardware configuration.
|
||||||
|
The value is pushed by hardware through GPIO pins.
|
||||||
|
The purpose is to expose some minor BOM changes for the same system SKU.
|
||||||
|
|
||||||
|
The file is read only.
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ What: /sys/module/<MODULENAME>/srcversion
|
|||||||
Date: Jun 2005
|
Date: Jun 2005
|
||||||
Description:
|
Description:
|
||||||
If the module source has MODULE_VERSION, this file will contain
|
If the module source has MODULE_VERSION, this file will contain
|
||||||
the checksum of the the source code.
|
the checksum of the source code.
|
||||||
|
|
||||||
What: /sys/module/<MODULENAME>/version
|
What: /sys/module/<MODULENAME>/version
|
||||||
Date: Jun 2005
|
Date: Jun 2005
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ KernelVersion: 3.13
|
|||||||
Description:
|
Description:
|
||||||
The attributes:
|
The attributes:
|
||||||
|
|
||||||
=========== ==============================================
|
============ ==============================================
|
||||||
file The path to the backing file for the LUN.
|
file The path to the backing file for the LUN.
|
||||||
Required if LUN is not marked as removable.
|
Required if LUN is not marked as removable.
|
||||||
ro Flag specifying access to the LUN shall be
|
ro Flag specifying access to the LUN shall be
|
||||||
@@ -32,4 +32,10 @@ Description:
|
|||||||
being a CD-ROM.
|
being a CD-ROM.
|
||||||
nofua Flag specifying that FUA flag
|
nofua Flag specifying that FUA flag
|
||||||
in SCSI WRITE(10,12)
|
in SCSI WRITE(10,12)
|
||||||
=========== ==============================================
|
forced_eject This write-only file is useful only when
|
||||||
|
the function is active. It causes the backing
|
||||||
|
file to be forcibly detached from the LUN,
|
||||||
|
regardless of whether the host has allowed it.
|
||||||
|
Any non-zero number of bytes written will
|
||||||
|
result in ejection.
|
||||||
|
============ ==============================================
|
||||||
|
|||||||
@@ -101,6 +101,15 @@ Description: Specify the size of the DMA transaction when using DMA to read
|
|||||||
When the write is finished, the user can read the "data_dma"
|
When the write is finished, the user can read the "data_dma"
|
||||||
blob
|
blob
|
||||||
|
|
||||||
|
What: /sys/kernel/debug/habanalabs/hl<n>/dump_razwi_events
|
||||||
|
Date: Aug 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: fkassabri@habana.ai
|
||||||
|
Description: Dumps all razwi events to dmesg, if any exist.
|
||||||
|
After reading the status register of an existing event
|
||||||
|
the routine will clear the status register.
|
||||||
|
Usage: cat dump_razwi_events
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
|
What: /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
|
||||||
Date: Jan 2021
|
Date: Jan 2021
|
||||||
KernelVersion: 5.12
|
KernelVersion: 5.12
|
||||||
@@ -121,14 +130,16 @@ Date: Jan 2019
|
|||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets I2C device address for I2C transaction that is generated
|
Description: Sets I2C device address for I2C transaction that is generated
|
||||||
by the device's CPU
|
by the device's CPU, Not available when device is loaded with secured
|
||||||
|
firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
|
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets I2C bus address for I2C transaction that is generated by
|
Description: Sets I2C bus address for I2C transaction that is generated by
|
||||||
the device's CPU
|
the device's CPU, Not available when device is loaded with secured
|
||||||
|
firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
|
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_data
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
@@ -136,39 +147,45 @@ KernelVersion: 5.1
|
|||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Triggers an I2C transaction that is generated by the device's
|
Description: Triggers an I2C transaction that is generated by the device's
|
||||||
CPU. Writing to this file generates a write transaction while
|
CPU. Writing to this file generates a write transaction while
|
||||||
reading from the file generates a read transaction
|
reading from the file generates a read transaction, Not available
|
||||||
|
when device is loaded with secured firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
|
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_len
|
||||||
Date: Dec 2021
|
Date: Dec 2021
|
||||||
KernelVersion: 5.17
|
KernelVersion: 5.17
|
||||||
Contact: obitton@habana.ai
|
Contact: obitton@habana.ai
|
||||||
Description: Sets I2C length in bytes for I2C transaction that is generated by
|
Description: Sets I2C length in bytes for I2C transaction that is generated by
|
||||||
the device's CPU
|
the device's CPU, Not available when device is loaded with secured
|
||||||
|
firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
|
What: /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets I2C register id for I2C transaction that is generated by
|
Description: Sets I2C register id for I2C transaction that is generated by
|
||||||
the device's CPU
|
the device's CPU, Not available when device is loaded with secured
|
||||||
|
firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/led0
|
What: /sys/kernel/debug/habanalabs/hl<n>/led0
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets the state of the first S/W led on the device
|
Description: Sets the state of the first S/W led on the device, Not available
|
||||||
|
when device is loaded with secured firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/led1
|
What: /sys/kernel/debug/habanalabs/hl<n>/led1
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets the state of the second S/W led on the device
|
Description: Sets the state of the second S/W led on the device, Not available
|
||||||
|
when device is loaded with secured firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/led2
|
What: /sys/kernel/debug/habanalabs/hl<n>/led2
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
KernelVersion: 5.1
|
KernelVersion: 5.1
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Sets the state of the third S/W led on the device
|
Description: Sets the state of the third S/W led on the device, Not available
|
||||||
|
when device is loaded with secured firmware
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
|
What: /sys/kernel/debug/habanalabs/hl<n>/memory_scrub
|
||||||
Date: May 2022
|
Date: May 2022
|
||||||
@@ -182,7 +199,8 @@ Date: May 2022
|
|||||||
KernelVersion: 5.19
|
KernelVersion: 5.19
|
||||||
Contact: dhirschfeld@habana.ai
|
Contact: dhirschfeld@habana.ai
|
||||||
Description: The value to which the dram will be set when the user
|
Description: The value to which the dram will be set when the user
|
||||||
scrubs the dram using 'memory_scrub' debugfs file
|
scrubs the dram using 'memory_scrub' debugfs file and
|
||||||
|
the scrubbing value when using module param 'memory_scrub'
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
|
What: /sys/kernel/debug/habanalabs/hl<n>/mmu
|
||||||
Date: Jan 2019
|
Date: Jan 2019
|
||||||
@@ -277,7 +295,7 @@ Description: Displays a list with information about the currently user
|
|||||||
to DMA addresses
|
to DMA addresses
|
||||||
|
|
||||||
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
|
What: /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
|
||||||
Date: Aug 2021
|
Date: Oct 2021
|
||||||
KernelVersion: 5.15
|
KernelVersion: 5.15
|
||||||
Contact: ogabbay@kernel.org
|
Contact: ogabbay@kernel.org
|
||||||
Description: Allows to search for specific user pointers (user virtual
|
Description: Allows to search for specific user pointers (user virtual
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ Description:
|
|||||||
MMUPageSize: 4 kB
|
MMUPageSize: 4 kB
|
||||||
Rss: 884 kB
|
Rss: 884 kB
|
||||||
Pss: 385 kB
|
Pss: 385 kB
|
||||||
|
Pss_Dirty: 68 kB
|
||||||
Pss_Anon: 301 kB
|
Pss_Anon: 301 kB
|
||||||
Pss_File: 80 kB
|
Pss_File: 80 kB
|
||||||
Pss_Shmem: 4 kB
|
Pss_Shmem: 4 kB
|
||||||
|
|||||||
@@ -7,6 +7,7 @@ Description:
|
|||||||
all descendant memdevs for unbind. Writing '1' to this attribute
|
all descendant memdevs for unbind. Writing '1' to this attribute
|
||||||
flushes that work.
|
flushes that work.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/firmware_version
|
What: /sys/bus/cxl/devices/memX/firmware_version
|
||||||
Date: December, 2020
|
Date: December, 2020
|
||||||
KernelVersion: v5.12
|
KernelVersion: v5.12
|
||||||
@@ -16,6 +17,7 @@ Description:
|
|||||||
Memory Device Output Payload in the CXL-2.0
|
Memory Device Output Payload in the CXL-2.0
|
||||||
specification.
|
specification.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/ram/size
|
What: /sys/bus/cxl/devices/memX/ram/size
|
||||||
Date: December, 2020
|
Date: December, 2020
|
||||||
KernelVersion: v5.12
|
KernelVersion: v5.12
|
||||||
@@ -25,6 +27,7 @@ Description:
|
|||||||
identically named field in the Identify Memory Device Output
|
identically named field in the Identify Memory Device Output
|
||||||
Payload in the CXL-2.0 specification.
|
Payload in the CXL-2.0 specification.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/pmem/size
|
What: /sys/bus/cxl/devices/memX/pmem/size
|
||||||
Date: December, 2020
|
Date: December, 2020
|
||||||
KernelVersion: v5.12
|
KernelVersion: v5.12
|
||||||
@@ -34,6 +37,7 @@ Description:
|
|||||||
identically named field in the Identify Memory Device Output
|
identically named field in the Identify Memory Device Output
|
||||||
Payload in the CXL-2.0 specification.
|
Payload in the CXL-2.0 specification.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/serial
|
What: /sys/bus/cxl/devices/memX/serial
|
||||||
Date: January, 2022
|
Date: January, 2022
|
||||||
KernelVersion: v5.18
|
KernelVersion: v5.18
|
||||||
@@ -43,6 +47,7 @@ Description:
|
|||||||
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
|
capability. Mandatory for CXL devices, see CXL 2.0 8.1.12.2
|
||||||
Memory Device PCIe Capabilities and Extended Capabilities.
|
Memory Device PCIe Capabilities and Extended Capabilities.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/memX/numa_node
|
What: /sys/bus/cxl/devices/memX/numa_node
|
||||||
Date: January, 2022
|
Date: January, 2022
|
||||||
KernelVersion: v5.18
|
KernelVersion: v5.18
|
||||||
@@ -52,114 +57,334 @@ Description:
|
|||||||
host PCI device for this memory device, emit the CPU node
|
host PCI device for this memory device, emit the CPU node
|
||||||
affinity for this device.
|
affinity for this device.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/*/devtype
|
What: /sys/bus/cxl/devices/*/devtype
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL device objects export the devtype attribute which mirrors
|
(RO) CXL device objects export the devtype attribute which
|
||||||
the same value communicated in the DEVTYPE environment variable
|
mirrors the same value communicated in the DEVTYPE environment
|
||||||
for uevents for devices on the "cxl" bus.
|
variable for uevents for devices on the "cxl" bus.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/*/modalias
|
What: /sys/bus/cxl/devices/*/modalias
|
||||||
Date: December, 2021
|
Date: December, 2021
|
||||||
KernelVersion: v5.18
|
KernelVersion: v5.18
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL device objects export the modalias attribute which mirrors
|
(RO) CXL device objects export the modalias attribute which
|
||||||
the same value communicated in the MODALIAS environment variable
|
mirrors the same value communicated in the MODALIAS environment
|
||||||
for uevents for devices on the "cxl" bus.
|
variable for uevents for devices on the "cxl" bus.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/portX/uport
|
What: /sys/bus/cxl/devices/portX/uport
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL port objects are enumerated from either a platform firmware
|
(RO) CXL port objects are enumerated from either a platform
|
||||||
device (ACPI0017 and ACPI0016) or PCIe switch upstream port with
|
firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
|
||||||
CXL component registers. The 'uport' symlink connects the CXL
|
port with CXL component registers. The 'uport' symlink connects
|
||||||
portX object to the device that published the CXL port
|
the CXL portX object to the device that published the CXL port
|
||||||
capability.
|
capability.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/portX/dportY
|
What: /sys/bus/cxl/devices/portX/dportY
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL port objects are enumerated from either a platform firmware
|
(RO) CXL port objects are enumerated from either a platform
|
||||||
device (ACPI0017 and ACPI0016) or PCIe switch upstream port with
|
firmware device (ACPI0017 and ACPI0016) or PCIe switch upstream
|
||||||
CXL component registers. The 'dportY' symlink identifies one or
|
port with CXL component registers. The 'dportY' symlink
|
||||||
more downstream ports that the upstream port may target in its
|
identifies one or more downstream ports that the upstream port
|
||||||
decode of CXL memory resources. The 'Y' integer reflects the
|
may target in its decode of CXL memory resources. The 'Y'
|
||||||
hardware port unique-id used in the hardware decoder target
|
integer reflects the hardware port unique-id used in the
|
||||||
list.
|
hardware decoder target list.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y
|
What: /sys/bus/cxl/devices/decoderX.Y
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL decoder objects are enumerated from either a platform
|
(RO) CXL decoder objects are enumerated from either a platform
|
||||||
firmware description, or a CXL HDM decoder register set in a
|
firmware description, or a CXL HDM decoder register set in a
|
||||||
PCIe device (see CXL 2.0 section 8.2.5.12 CXL HDM Decoder
|
PCIe device (see CXL 2.0 section 8.2.5.12 CXL HDM Decoder
|
||||||
Capability Structure). The 'X' in decoderX.Y represents the
|
Capability Structure). The 'X' in decoderX.Y represents the
|
||||||
cxl_port container of this decoder, and 'Y' represents the
|
cxl_port container of this decoder, and 'Y' represents the
|
||||||
instance id of a given decoder resource.
|
instance id of a given decoder resource.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y/{start,size}
|
What: /sys/bus/cxl/devices/decoderX.Y/{start,size}
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
The 'start' and 'size' attributes together convey the physical
|
(RO) The 'start' and 'size' attributes together convey the
|
||||||
address base and number of bytes mapped in the decoder's decode
|
physical address base and number of bytes mapped in the
|
||||||
window. For decoders of devtype "cxl_decoder_root" the address
|
decoder's decode window. For decoders of devtype
|
||||||
range is fixed. For decoders of devtype "cxl_decoder_switch" the
|
"cxl_decoder_root" the address range is fixed. For decoders of
|
||||||
address is bounded by the decode range of the cxl_port ancestor
|
devtype "cxl_decoder_switch" the address is bounded by the
|
||||||
of the decoder's cxl_port, and dynamically updates based on the
|
decode range of the cxl_port ancestor of the decoder's cxl_port,
|
||||||
active memory regions in that address space.
|
and dynamically updates based on the active memory regions in
|
||||||
|
that address space.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y/locked
|
What: /sys/bus/cxl/devices/decoderX.Y/locked
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
CXL HDM decoders have the capability to lock the configuration
|
(RO) CXL HDM decoders have the capability to lock the
|
||||||
until the next device reset. For decoders of devtype
|
configuration until the next device reset. For decoders of
|
||||||
"cxl_decoder_root" there is no standard facility to unlock them.
|
devtype "cxl_decoder_root" there is no standard facility to
|
||||||
For decoders of devtype "cxl_decoder_switch" a secondary bus
|
unlock them. For decoders of devtype "cxl_decoder_switch" a
|
||||||
reset, of the PCIe bridge that provides the bus for this
|
secondary bus reset, of the PCIe bridge that provides the bus
|
||||||
decoders uport, unlocks / resets the decoder.
|
for this decoder's uport, unlocks / resets the decoder.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y/target_list
|
What: /sys/bus/cxl/devices/decoderX.Y/target_list
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
Display a comma separated list of the current decoder target
|
(RO) Display a comma separated list of the current decoder
|
||||||
configuration. The list is ordered by the current configured
|
target configuration. The list is ordered by the current
|
||||||
interleave order of the decoder's dport instances. Each entry in
|
configured interleave order of the decoder's dport instances.
|
||||||
the list is a dport id.
|
Each entry in the list is a dport id.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y/cap_{pmem,ram,type2,type3}
|
What: /sys/bus/cxl/devices/decoderX.Y/cap_{pmem,ram,type2,type3}
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
When a CXL decoder is of devtype "cxl_decoder_root", it
|
(RO) When a CXL decoder is of devtype "cxl_decoder_root", it
|
||||||
represents a fixed memory window identified by platform
|
represents a fixed memory window identified by platform
|
||||||
firmware. A fixed window may only support a subset of memory
|
firmware. A fixed window may only support a subset of memory
|
||||||
types. The 'cap_*' attributes indicate whether persistent
|
types. The 'cap_*' attributes indicate whether persistent
|
||||||
memory, volatile memory, accelerator memory, and / or expander
|
memory, volatile memory, accelerator memory, and / or expander
|
||||||
memory may be mapped behind this decoder's memory window.
|
memory may be mapped behind this decoder's memory window.
|
||||||
|
|
||||||
|
|
||||||
What: /sys/bus/cxl/devices/decoderX.Y/target_type
|
What: /sys/bus/cxl/devices/decoderX.Y/target_type
|
||||||
Date: June, 2021
|
Date: June, 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: linux-cxl@vger.kernel.org
|
Contact: linux-cxl@vger.kernel.org
|
||||||
Description:
|
Description:
|
||||||
When a CXL decoder is of devtype "cxl_decoder_switch", it can
|
(RO) When a CXL decoder is of devtype "cxl_decoder_switch", it
|
||||||
optionally decode either accelerator memory (type-2) or expander
|
can optionally decode either accelerator memory (type-2) or
|
||||||
memory (type-3). The 'target_type' attribute indicates the
|
expander memory (type-3). The 'target_type' attribute indicates
|
||||||
current setting which may dynamically change based on what
|
the current setting which may dynamically change based on what
|
||||||
memory regions are activated in this decode hierarchy.
|
memory regions are activated in this decode hierarchy.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/endpointX/CDAT
|
||||||
|
Date: July, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) If this sysfs entry is not present no DOE mailbox was
|
||||||
|
found to support CDAT data. If it is present and the length of
|
||||||
|
the data is 0 reading the CDAT data failed. Otherwise the CDAT
|
||||||
|
data is reported.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/mode
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
|
||||||
|
translates from a host physical address range, to a device local
|
||||||
|
address range. Device-local address ranges are further split
|
||||||
|
into a 'ram' (volatile memory) range and 'pmem' (persistent
|
||||||
|
memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
|
||||||
|
'mixed', or 'none'. The 'mixed' indication is for error cases
|
||||||
|
when a decoder straddles the volatile/persistent partition
|
||||||
|
boundary, and 'none' indicates the decoder is not actively
|
||||||
|
decoding, or no DPA allocation policy has been set.
|
||||||
|
|
||||||
|
'mode' can be written, when the decoder is in the 'disabled'
|
||||||
|
state, with either 'ram' or 'pmem' to set the boundaries for the
|
||||||
|
next allocation.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) When a CXL decoder is of devtype "cxl_decoder_endpoint",
|
||||||
|
and its 'dpa_size' attribute is non-zero, this attribute
|
||||||
|
indicates the device physical address (DPA) base address of the
|
||||||
|
allocation.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/dpa_size
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
|
||||||
|
translates from a host physical address range, to a device local
|
||||||
|
address range. The range, base address plus length in bytes, of
|
||||||
|
DPA allocated to this decoder is conveyed in these 2 attributes.
|
||||||
|
Allocations can be mutated as long as the decoder is in the
|
||||||
|
disabled state. A write to 'dpa_size' releases the previous DPA
|
||||||
|
allocation and then attempts to allocate from the free capacity
|
||||||
|
in the device partition referred to by 'decoderX.Y/mode'.
|
||||||
|
Allocate and free requests can only be performed on the highest
|
||||||
|
instance number disabled decoder with non-zero size. I.e.
|
||||||
|
allocations are enforced to occur in increasing 'decoderX.Y/id'
|
||||||
|
order and frees are enforced to occur in decreasing
|
||||||
|
'decoderX.Y/id' order.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/interleave_ways
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) The number of targets across which this decoder's host
|
||||||
|
physical address (HPA) memory range is interleaved. The device
|
||||||
|
maps every Nth block of HPA (of size ==
|
||||||
|
'interleave_granularity') to consecutive DPA addresses. The
|
||||||
|
decoder's position in the interleave is determined by the
|
||||||
|
device's (endpoint or switch) switch ancestry. For root
|
||||||
|
decoders their interleave is specified by platform firmware and
|
||||||
|
they only specify a downstream target order for host bridges.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/interleave_granularity
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) The number of consecutive bytes of host physical address
|
||||||
|
space this decoder claims at address N before the decode rotates
|
||||||
|
to the next target in the interleave at address N +
|
||||||
|
interleave_granularity (assuming N is aligned to
|
||||||
|
interleave_granularity).
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/create_pmem_region
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Write a string in the form 'regionZ' to start the process
|
||||||
|
of defining a new persistent memory region (interleave-set)
|
||||||
|
within the decode range bounded by root decoder 'decoderX.Y'.
|
||||||
|
The value written must match the current value returned from
|
||||||
|
reading this attribute. An atomic compare exchange operation is
|
||||||
|
done on write to assign the requested id to a region and
|
||||||
|
allocate the region-id for the next creation attempt. EBUSY is
|
||||||
|
returned if the region name written does not match the current
|
||||||
|
cached value.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/decoderX.Y/delete_region
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(WO) Write a string in the form 'regionZ' to delete that region,
|
||||||
|
provided it is currently idle / not bound to a driver.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/uuid
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Write a unique identifier for the region. This field must
|
||||||
|
be set for persistent regions and it must not conflict with the
|
||||||
|
UUID of another region.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/interleave_granularity
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Set the number of consecutive bytes each device in the
|
||||||
|
interleave set will claim. The possible interleave granularity
|
||||||
|
values are determined by the CXL spec and the participating
|
||||||
|
devices.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/interleave_ways
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) The number of devices participating in the
|
||||||
|
region is set by writing this value. Each device will provide
|
||||||
|
1/interleave_ways of storage for the region.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/size
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) System physical address space to be consumed by the region.
|
||||||
|
When written, triggers the driver to allocate space out of the
|
||||||
|
parent root decoder's address space. When read the size of the
|
||||||
|
address space is reported and should match the span of the
|
||||||
|
region's resource attribute. Size shall be set after the
|
||||||
|
interleave configuration parameters. Once set it cannot be
|
||||||
|
changed, only freed by writing 0. The kernel makes no guarantees
|
||||||
|
that data is maintained over an address space freeing event, and
|
||||||
|
there is no guarantee that a free followed by an allocate
|
||||||
|
results in the same address being allocated.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/resource
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RO) A region is a contiguous partition of a CXL root decoder
|
||||||
|
address space. Region capacity is allocated by writing to the
|
||||||
|
size attribute, the resulting physical address space determined
|
||||||
|
by the driver is reflected here. It is therefore not useful to
|
||||||
|
read this before writing a value to the size attribute.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/target[0..N]
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Write an endpoint decoder object name to 'targetX' where X
|
||||||
|
is the intended position of the endpoint device in the region
|
||||||
|
interleave and N is the 'interleave_ways' setting for the
|
||||||
|
region. ENXIO is returned if the write results in an impossible
|
||||||
|
to map decode scenario, like the endpoint is unreachable at that
|
||||||
|
position relative to the root decoder interleave. EBUSY is
|
||||||
|
returned if the position in the region is already occupied, or
|
||||||
|
if the region is not in a state to accept interleave
|
||||||
|
configuration changes. EINVAL is returned if the object name is
|
||||||
|
not an endpoint decoder. Once all positions have been
|
||||||
|
successfully written a final validation for decode conflicts is
|
||||||
|
performed before activating the region.
|
||||||
|
|
||||||
|
|
||||||
|
What: /sys/bus/cxl/devices/regionZ/commit
|
||||||
|
Date: May, 2022
|
||||||
|
KernelVersion: v5.20
|
||||||
|
Contact: linux-cxl@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Write a boolean 'true' string value to this attribute to
|
||||||
|
trigger the region to transition from the software programmed
|
||||||
|
state to the actively decoding in hardware state. The commit
|
||||||
|
operation in addition to validating that the region is in proper
|
||||||
|
configured state, validates that the decoders are being
|
||||||
|
committed in spec mandated order (last committed decoder id +
|
||||||
|
1), and checks that the hardware accepts the commit request.
|
||||||
|
Reading this value indicates whether the region is committed or
|
||||||
|
not.
|
||||||
|
|||||||
@@ -0,0 +1,18 @@
|
|||||||
|
What: /sys/bus/event_source/devices/<dev>/caps
|
||||||
|
Date: May 2022
|
||||||
|
KernelVersion: 5.19
|
||||||
|
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||||
|
Description:
|
||||||
|
Attribute group to describe the capabilities exposed
|
||||||
|
for a particular pmu. Each attribute of this group can
|
||||||
|
expose information specific to a PMU, say pmu_name, so that
|
||||||
|
userspace can understand some of the features which the
|
||||||
|
platform specific PMU supports.
|
||||||
|
|
||||||
|
One example of an available capability on supported platforms
|
||||||
|
like Intel is pmu_name, which exposes underlying CPU name known
|
||||||
|
to the PMU driver.
|
||||||
|
|
||||||
|
Example output in powerpc:
|
||||||
|
grep . /sys/bus/event_source/devices/cpu/caps/*
|
||||||
|
/sys/bus/event_source/devices/cpu/caps/pmu_name:POWER9
|
||||||
@@ -79,6 +79,11 @@ Description:
|
|||||||
* "accel-base"
|
* "accel-base"
|
||||||
* "accel-display"
|
* "accel-display"
|
||||||
|
|
||||||
|
For devices where an accelerometer is housed in the swivel camera subassembly
|
||||||
|
(for AR application), the following standardized label is used:
|
||||||
|
|
||||||
|
* "accel-camera"
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/current_timestamp_clock
|
What: /sys/bus/iio/devices/iio:deviceX/current_timestamp_clock
|
||||||
KernelVersion: 4.5
|
KernelVersion: 4.5
|
||||||
Contact: linux-iio@vger.kernel.org
|
Contact: linux-iio@vger.kernel.org
|
||||||
@@ -102,6 +107,9 @@ Description:
|
|||||||
relevant directories. If it affects all of the above
|
relevant directories. If it affects all of the above
|
||||||
then it is to be found in the base device directory.
|
then it is to be found in the base device directory.
|
||||||
|
|
||||||
|
The stm32-timer-trigger has the additional characteristic that
|
||||||
|
a sampling_frequency of 0 is defined to stop sampling.
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency_available
|
What: /sys/bus/iio/devices/iio:deviceX/sampling_frequency_available
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency_available
|
What: /sys/bus/iio/devices/iio:deviceX/in_intensity_sampling_frequency_available
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_sampling_frequency_available
|
What: /sys/bus/iio/devices/iio:deviceX/in_proximity_sampling_frequency_available
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ Contact: Gwendal Grignou <gwendal@chromium.org>
|
|||||||
Description:
|
Description:
|
||||||
SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
|
SX9324 has 3 inputs, CS0, CS1 and CS2. Hardware layout
|
||||||
defines if the input is
|
defines if the input is
|
||||||
|
|
||||||
+ not connected (HZ),
|
+ not connected (HZ),
|
||||||
+ grounded (GD),
|
+ grounded (GD),
|
||||||
+ connected to an antenna where it can act as a base
|
+ connected to an antenna where it can act as a base
|
||||||
|
|||||||
@@ -1,31 +0,0 @@
|
|||||||
What: /sys/bus/iio/devices/iio:deviceX/fault_oc
|
|
||||||
KernelVersion: 5.1
|
|
||||||
Contact: linux-iio@vger.kernel.org
|
|
||||||
Description:
|
|
||||||
Open-circuit fault. The detection of open-circuit faults,
|
|
||||||
such as those caused by broken thermocouple wires.
|
|
||||||
Reading returns either '1' or '0'.
|
|
||||||
|
|
||||||
=== =======================================================
|
|
||||||
'1' An open circuit such as broken thermocouple wires
|
|
||||||
has been detected.
|
|
||||||
'0' No open circuit or broken thermocouple wires are detected
|
|
||||||
=== =======================================================
|
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/fault_ovuv
|
|
||||||
KernelVersion: 5.1
|
|
||||||
Contact: linux-iio@vger.kernel.org
|
|
||||||
Description:
|
|
||||||
Overvoltage or Undervoltage Input Fault. The internal circuitry
|
|
||||||
is protected from excessive voltages applied to the thermocouple
|
|
||||||
cables by integrated MOSFETs at the T+ and T- inputs, and the
|
|
||||||
BIAS output. These MOSFETs turn off when the input voltage is
|
|
||||||
negative or greater than VDD.
|
|
||||||
|
|
||||||
Reading returns either '1' or '0'.
|
|
||||||
|
|
||||||
=== =======================================================
|
|
||||||
'1' The input voltage is negative or greater than VDD.
|
|
||||||
'0' The input voltage is positive and less than VDD (normal
|
|
||||||
state).
|
|
||||||
=== =======================================================
|
|
||||||
@@ -1,20 +0,0 @@
|
|||||||
What: /sys/bus/iio/devices/iio:deviceX/fault_ovuv
|
|
||||||
KernelVersion: 5.11
|
|
||||||
Contact: linux-iio@vger.kernel.org
|
|
||||||
Description:
|
|
||||||
Overvoltage or Undervoltage Input fault. The internal circuitry
|
|
||||||
is protected from excessive voltages applied to the thermocouple
|
|
||||||
cables at FORCE+, FORCE2, RTDIN+ & RTDIN-. This circuitry turn
|
|
||||||
off when the input voltage is negative or greater than VDD.
|
|
||||||
|
|
||||||
Reading returns '1' if input voltage is negative or greater
|
|
||||||
than VDD, otherwise '0'.
|
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_filter_notch_center_frequency
|
|
||||||
KernelVersion: 5.11
|
|
||||||
Contact: linux-iio@vger.kernel.org
|
|
||||||
Description:
|
|
||||||
Notch frequency in Hz for a noise rejection filter. Used i.e for
|
|
||||||
line noise rejection.
|
|
||||||
|
|
||||||
Valid notch filter values are 50 Hz and 60 Hz.
|
|
||||||
18
Documentation/ABI/testing/sysfs-bus-iio-thermocouple
Normal file
18
Documentation/ABI/testing/sysfs-bus-iio-thermocouple
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
What: /sys/bus/iio/devices/iio:deviceX/fault_ovuv
|
||||||
|
KernelVersion: 5.1
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Overvoltage or Undervoltage Input Fault. The internal circuitry
|
||||||
|
is protected from excessive voltages applied to the thermocouple
|
||||||
|
cables. The device can also detect if such a condition occurs.
|
||||||
|
|
||||||
|
Reading returns '1' if input voltage is negative or greater
|
||||||
|
than VDD, otherwise '0'.
|
||||||
|
|
||||||
|
What: /sys/bus/iio/devices/iio:deviceX/fault_oc
|
||||||
|
KernelVersion: 5.1
|
||||||
|
Contact: linux-iio@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
Open-circuit fault. The detection of open-circuit faults,
|
||||||
|
such as those caused by broken thermocouple wires.
|
||||||
|
Reading returns '1' if fault, '0' otherwise.
|
||||||
@@ -90,14 +90,6 @@ Description:
|
|||||||
Reading returns the current master modes.
|
Reading returns the current master modes.
|
||||||
Writing set the master mode
|
Writing set the master mode
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/triggerX/sampling_frequency
|
|
||||||
KernelVersion: 4.11
|
|
||||||
Contact: benjamin.gaignard@st.com
|
|
||||||
Description:
|
|
||||||
Reading returns the current sampling frequency.
|
|
||||||
Writing an value different of 0 set and start sampling.
|
|
||||||
Writing 0 stop sampling.
|
|
||||||
|
|
||||||
What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset
|
What: /sys/bus/iio/devices/iio:deviceX/in_count0_preset
|
||||||
KernelVersion: 4.12
|
KernelVersion: 4.12
|
||||||
Contact: benjamin.gaignard@st.com
|
Contact: benjamin.gaignard@st.com
|
||||||
|
|||||||
@@ -0,0 +1,8 @@
|
|||||||
|
What: /sys/bus/platform/devices/<dev>/always_powered_in_suspend
|
||||||
|
Date: June 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Matthias Kaehlcke <matthias@kaehlcke.net>
|
||||||
|
linux-usb@vger.kernel.org
|
||||||
|
Description:
|
||||||
|
(RW) Controls whether the USB hub remains always powered
|
||||||
|
during system suspend or not.
|
||||||
@@ -0,0 +1,57 @@
|
|||||||
|
What: /sys/bus/surface_aggregator/devices/01:0e:01:00:01/state
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Maximilian Luz <luzmaximilian@gmail.com>
|
||||||
|
Description:
|
||||||
|
This attribute returns a string with the current type-cover
|
||||||
|
or device posture, as indicated by the embedded controller.
|
||||||
|
Currently returned posture states are:
|
||||||
|
|
||||||
|
- "disconnected": The type-cover has been disconnected.
|
||||||
|
|
||||||
|
- "closed": The type-cover has been folded closed and lies on
|
||||||
|
top of the display.
|
||||||
|
|
||||||
|
- "laptop": The type-cover is open and in laptop-mode, i.e.,
|
||||||
|
ready for normal use.
|
||||||
|
|
||||||
|
- "folded-canvas": The type-cover has been folded back
|
||||||
|
part-ways, but does not lie flush with the back side of the
|
||||||
|
device. In general, this means that the kick-stand is used
|
||||||
|
and extended atop of the cover.
|
||||||
|
|
||||||
|
- "folded-back": The type cover has been fully folded back and
|
||||||
|
lies flush with the back side of the device.
|
||||||
|
|
||||||
|
- "<unknown>": The current state is unknown to the driver, for
|
||||||
|
example due to newer as-of-yet unsupported hardware.
|
||||||
|
|
||||||
|
New states may be introduced with new hardware. Users therefore
|
||||||
|
must not rely on this list of states being exhaustive and
|
||||||
|
must gracefully handle unknown states.
|
||||||
|
|
||||||
|
What: /sys/bus/surface_aggregator/devices/01:26:01:00:01/state
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Maximilian Luz <luzmaximilian@gmail.com>
|
||||||
|
Description:
|
||||||
|
This attribute returns a string with the current device posture, as indicated by the embedded controller. Currently
|
||||||
|
returned posture states are:
|
||||||
|
|
||||||
|
- "closed": The lid of the device is closed.
|
||||||
|
|
||||||
|
- "laptop": The lid of the device is opened and the device
|
||||||
|
operates as a normal laptop.
|
||||||
|
|
||||||
|
- "slate": The screen covers the keyboard or has been flipped
|
||||||
|
back and the device operates mainly based on touch input.
|
||||||
|
|
||||||
|
- "tablet": The device operates as tablet and exclusively
|
||||||
|
relies on touch input (or external peripherals).
|
||||||
|
|
||||||
|
- "<unknown>": The current state is unknown to the driver, for
|
||||||
|
example due to newer as-of-yet unsupported hardware.
|
||||||
|
|
||||||
|
New states may be introduced with new hardware. Users therefore
|
||||||
|
must not rely on this list of states being exhaustive and
|
||||||
|
must gracefully handle unknown states.
|
||||||
@@ -253,6 +253,17 @@ Description:
|
|||||||
only if the system firmware is capable of describing the
|
only if the system firmware is capable of describing the
|
||||||
connection between a port and its connector.
|
connection between a port and its connector.
|
||||||
|
|
||||||
|
What: /sys/bus/usb/devices/.../<hub_interface>/port<X>/disable
|
||||||
|
Date: June 2022
|
||||||
|
Contact: Michael Grzeschik <m.grzeschik@pengutronix.de>
|
||||||
|
Description:
|
||||||
|
This file controls the state of a USB port, including
|
||||||
|
Vbus power output (but only on hubs that support
|
||||||
|
power switching -- most hubs don't support it). If
|
||||||
|
a port is disabled, the port is unusable: Devices
|
||||||
|
attached to the port will not be detected, initialized,
|
||||||
|
or enumerated.
|
||||||
|
|
||||||
What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout
|
What: /sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout
|
||||||
Date: May 2013
|
Date: May 2013
|
||||||
Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
|
Contact: Mathias Nyman <mathias.nyman@linux.intel.com>
|
||||||
|
|||||||
@@ -938,3 +938,12 @@ Description:
|
|||||||
- 1: enable
|
- 1: enable
|
||||||
|
|
||||||
RW
|
RW
|
||||||
|
|
||||||
|
What: /sys/class/hwmon/hwmonX/device/pec
|
||||||
|
Description:
|
||||||
|
PEC support on I2C devices
|
||||||
|
|
||||||
|
- 0, off, n: disable
|
||||||
|
- 1, on, y: enable
|
||||||
|
|
||||||
|
RW
|
||||||
|
|||||||
@@ -81,7 +81,7 @@ Description:
|
|||||||
What: /sys/class/pwm/pwmchip<N>/pwmX/capture
|
What: /sys/class/pwm/pwmchip<N>/pwmX/capture
|
||||||
Date: June 2016
|
Date: June 2016
|
||||||
KernelVersion: 4.8
|
KernelVersion: 4.8
|
||||||
Contact: Lee Jones <lee.jones@linaro.org>
|
Contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Capture information about a PWM signal. The output format is a
|
Capture information about a PWM signal. The output format is a
|
||||||
pair unsigned integers (period and duty cycle), separated by a
|
pair unsigned integers (period and duty cycle), separated by a
|
||||||
|
|||||||
@@ -78,7 +78,7 @@ What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_name
|
|||||||
Date: Feb 2020
|
Date: Feb 2020
|
||||||
KernelVersion: 5.7
|
KernelVersion: 5.7
|
||||||
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
||||||
Description: RO, Contains the the name of HCA the connection established on.
|
Description: RO, Contains the name of HCA the connection established on.
|
||||||
|
|
||||||
What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_port
|
What: /sys/class/rtrs-client/<session-name>/paths/<src@dst>/hca_port
|
||||||
Date: Feb 2020
|
Date: Feb 2020
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_name
|
|||||||
Date: Feb 2020
|
Date: Feb 2020
|
||||||
KernelVersion: 5.7
|
KernelVersion: 5.7
|
||||||
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
Contact: Jack Wang <jinpu.wang@cloud.ionos.com> Danil Kipnis <danil.kipnis@cloud.ionos.com>
|
||||||
Description: RO, Contains the the name of HCA the connection established on.
|
Description: RO, Contains the name of HCA the connection established on.
|
||||||
|
|
||||||
What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_port
|
What: /sys/class/rtrs-server/<session-name>/paths/<src@dst>/hca_port
|
||||||
Date: Feb 2020
|
Date: Feb 2020
|
||||||
|
|||||||
@@ -141,6 +141,14 @@ Description:
|
|||||||
- "reverse": CC2 orientation
|
- "reverse": CC2 orientation
|
||||||
- "unknown": Orientation cannot be determined.
|
- "unknown": Orientation cannot be determined.
|
||||||
|
|
||||||
|
What: /sys/class/typec/<port>/select_usb_power_delivery
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Lists the USB Power Delivery Capabilities that the port can
|
||||||
|
advertise to the partner. The currently used capabilities are in
|
||||||
|
brackets. Selection happens by writing to the file.
|
||||||
|
|
||||||
USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)
|
USB Type-C partner devices (eg. /sys/class/typec/port0-partner/)
|
||||||
|
|
||||||
What: /sys/class/typec/<port>-partner/accessory_mode
|
What: /sys/class/typec/<port>-partner/accessory_mode
|
||||||
|
|||||||
240
Documentation/ABI/testing/sysfs-class-usb_power_delivery
Normal file
240
Documentation/ABI/testing/sysfs-class-usb_power_delivery
Normal file
@@ -0,0 +1,240 @@
|
|||||||
|
What: /sys/class/usb_power_delivery
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Directory for USB Power Delivery devices.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../revision
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
File showing the USB Power Delivery Specification Revision used
|
||||||
|
in communication.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../version
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This is an optional attribute file showing the version of the
|
||||||
|
specific revision of the USB Power Delivery Specification. In
|
||||||
|
most cases the specification version is not known and the file
|
||||||
|
is not available.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../source-capabilities
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The source capabilities message "Source_Capabilities" contains a
|
||||||
|
set of Power Data Objects (PDO), each representing a type of
|
||||||
|
power supply. The order of the PDO objects is defined in the USB
|
||||||
|
Power Delivery Specification. Each PDO - power supply - will
|
||||||
|
have its own device, and the PDO device name will start with the
|
||||||
|
object position number as the first character followed by the
|
||||||
|
power supply type name (":" as delimiter).
|
||||||
|
|
||||||
|
/sys/class/usb_power_delivery/.../source-capabilities/<position>:<type>
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../sink-capabilities
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The sink capability message "Sink_Capabilities" contains a set
|
||||||
|
of Power Data Objects (PDO) just like with source capabilities,
|
||||||
|
but instead of describing the power capabilities, these objects
|
||||||
|
describe the power requirements.
|
||||||
|
|
||||||
|
The order of the objects in the sink capability message is the
|
||||||
|
same as with the source capabilities message.
|
||||||
|
|
||||||
|
Fixed Supplies
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:fixed_supply
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Devices containing the attributes (the bit fields) defined for
|
||||||
|
Fixed Supplies.
|
||||||
|
|
||||||
|
The device "1:fixed_supply" is special. USB Power Delivery
|
||||||
|
Specification dictates that the first PDO (at object position
|
||||||
|
1), and the only mandatory PDO, is always the vSafe5V Fixed
|
||||||
|
Supply Object. vSafe5V Object has additional fields defined for
|
||||||
|
it that the other Fixed Supply Objects do not have and that are
|
||||||
|
related to the USB capabilities rather than power capabilities.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/dual_role_power
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file contains a boolean value that tells whether the device
|
||||||
|
supports both source and sink power roles.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/usb_suspend_supported
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file shows the value of the USB Suspend Supported bit in
|
||||||
|
vSafe5V Fixed Supply Object. If the bit is set then the device
|
||||||
|
will follow the USB 2.0 and USB 3.2 rules for suspend and
|
||||||
|
resume.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/unconstrained_power
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file shows the value of the Unconstrained Power bit in
|
||||||
|
vSafe5V Fixed Supply Object. The bit is set when an external
|
||||||
|
source of power, powerful enough to power the entire system on
|
||||||
|
its own, is available for the device.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/usb_communication_capable
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file shows the value of the USB Communication Capable bit in
|
||||||
|
vSafe5V Fixed Supply Object.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/dual_role_data
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file shows the value of the Dual-Role Data bit in vSafe5V
|
||||||
|
Fixed Supply Object. Dual role data means the ability to act as both
|
||||||
|
USB host and USB device.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/1:fixed_supply/unchunked_extended_messages_supported
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file shows the value of the Unchunked Extended Messages
|
||||||
|
Supported bit in vSafe5V Fixed Supply Object.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:fixed_supply/voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The voltage the supply supports in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:fixed_supply/maximum_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum current of the fixed source supply in milliamperes.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:fixed_supply/operational_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Operational current of the sink in milliamperes.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:fixed_supply/fast_role_swap_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
This file contains the value of the "Fast Role Swap USB Type-C
|
||||||
|
Current" field that tells the current level the sink requires
|
||||||
|
after a Fast Role Swap.
|
||||||
|
0 - Fast Swap not supported
|
||||||
|
1 - Default USB Power
|
||||||
|
2 - 1.5A@5V
|
||||||
|
3 - 3.0A@5V
|
||||||
|
|
||||||
|
Variable Supplies
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Variable Power Supply PDO.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply/maximum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:variable_supply/minimum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Minimum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:variable_supply/maximum_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The maximum current in milliamperes that the source can supply
|
||||||
|
at the given Voltage range.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:variable_supply/operational_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The operational current in milliamperes that the sink requires
|
||||||
|
at the given Voltage range.
|
||||||
|
|
||||||
|
Battery Supplies
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Battery PDO.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery/maximum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:battery/minimum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Minimum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:battery/maximum_power
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum allowable Power in milliwatts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../sink-capabilities/<position>:battery/operational_power
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The operational power that the sink requires at the given
|
||||||
|
voltage range.
|
||||||
|
|
||||||
|
Standard Power Range (SPR) Programmable Power Supplies
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Programmable Power Supply (PPS) Augmented PDO (APDO).
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/maximum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/minimum_voltage
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Minimum Voltage in millivolts.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../<capability>/<position>:programmable_supply/maximum_current
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
Maximum Current in milliamperes.
|
||||||
|
|
||||||
|
What: /sys/class/usb_power_delivery/.../source-capabilities/<position>:programmable_supply/pps_power_limited
|
||||||
|
Date: May 2022
|
||||||
|
Contact: Heikki Krogerus <heikki.krogerus@linux.intel.com>
|
||||||
|
Description:
|
||||||
|
The PPS Power Limited bit indicates whether or not the source
|
||||||
|
supply will exceed the rated output power if requested.
|
||||||
33
Documentation/ABI/testing/sysfs-class-vduse
Normal file
33
Documentation/ABI/testing/sysfs-class-vduse
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
What: /sys/class/vduse/
|
||||||
|
Date: Oct 2021
|
||||||
|
KernelVersion: 5.15
|
||||||
|
Contact: Yongji Xie <xieyongji@bytedance.com>
|
||||||
|
Description:
|
||||||
|
The vduse/ class sub-directory belongs to the VDUSE
|
||||||
|
framework and provides a sysfs interface for configuring
|
||||||
|
VDUSE devices.
|
||||||
|
|
||||||
|
What: /sys/class/vduse/control/
|
||||||
|
Date: Oct 2021
|
||||||
|
KernelVersion: 5.15
|
||||||
|
Contact: Yongji Xie <xieyongji@bytedance.com>
|
||||||
|
Description:
|
||||||
|
This directory entry is created for the control device
|
||||||
|
of VDUSE framework.
|
||||||
|
|
||||||
|
What: /sys/class/vduse/<device-name>/
|
||||||
|
Date: Oct 2021
|
||||||
|
KernelVersion: 5.15
|
||||||
|
Contact: Yongji Xie <xieyongji@bytedance.com>
|
||||||
|
Description:
|
||||||
|
This directory entry is created when a VDUSE device is
|
||||||
|
created via the control device.
|
||||||
|
|
||||||
|
What: /sys/class/vduse/<device-name>/msg_timeout
|
||||||
|
Date: Oct 2021
|
||||||
|
KernelVersion: 5.15
|
||||||
|
Contact: Yongji Xie <xieyongji@bytedance.com>
|
||||||
|
Description:
|
||||||
|
(RW) The timeout (in seconds) for waiting for the control
|
||||||
|
message's response from userspace. Default value is 30s.
|
||||||
|
Writing a '0' to the file means to disable the timeout.
|
||||||
@@ -74,7 +74,7 @@ Description:
|
|||||||
|
|
||||||
Reads also cause the AC alarm timer status to be reset.
|
Reads also cause the AC alarm timer status to be reset.
|
||||||
|
|
||||||
Another way to reset the the status of the AC alarm timer is to
|
Another way to reset the status of the AC alarm timer is to
|
||||||
write (the number) 0 to this file.
|
write (the number) 0 to this file.
|
||||||
|
|
||||||
If the status return value indicates that the timer has expired,
|
If the status return value indicates that the timer has expired,
|
||||||
|
|||||||
@@ -46,33 +46,69 @@ Description:
|
|||||||
that is supported by the hardware. The possible values
|
that is supported by the hardware. The possible values
|
||||||
are "MAPv4" or "MAPv5".
|
are "MAPv4" or "MAPv5".
|
||||||
|
|
||||||
|
What: .../XXXXXXX.ipa/endpoint_id/
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: v5.19
|
||||||
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
|
Description:
|
||||||
|
The .../XXXXXXX.ipa/endpoint_id/ directory contains
|
||||||
|
attributes that define IDs associated with IPA
|
||||||
|
endpoints. The "rx" or "tx" in an endpoint name is
|
||||||
|
from the perspective of the AP. An endpoint ID is a
|
||||||
|
small unsigned integer.
|
||||||
|
|
||||||
|
What: .../XXXXXXX.ipa/endpoint_id/modem_rx
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: v5.19
|
||||||
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
|
Description:
|
||||||
|
The .../XXXXXXX.ipa/endpoint_id/modem_rx file contains
|
||||||
|
the ID of the AP endpoint on which packets originating
|
||||||
|
from the embedded modem are received.
|
||||||
|
|
||||||
|
What: .../XXXXXXX.ipa/endpoint_id/modem_tx
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: v5.19
|
||||||
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
|
Description:
|
||||||
|
The .../XXXXXXX.ipa/endpoint_id/modem_tx file contains
|
||||||
|
the ID of the AP endpoint on which packets destined
|
||||||
|
for the embedded modem are sent.
|
||||||
|
|
||||||
|
What: .../XXXXXXX.ipa/endpoint_id/monitor_rx
|
||||||
|
Date: July 2022
|
||||||
|
KernelVersion: v5.19
|
||||||
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
|
Description:
|
||||||
|
The .../XXXXXXX.ipa/endpoint_id/monitor_rx file contains
|
||||||
|
the ID of the AP endpoint on which IPA "monitor" data is
|
||||||
|
received. The monitor endpoint supplies replicas of
|
||||||
|
packets that enter the IPA hardware for processing.
|
||||||
|
Each replicated packet is preceded by a fixed-size "ODL"
|
||||||
|
header (see .../XXXXXXX.ipa/feature/monitor, above).
|
||||||
|
Large packets are truncated, to reduce the bandwidth
|
||||||
|
required to provide the monitor function.
|
||||||
|
|
||||||
What: .../XXXXXXX.ipa/modem/
|
What: .../XXXXXXX.ipa/modem/
|
||||||
Date: June 2021
|
Date: June 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: Alex Elder <elder@kernel.org>
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The .../XXXXXXX.ipa/modem/ directory contains a set of
|
The .../XXXXXXX.ipa/modem/ directory contains attributes
|
||||||
attributes describing properties of the modem execution
|
describing properties of the modem embedded in the SoC.
|
||||||
environment reachable by the IPA hardware.
|
|
||||||
|
|
||||||
What: .../XXXXXXX.ipa/modem/rx_endpoint_id
|
What: .../XXXXXXX.ipa/modem/rx_endpoint_id
|
||||||
Date: June 2021
|
Date: June 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: Alex Elder <elder@kernel.org>
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The .../XXXXXXX.ipa/feature/rx_endpoint_id file contains
|
The .../XXXXXXX.ipa/modem/rx_endpoint_id file duplicates
|
||||||
the AP endpoint ID that receives packets originating from
|
the value found in .../XXXXXXX.ipa/endpoint_id/modem_rx.
|
||||||
the modem execution environment. The "rx" is from the
|
|
||||||
perspective of the AP; this endpoint is considered an "IPA
|
|
||||||
producer". An endpoint ID is a small unsigned integer.
|
|
||||||
|
|
||||||
What: .../XXXXXXX.ipa/modem/tx_endpoint_id
|
What: .../XXXXXXX.ipa/modem/tx_endpoint_id
|
||||||
Date: June 2021
|
Date: June 2021
|
||||||
KernelVersion: v5.14
|
KernelVersion: v5.14
|
||||||
Contact: Alex Elder <elder@kernel.org>
|
Contact: Alex Elder <elder@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The .../XXXXXXX.ipa/feature/tx_endpoint_id file contains
|
The .../XXXXXXX.ipa/modem/tx_endpoint_id file duplicates
|
||||||
the AP endpoint ID used to transmit packets destined for
|
the value found in .../XXXXXXX.ipa/endpoint_id/modem_tx.
|
||||||
the modem execution environment. The "tx" is from the
|
|
||||||
perspective of the AP; this endpoint is considered an "IPA
|
|
||||||
consumer". An endpoint ID is a small unsigned integer.
|
|
||||||
|
|||||||
@@ -303,5 +303,5 @@ Date: Apr 2010
|
|||||||
Contact: Dominik Brodowski <linux@dominikbrodowski.net>
|
Contact: Dominik Brodowski <linux@dominikbrodowski.net>
|
||||||
Description:
|
Description:
|
||||||
Reports the runtime PM children usage count of a device, or
|
Reports the runtime PM children usage count of a device, or
|
||||||
0 if the the children will be ignored.
|
0 if the children will be ignored.
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
What: /sys/devices/socX
|
What: /sys/devices/socX
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The /sys/devices/ directory contains a sub-directory for each
|
The /sys/devices/ directory contains a sub-directory for each
|
||||||
System-on-Chip (SoC) device on a running platform. Information
|
System-on-Chip (SoC) device on a running platform. Information
|
||||||
@@ -14,14 +14,14 @@ Description:
|
|||||||
|
|
||||||
What: /sys/devices/socX/machine
|
What: /sys/devices/socX/machine
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Read-only attribute common to all SoCs. Contains the SoC machine
|
Read-only attribute common to all SoCs. Contains the SoC machine
|
||||||
name (e.g. Ux500).
|
name (e.g. Ux500).
|
||||||
|
|
||||||
What: /sys/devices/socX/family
|
What: /sys/devices/socX/family
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Read-only attribute common to all SoCs. Contains SoC family name
|
Read-only attribute common to all SoCs. Contains SoC family name
|
||||||
(e.g. DB8500).
|
(e.g. DB8500).
|
||||||
@@ -59,7 +59,7 @@ Description:
|
|||||||
|
|
||||||
What: /sys/devices/socX/soc_id
|
What: /sys/devices/socX/soc_id
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Read-only attribute supported by most SoCs. In the case of
|
Read-only attribute supported by most SoCs. In the case of
|
||||||
ST-Ericsson's chips this contains the SoC serial number.
|
ST-Ericsson's chips this contains the SoC serial number.
|
||||||
@@ -72,21 +72,21 @@ Description:
|
|||||||
|
|
||||||
What: /sys/devices/socX/revision
|
What: /sys/devices/socX/revision
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Read-only attribute supported by most SoCs. Contains the SoC's
|
Read-only attribute supported by most SoCs. Contains the SoC's
|
||||||
manufacturing revision number.
|
manufacturing revision number.
|
||||||
|
|
||||||
What: /sys/devices/socX/process
|
What: /sys/devices/socX/process
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
Read-only attribute supported ST-Ericsson's silicon. Contains the
|
Read-only attribute supported ST-Ericsson's silicon. Contains the
|
||||||
the process by which the silicon chip was manufactured.
|
the process by which the silicon chip was manufactured.
|
||||||
|
|
||||||
What: /sys/bus/soc
|
What: /sys/bus/soc
|
||||||
Date: January 2012
|
Date: January 2012
|
||||||
contact: Lee Jones <lee.jones@linaro.org>
|
contact: Lee Jones <lee@kernel.org>
|
||||||
Description:
|
Description:
|
||||||
The /sys/bus/soc/ directory contains the usual sub-folders
|
The /sys/bus/soc/ directory contains the usual sub-folders
|
||||||
expected under most buses. /sys/bus/soc/devices is of particular
|
expected under most buses. /sys/bus/soc/devices is of particular
|
||||||
|
|||||||
@@ -67,8 +67,7 @@ Description: Discover NUMA node a CPU belongs to
|
|||||||
/sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
|
/sys/devices/system/cpu/cpu42/node2 -> ../../node/node2
|
||||||
|
|
||||||
|
|
||||||
What: /sys/devices/system/cpu/cpuX/topology/core_id
|
What: /sys/devices/system/cpu/cpuX/topology/core_siblings
|
||||||
/sys/devices/system/cpu/cpuX/topology/core_siblings
|
|
||||||
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
|
/sys/devices/system/cpu/cpuX/topology/core_siblings_list
|
||||||
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
/sys/devices/system/cpu/cpuX/topology/physical_package_id
|
||||||
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
/sys/devices/system/cpu/cpuX/topology/thread_siblings
|
||||||
@@ -84,10 +83,6 @@ Description: CPU topology files that describe a logical CPU's relationship
|
|||||||
|
|
||||||
Briefly, the files above are:
|
Briefly, the files above are:
|
||||||
|
|
||||||
core_id: the CPU core ID of cpuX. Typically it is the
|
|
||||||
hardware platform's identifier (rather than the kernel's).
|
|
||||||
The actual value is architecture and platform dependent.
|
|
||||||
|
|
||||||
core_siblings: internal kernel map of cpuX's hardware threads
|
core_siblings: internal kernel map of cpuX's hardware threads
|
||||||
within the same physical_package_id.
|
within the same physical_package_id.
|
||||||
|
|
||||||
@@ -528,6 +523,7 @@ What: /sys/devices/system/cpu/vulnerabilities
|
|||||||
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
|
||||||
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
|
||||||
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
|
/sys/devices/system/cpu/vulnerabilities/mmio_stale_data
|
||||||
|
/sys/devices/system/cpu/vulnerabilities/retbleed
|
||||||
Date: January 2018
|
Date: January 2018
|
||||||
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
|
||||||
Description: Information about CPU vulnerabilities
|
Description: Information about CPU vulnerabilities
|
||||||
|
|||||||
@@ -0,0 +1,61 @@
|
|||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_root_entry_hash
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns the root entry hash for the static
|
||||||
|
region if one is programmed, else it returns the
|
||||||
|
string: "hash not programmed". This file is only
|
||||||
|
visible if the underlying device supports it.
|
||||||
|
Format: string.
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_root_entry_hash
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns the root entry hash for the partial
|
||||||
|
reconfiguration region if one is programmed, else it
|
||||||
|
returns the string: "hash not programmed". This file
|
||||||
|
is only visible if the underlying device supports it.
|
||||||
|
Format: string.
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_root_entry_hash
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns the root entry hash for the BMC image
|
||||||
|
if one is programmed, else it returns the string:
|
||||||
|
"hash not programmed". This file is only visible if the
|
||||||
|
underlying device supports it.
|
||||||
|
Format: string.
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/sr_canceled_csks
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns a list of indices for canceled code
|
||||||
|
signing keys for the static region. The standard bitmap
|
||||||
|
list format is used (e.g. "1,2-6,9").
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/pr_canceled_csks
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns a list of indices for canceled code
|
||||||
|
signing keys for the partial reconfiguration region. The
|
||||||
|
standard bitmap list format is used (e.g. "1,2-6,9").
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/bmc_canceled_csks
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns a list of indices for canceled code
|
||||||
|
signing keys for the BMC. The standard bitmap list format
|
||||||
|
is used (e.g. "1,2-6,9").
|
||||||
|
|
||||||
|
What: /sys/bus/platform/drivers/intel-m10bmc-sec-update/.../security/flash_count
|
||||||
|
Date: Sep 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: Russ Weight <russell.h.weight@intel.com>
|
||||||
|
Description: Read only. Returns number of times the secure update
|
||||||
|
staging area has been flashed.
|
||||||
|
Format: "%u".
|
||||||
49
Documentation/ABI/testing/sysfs-driver-qat
Normal file
49
Documentation/ABI/testing/sysfs-driver-qat
Normal file
@@ -0,0 +1,49 @@
|
|||||||
|
What: /sys/bus/pci/devices/<BDF>/qat/state
|
||||||
|
Date: June 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (RW) Reports the current state of the QAT device. Write to
|
||||||
|
the file to start or stop the device.
|
||||||
|
|
||||||
|
The values are:
|
||||||
|
|
||||||
|
* up: the device is up and running
|
||||||
|
* down: the device is down
|
||||||
|
|
||||||
|
|
||||||
|
It is possible to transition the device from up to down only
|
||||||
|
if the device is up and vice versa.
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
||||||
|
|
||||||
|
What: /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||||
|
Date: June 2022
|
||||||
|
KernelVersion: 5.20
|
||||||
|
Contact: qat-linux@intel.com
|
||||||
|
Description: (RW) Reports the current configuration of the QAT device.
|
||||||
|
Write to the file to change the configured services.
|
||||||
|
|
||||||
|
The values are:
|
||||||
|
|
||||||
|
* sym;asym: the device is configured for running crypto
|
||||||
|
services
|
||||||
|
* dc: the device is configured for running compression services
|
||||||
|
|
||||||
|
It is possible to set the configuration only if the device
|
||||||
|
is in the `down` state (see /sys/bus/pci/devices/<BDF>/qat/state)
|
||||||
|
|
||||||
|
The following example shows how to change the configuration of
|
||||||
|
a device configured for running crypto services in order to
|
||||||
|
run data compression::
|
||||||
|
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat/state
|
||||||
|
up
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||||
|
sym;asym
|
||||||
|
# echo down > /sys/bus/pci/devices/<BDF>/qat/state
|
||||||
|
# echo dc > /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||||
|
# echo up > /sys/bus/pci/devices/<BDF>/qat/state
|
||||||
|
# cat /sys/bus/pci/devices/<BDF>/qat/cfg_services
|
||||||
|
dc
|
||||||
|
|
||||||
|
This attribute is only available for qat_4xxx devices.
|
||||||
@@ -42,5 +42,5 @@ KernelVersion: 5.10
|
|||||||
Contact: Maximilian Heyne <mheyne@amazon.de>
|
Contact: Maximilian Heyne <mheyne@amazon.de>
|
||||||
Description:
|
Description:
|
||||||
Whether to enable the persistent grants feature or not. Note
|
Whether to enable the persistent grants feature or not. Note
|
||||||
that this option only takes effect on newly created backends.
|
that this option only takes effect on newly connected backends.
|
||||||
The default is Y (enable).
|
The default is Y (enable).
|
||||||
|
|||||||
@@ -15,5 +15,5 @@ KernelVersion: 5.10
|
|||||||
Contact: Maximilian Heyne <mheyne@amazon.de>
|
Contact: Maximilian Heyne <mheyne@amazon.de>
|
||||||
Description:
|
Description:
|
||||||
Whether to enable the persistent grants feature or not. Note
|
Whether to enable the persistent grants feature or not. Note
|
||||||
that this option only takes effect on newly created frontends.
|
that this option only takes effect on newly connected frontends.
|
||||||
The default is Y (enable).
|
The default is Y (enable).
|
||||||
|
|||||||
@@ -12,8 +12,9 @@ Description:
|
|||||||
configuration data to the guest userspace.
|
configuration data to the guest userspace.
|
||||||
|
|
||||||
The authoritative guest-side hardware interface documentation
|
The authoritative guest-side hardware interface documentation
|
||||||
to the fw_cfg device can be found in "docs/specs/fw_cfg.txt"
|
to the fw_cfg device can be found in "docs/specs/fw_cfg.rst"
|
||||||
in the QEMU source tree.
|
in the QEMU source tree, or online at:
|
||||||
|
https://qemu-project.gitlab.io/qemu/specs/fw_cfg.html
|
||||||
|
|
||||||
**SysFS fw_cfg Interface**
|
**SysFS fw_cfg Interface**
|
||||||
|
|
||||||
|
|||||||
@@ -580,3 +580,33 @@ Date: January 2022
|
|||||||
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||||
Description: Controls max # of node block writes to be used for roll forward
|
Description: Controls max # of node block writes to be used for roll forward
|
||||||
recovery. This can limit the roll forward recovery time.
|
recovery. This can limit the roll forward recovery time.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/unusable_blocks_per_sec
|
||||||
|
Date: June 2022
|
||||||
|
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
|
||||||
|
Description: Shows the number of unusable blocks in a section which was defined by
|
||||||
|
the zone capacity reported by the underlying zoned device.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/current_atomic_write
|
||||||
|
Date: July 2022
|
||||||
|
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||||
|
Description: Show the total current atomic write block count, which is not committed yet.
|
||||||
|
This is a read-only entry.
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/peak_atomic_write
|
||||||
|
Date: July 2022
|
||||||
|
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||||
|
Description: Show the peak value of total current atomic write block count after boot.
|
||||||
|
If you write "0" here, you can initialize to "0".
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/committed_atomic_block
|
||||||
|
Date: July 2022
|
||||||
|
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||||
|
Description: Show the accumulated total committed atomic write block count after boot.
|
||||||
|
If you write "0" here, you can initialize to "0".
|
||||||
|
|
||||||
|
What: /sys/fs/f2fs/<disk>/revoked_atomic_block
|
||||||
|
Date: July 2022
|
||||||
|
Contact: "Daeho Jeong" <daehojeong@google.com>
|
||||||
|
Description: Show the accumulated total revoked atomic write block count after boot.
|
||||||
|
If you write "0" here, you can initialize to "0".
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ Description: Kernel Samepage Merging daemon sysfs interface
|
|||||||
sleep_millisecs: how many milliseconds ksm should sleep between
|
sleep_millisecs: how many milliseconds ksm should sleep between
|
||||||
scans.
|
scans.
|
||||||
|
|
||||||
See Documentation/vm/ksm.rst for more information.
|
See Documentation/mm/ksm.rst for more information.
|
||||||
|
|
||||||
What: /sys/kernel/mm/ksm/merge_across_nodes
|
What: /sys/kernel/mm/ksm/merge_across_nodes
|
||||||
Date: January 2013
|
Date: January 2013
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ Description:
|
|||||||
The alloc_calls file is read-only and lists the kernel code
|
The alloc_calls file is read-only and lists the kernel code
|
||||||
locations from which allocations for this cache were performed.
|
locations from which allocations for this cache were performed.
|
||||||
The alloc_calls file only contains information if debugging is
|
The alloc_calls file only contains information if debugging is
|
||||||
enabled for that cache (see Documentation/vm/slub.rst).
|
enabled for that cache (see Documentation/mm/slub.rst).
|
||||||
|
|
||||||
What: /sys/kernel/slab/<cache>/alloc_fastpath
|
What: /sys/kernel/slab/<cache>/alloc_fastpath
|
||||||
Date: February 2008
|
Date: February 2008
|
||||||
@@ -219,7 +219,7 @@ Contact: Pekka Enberg <penberg@cs.helsinki.fi>,
|
|||||||
Description:
|
Description:
|
||||||
The free_calls file is read-only and lists the locations of
|
The free_calls file is read-only and lists the locations of
|
||||||
object frees if slab debugging is enabled (see
|
object frees if slab debugging is enabled (see
|
||||||
Documentation/vm/slub.rst).
|
Documentation/mm/slub.rst).
|
||||||
|
|
||||||
What: /sys/kernel/slab/<cache>/free_fastpath
|
What: /sys/kernel/slab/<cache>/free_fastpath
|
||||||
Date: February 2008
|
Date: February 2008
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
config WARN_MISSING_DOCUMENTS
|
config WARN_MISSING_DOCUMENTS
|
||||||
|
|
||||||
bool "Warn if there's a missing documentation file"
|
bool "Warn if there's a missing documentation file"
|
||||||
depends on COMPILE_TEST
|
depends on COMPILE_TEST
|
||||||
help
|
help
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ PCI Endpoint Framework
|
|||||||
pci-test-howto
|
pci-test-howto
|
||||||
pci-ntb-function
|
pci-ntb-function
|
||||||
pci-ntb-howto
|
pci-ntb-howto
|
||||||
|
pci-vntb-function
|
||||||
|
pci-vntb-howto
|
||||||
|
|
||||||
function/binding/pci-test
|
function/binding/pci-test
|
||||||
function/binding/pci-ntb
|
function/binding/pci-ntb
|
||||||
|
|||||||
129
Documentation/PCI/endpoint/pci-vntb-function.rst
Normal file
129
Documentation/PCI/endpoint/pci-vntb-function.rst
Normal file
@@ -0,0 +1,129 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=================
|
||||||
|
PCI vNTB Function
|
||||||
|
=================
|
||||||
|
|
||||||
|
:Author: Frank Li <Frank.Li@nxp.com>
|
||||||
|
|
||||||
|
The differences between the PCI NTB function and the PCI vNTB function are:
|
||||||
|
|
||||||
|
PCI NTB function needs at least two endpoint instances and connects HOST1
|
||||||
|
and HOST2.
|
||||||
|
|
||||||
|
PCI vNTB function only uses one host and one endpoint (EP), and uses NTB
|
||||||
|
to connect the EP and the PCI host.
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
|
||||||
|
+------------+ +---------------------------------------+
|
||||||
|
| | | |
|
||||||
|
+------------+ | +--------------+
|
||||||
|
| NTB | | | NTB |
|
||||||
|
| NetDev | | | NetDev |
|
||||||
|
+------------+ | +--------------+
|
||||||
|
| NTB | | | NTB |
|
||||||
|
| Transfer | | | Transfer |
|
||||||
|
+------------+ | +--------------+
|
||||||
|
| | | | |
|
||||||
|
| PCI NTB | | | |
|
||||||
|
| EPF | | | |
|
||||||
|
| Driver | | | PCI Virtual |
|
||||||
|
| | +---------------+ | NTB Driver |
|
||||||
|
| | | PCI EP NTB |<------>| |
|
||||||
|
| | | FN Driver | | |
|
||||||
|
+------------+ +---------------+ +--------------+
|
||||||
|
| | | | | |
|
||||||
|
| PCI BUS | <-----> | PCI EP BUS | | Virtual PCI |
|
||||||
|
| | PCI | | | BUS |
|
||||||
|
+------------+ +---------------+--------+--------------+
|
||||||
|
PCI RC PCI EP
|
||||||
|
|
||||||
|
Constructs used for Implementing vNTB
|
||||||
|
=====================================
|
||||||
|
|
||||||
|
1) Config Region
|
||||||
|
2) Self Scratchpad Registers
|
||||||
|
3) Peer Scratchpad Registers
|
||||||
|
4) Doorbell (DB) Registers
|
||||||
|
5) Memory Window (MW)
|
||||||
|
|
||||||
|
|
||||||
|
Config Region:
|
||||||
|
--------------
|
||||||
|
|
||||||
|
It is the same as for the PCI NTB function driver.
|
||||||
|
|
||||||
|
Scratchpad Registers:
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
It is appended after Config region.
|
||||||
|
|
||||||
|
.. code-block:: text
|
||||||
|
|
||||||
|
|
||||||
|
+--------------------------------------------------+ Base
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
| Common Config Register |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
| |
|
||||||
|
+-----------------------+--------------------------+ Base + span_offset
|
||||||
|
| | |
|
||||||
|
| Peer Span Space | Span Space |
|
||||||
|
| | |
|
||||||
|
| | |
|
||||||
|
+-----------------------+--------------------------+ Base + span_offset
|
||||||
|
| | | + span_count * 4
|
||||||
|
| | |
|
||||||
|
| Span Space | Peer Span Space |
|
||||||
|
| | |
|
||||||
|
+-----------------------+--------------------------+
|
||||||
|
Virtual PCI Pcie Endpoint
|
||||||
|
NTB Driver NTB Driver
|
||||||
|
|
||||||
|
|
||||||
|
Doorbell Registers:
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Doorbell Registers are used by the hosts to interrupt each other.
|
||||||
|
|
||||||
|
Memory Window:
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Actual transfer of data between the two hosts will happen using the
|
||||||
|
memory window.
|
||||||
|
|
||||||
|
Modeling Constructs:
|
||||||
|
====================
|
||||||
|
|
||||||
|
32-bit BARs.
|
||||||
|
|
||||||
|
====== ===============
|
||||||
|
BAR NO CONSTRUCTS USED
|
||||||
|
====== ===============
|
||||||
|
BAR0 Config Region
|
||||||
|
BAR1 Doorbell
|
||||||
|
BAR2 Memory Window 1
|
||||||
|
BAR3 Memory Window 2
|
||||||
|
BAR4 Memory Window 3
|
||||||
|
BAR5 Memory Window 4
|
||||||
|
====== ===============
|
||||||
|
|
||||||
|
64-bit BARs.
|
||||||
|
|
||||||
|
====== ===============================
|
||||||
|
BAR NO CONSTRUCTS USED
|
||||||
|
====== ===============================
|
||||||
|
BAR0 Config Region + Scratchpad
|
||||||
|
BAR1
|
||||||
|
BAR2 Doorbell
|
||||||
|
BAR3
|
||||||
|
BAR4 Memory Window 1
|
||||||
|
BAR5
|
||||||
|
====== ===============================
|
||||||
|
|
||||||
|
|
||||||
167
Documentation/PCI/endpoint/pci-vntb-howto.rst
Normal file
167
Documentation/PCI/endpoint/pci-vntb-howto.rst
Normal file
@@ -0,0 +1,167 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===================================================================
|
||||||
|
PCI Non-Transparent Bridge (NTB) Endpoint Function (EPF) User Guide
|
||||||
|
===================================================================
|
||||||
|
|
||||||
|
:Author: Frank Li <Frank.Li@nxp.com>
|
||||||
|
|
||||||
|
This document is a guide to help users use pci-epf-vntb function driver
|
||||||
|
and ntb_hw_epf host driver for NTB functionality. The list of steps to
|
||||||
|
be followed in the host side and EP side is given below. For the hardware
|
||||||
|
configuration and internals of NTB using configurable endpoints see
|
||||||
|
Documentation/PCI/endpoint/pci-vntb-function.rst
|
||||||
|
|
||||||
|
Endpoint Device
|
||||||
|
===============
|
||||||
|
|
||||||
|
Endpoint Controller Devices
|
||||||
|
---------------------------
|
||||||
|
|
||||||
|
To find the list of endpoint controller devices in the system::
|
||||||
|
|
||||||
|
# ls /sys/class/pci_epc/
|
||||||
|
5f010000.pcie_ep
|
||||||
|
|
||||||
|
If PCI_ENDPOINT_CONFIGFS is enabled::
|
||||||
|
|
||||||
|
# ls /sys/kernel/config/pci_ep/controllers
|
||||||
|
5f010000.pcie_ep
|
||||||
|
|
||||||
|
Endpoint Function Drivers
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
To find the list of endpoint function drivers in the system::
|
||||||
|
|
||||||
|
# ls /sys/bus/pci-epf/drivers
|
||||||
|
pci_epf_ntb pci_epf_test pci_epf_vntb
|
||||||
|
|
||||||
|
If PCI_ENDPOINT_CONFIGFS is enabled::
|
||||||
|
|
||||||
|
# ls /sys/kernel/config/pci_ep/functions
|
||||||
|
pci_epf_ntb pci_epf_test pci_epf_vntb
|
||||||
|
|
||||||
|
|
||||||
|
Creating pci-epf-vntb Device
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
PCI endpoint function device can be created using the configfs. To create
|
||||||
|
pci-epf-vntb device, the following commands can be used::
|
||||||
|
|
||||||
|
# mount -t configfs none /sys/kernel/config
|
||||||
|
# cd /sys/kernel/config/pci_ep/
|
||||||
|
# mkdir functions/pci_epf_vntb/func1
|
||||||
|
|
||||||
|
The "mkdir func1" above creates the pci-epf-vntb function device that will
|
||||||
|
be probed by pci_epf_vntb driver.
|
||||||
|
|
||||||
|
The PCI endpoint framework populates the directory with the following
|
||||||
|
configurable fields::
|
||||||
|
|
||||||
|
# ls functions/pci_epf_vntb/func1
|
||||||
|
baseclass_code deviceid msi_interrupts pci-epf-ntb.0
|
||||||
|
progif_code secondary subsys_id vendorid
|
||||||
|
cache_line_size interrupt_pin msix_interrupts primary
|
||||||
|
revid subclass_code subsys_vendor_id
|
||||||
|
|
||||||
|
The PCI endpoint function driver populates these entries with default values
|
||||||
|
when the device is bound to the driver. The pci-epf-vntb driver populates
|
||||||
|
vendorid with 0xffff and interrupt_pin with 0x0001::
|
||||||
|
|
||||||
|
# cat functions/pci_epf_vntb/func1/vendorid
|
||||||
|
0xffff
|
||||||
|
# cat functions/pci_epf_vntb/func1/interrupt_pin
|
||||||
|
0x0001
|
||||||
|
|
||||||
|
|
||||||
|
Configuring pci-epf-vntb Device
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
The user can configure the pci-epf-vntb device using its configfs entry. In order
|
||||||
|
to change the vendorid and the deviceid, the following
|
||||||
|
commands can be used::
|
||||||
|
|
||||||
|
# echo 0x1957 > functions/pci_epf_vntb/func1/vendorid
|
||||||
|
# echo 0x0809 > functions/pci_epf_vntb/func1/deviceid
|
||||||
|
|
||||||
|
In order to configure NTB specific attributes, a new sub-directory to func1
|
||||||
|
should be created::
|
||||||
|
|
||||||
|
# mkdir functions/pci_epf_vntb/func1/pci_epf_vntb.0/
|
||||||
|
|
||||||
|
The NTB function driver will populate this directory with various attributes
|
||||||
|
that can be configured by the user::
|
||||||
|
|
||||||
|
# ls functions/pci_epf_vntb/func1/pci_epf_vntb.0/
|
||||||
|
db_count mw1 mw2 mw3 mw4 num_mws
|
||||||
|
spad_count
|
||||||
|
|
||||||
|
A sample configuration for NTB function is given below::
|
||||||
|
|
||||||
|
# echo 4 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/db_count
|
||||||
|
# echo 128 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/spad_count
|
||||||
|
# echo 1 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/num_mws
|
||||||
|
# echo 0x100000 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/mw1
|
||||||
|
|
||||||
|
A sample configuration for virtual NTB driver for virtual PCI bus::
|
||||||
|
|
||||||
|
# echo 0x1957 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_vid
|
||||||
|
# echo 0x080A > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vntb_pid
|
||||||
|
# echo 0x10 > functions/pci_epf_vntb/func1/pci_epf_vntb.0/vbus_number
|
||||||
|
|
||||||
|
Binding pci-epf-vntb Device to EP Controller
|
||||||
|
--------------------------------------------
|
||||||
|
|
||||||
|
NTB function device should be attached to PCI endpoint controllers
|
||||||
|
connected to the host::
|
||||||
|
|
||||||
|
# ln -s controllers/5f010000.pcie_ep functions/pci_epf_vntb/func1/primary
|
||||||
|
|
||||||
|
Once the above step is completed, the PCI endpoint controllers are ready to
|
||||||
|
establish a link with the host.
|
||||||
|
|
||||||
|
|
||||||
|
Start the Link
|
||||||
|
--------------
|
||||||
|
|
||||||
|
In order for the endpoint device to establish a link with the host, the _start_
|
||||||
|
field should be populated with '1'. For NTB, both the PCI endpoint controllers
|
||||||
|
should establish link with the host (imx8 doesn't need this step)::
|
||||||
|
|
||||||
|
# echo 1 > controllers/5f010000.pcie_ep/start
|
||||||
|
|
||||||
|
RootComplex Device
|
||||||
|
==================
|
||||||
|
|
||||||
|
lspci Output at Host side
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Note that the devices listed here correspond to the values populated in
|
||||||
|
"Creating pci-epf-vntb Device" section above::
|
||||||
|
|
||||||
|
# lspci
|
||||||
|
00:00.0 PCI bridge: Freescale Semiconductor Inc Device 0000 (rev 01)
|
||||||
|
01:00.0 RAM memory: Freescale Semiconductor Inc Device 0809
|
||||||
|
|
||||||
|
Endpoint Device / Virtual PCI bus
|
||||||
|
=================================
|
||||||
|
|
||||||
|
lspci Output at EP Side / Virtual PCI bus
|
||||||
|
-----------------------------------------
|
||||||
|
|
||||||
|
Note that the devices listed here correspond to the values populated in
|
||||||
|
"Creating pci-epf-vntb Device" section above::
|
||||||
|
|
||||||
|
# lspci
|
||||||
|
10:00.0 Unassigned class [ffff]: Dawicontrol Computersysteme GmbH Device 1234 (rev ff)
|
||||||
|
|
||||||
|
Using ntb_hw_epf Device
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
The host side software follows the standard NTB software architecture in Linux.
|
||||||
|
All the existing client side NTB utilities like NTB Transport Client and NTB
|
||||||
|
Netdev, NTB Ping Pong Test Client and NTB Tool Test Client can be used with NTB
|
||||||
|
function device.
|
||||||
|
|
||||||
|
For more information on NTB see
|
||||||
|
:doc:`Non-Transparent Bridge <../../driver-api/ntb>`
|
||||||
@@ -125,14 +125,14 @@ Following piece of code illustrates the usage of the SR-IOV API.
|
|||||||
...
|
...
|
||||||
}
|
}
|
||||||
|
|
||||||
static int dev_suspend(struct pci_dev *dev, pm_message_t state)
|
static int dev_suspend(struct device *dev)
|
||||||
{
|
{
|
||||||
...
|
...
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int dev_resume(struct pci_dev *dev)
|
static int dev_resume(struct device *dev)
|
||||||
{
|
{
|
||||||
...
|
...
|
||||||
|
|
||||||
@@ -165,8 +165,7 @@ Following piece of code illustrates the usage of the SR-IOV API.
|
|||||||
.id_table = dev_id_table,
|
.id_table = dev_id_table,
|
||||||
.probe = dev_probe,
|
.probe = dev_probe,
|
||||||
.remove = dev_remove,
|
.remove = dev_remove,
|
||||||
.suspend = dev_suspend,
|
.driver.pm = &dev_pm_ops,
|
||||||
.resume = dev_resume,
|
|
||||||
.shutdown = dev_shutdown,
|
.shutdown = dev_shutdown,
|
||||||
.sriov_configure = dev_sriov_configure,
|
.sriov_configure = dev_sriov_configure,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ implementation of that functionality. To support the historical interface of
|
|||||||
mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP.
|
mmap() through files in /proc/bus/pci, platforms may also set HAVE_PCI_MMAP.
|
||||||
|
|
||||||
Alternatively, platforms which set HAVE_PCI_MMAP may provide their own
|
Alternatively, platforms which set HAVE_PCI_MMAP may provide their own
|
||||||
implementation of pci_mmap_page_range() instead of defining
|
implementation of pci_mmap_resource_range() instead of defining
|
||||||
ARCH_GENERIC_PCI_MMAP_RESOURCE.
|
ARCH_GENERIC_PCI_MMAP_RESOURCE.
|
||||||
|
|
||||||
Platforms which support write-combining maps of PCI resources must define
|
Platforms which support write-combining maps of PCI resources must define
|
||||||
|
|||||||
@@ -1844,10 +1844,10 @@ that meets this requirement.
|
|||||||
|
|
||||||
Furthermore, NMI handlers can be interrupted by what appear to RCU to be
|
Furthermore, NMI handlers can be interrupted by what appear to RCU to be
|
||||||
normal interrupts. One way that this can happen is for code that
|
normal interrupts. One way that this can happen is for code that
|
||||||
directly invokes rcu_irq_enter() and rcu_irq_exit() to be called
|
directly invokes ct_irq_enter() and ct_irq_exit() to be called
|
||||||
from an NMI handler. This astonishing fact of life prompted the current
|
from an NMI handler. This astonishing fact of life prompted the current
|
||||||
code structure, which has rcu_irq_enter() invoking
|
code structure, which has ct_irq_enter() invoking
|
||||||
rcu_nmi_enter() and rcu_irq_exit() invoking rcu_nmi_exit().
|
ct_nmi_enter() and ct_irq_exit() invoking ct_nmi_exit().
|
||||||
And yes, I also learned of this requirement the hard way.
|
And yes, I also learned of this requirement the hard way.
|
||||||
|
|
||||||
Loadable Modules
|
Loadable Modules
|
||||||
@@ -2195,7 +2195,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
|
|||||||
sections, and RCU believes this CPU to be idle, no problem. This
|
sections, and RCU believes this CPU to be idle, no problem. This
|
||||||
sort of thing is used by some architectures for light-weight
|
sort of thing is used by some architectures for light-weight
|
||||||
exception handlers, which can then avoid the overhead of
|
exception handlers, which can then avoid the overhead of
|
||||||
rcu_irq_enter() and rcu_irq_exit() at exception entry and
|
ct_irq_enter() and ct_irq_exit() at exception entry and
|
||||||
exit, respectively. Some go further and avoid the entireties of
|
exit, respectively. Some go further and avoid the entireties of
|
||||||
irq_enter() and irq_exit().
|
irq_enter() and irq_exit().
|
||||||
Just make very sure you are running some of your tests with
|
Just make very sure you are running some of your tests with
|
||||||
@@ -2226,7 +2226,7 @@ scheduling-clock interrupt be enabled when RCU needs it to be:
|
|||||||
+-----------------------------------------------------------------------+
|
+-----------------------------------------------------------------------+
|
||||||
| **Answer**: |
|
| **Answer**: |
|
||||||
+-----------------------------------------------------------------------+
|
+-----------------------------------------------------------------------+
|
||||||
| One approach is to do ``rcu_irq_exit();rcu_irq_enter();`` every so |
|
| One approach is to do ``ct_irq_exit();ct_irq_enter();`` every so |
|
||||||
| often. But given that long-running interrupt handlers can cause other |
|
| often. But given that long-running interrupt handlers can cause other |
|
||||||
| problems, not least for response time, shouldn't you work to keep |
|
| problems, not least for response time, shouldn't you work to keep |
|
||||||
| your interrupt handler's runtime within reasonable bounds? |
|
| your interrupt handler's runtime within reasonable bounds? |
|
||||||
|
|||||||
@@ -97,12 +97,12 @@ warnings:
|
|||||||
which will include additional debugging information.
|
which will include additional debugging information.
|
||||||
|
|
||||||
- A low-level kernel issue that either fails to invoke one of the
|
- A low-level kernel issue that either fails to invoke one of the
|
||||||
variants of rcu_user_enter(), rcu_user_exit(), rcu_idle_enter(),
|
variants of rcu_eqs_enter(true), rcu_eqs_exit(true), ct_idle_enter(),
|
||||||
rcu_idle_exit(), rcu_irq_enter(), or rcu_irq_exit() on the one
|
ct_idle_exit(), ct_irq_enter(), or ct_irq_exit() on the one
|
||||||
hand, or that invokes one of them too many times on the other.
|
hand, or that invokes one of them too many times on the other.
|
||||||
Historically, the most frequent issue has been an omission
|
Historically, the most frequent issue has been an omission
|
||||||
of either irq_enter() or irq_exit(), which in turn invoke
|
of either irq_enter() or irq_exit(), which in turn invoke
|
||||||
rcu_irq_enter() or rcu_irq_exit(), respectively. Building your
|
ct_irq_enter() or ct_irq_exit(), respectively. Building your
|
||||||
kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
|
kernel with CONFIG_RCU_EQS_DEBUG=y can help track down these types
|
||||||
of issues, which sometimes arise in architecture-specific code.
|
of issues, which sometimes arise in architecture-specific code.
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
.. _readme:
|
.. _readme:
|
||||||
|
|
||||||
Linux kernel release 5.x <http://kernel.org/>
|
Linux kernel release 6.x <http://kernel.org/>
|
||||||
=============================================
|
=============================================
|
||||||
|
|
||||||
These are the release notes for Linux version 5. Read them carefully,
|
These are the release notes for Linux version 6. Read them carefully,
|
||||||
as they tell you what this is all about, explain how to install the
|
as they tell you what this is all about, explain how to install the
|
||||||
kernel, and what to do if something goes wrong.
|
kernel, and what to do if something goes wrong.
|
||||||
|
|
||||||
@@ -63,7 +63,7 @@ Installing the kernel source
|
|||||||
directory where you have permissions (e.g. your home directory) and
|
directory where you have permissions (e.g. your home directory) and
|
||||||
unpack it::
|
unpack it::
|
||||||
|
|
||||||
xz -cd linux-5.x.tar.xz | tar xvf -
|
xz -cd linux-6.x.tar.xz | tar xvf -
|
||||||
|
|
||||||
Replace "X" with the version number of the latest kernel.
|
Replace "X" with the version number of the latest kernel.
|
||||||
|
|
||||||
@@ -72,12 +72,12 @@ Installing the kernel source
|
|||||||
files. They should match the library, and not get messed up by
|
files. They should match the library, and not get messed up by
|
||||||
whatever the kernel-du-jour happens to be.
|
whatever the kernel-du-jour happens to be.
|
||||||
|
|
||||||
- You can also upgrade between 5.x releases by patching. Patches are
|
- You can also upgrade between 6.x releases by patching. Patches are
|
||||||
distributed in the xz format. To install by patching, get all the
|
distributed in the xz format. To install by patching, get all the
|
||||||
newer patch files, enter the top level directory of the kernel source
|
newer patch files, enter the top level directory of the kernel source
|
||||||
(linux-5.x) and execute::
|
(linux-6.x) and execute::
|
||||||
|
|
||||||
xz -cd ../patch-5.x.xz | patch -p1
|
xz -cd ../patch-6.x.xz | patch -p1
|
||||||
|
|
||||||
Replace "x" for all versions bigger than the version "x" of your current
|
Replace "x" for all versions bigger than the version "x" of your current
|
||||||
source tree, **in_order**, and you should be ok. You may want to remove
|
source tree, **in_order**, and you should be ok. You may want to remove
|
||||||
@@ -85,13 +85,13 @@ Installing the kernel source
|
|||||||
that there are no failed patches (some-file-name# or some-file-name.rej).
|
that there are no failed patches (some-file-name# or some-file-name.rej).
|
||||||
If there are, either you or I have made a mistake.
|
If there are, either you or I have made a mistake.
|
||||||
|
|
||||||
Unlike patches for the 5.x kernels, patches for the 5.x.y kernels
|
Unlike patches for the 6.x kernels, patches for the 6.x.y kernels
|
||||||
(also known as the -stable kernels) are not incremental but instead apply
|
(also known as the -stable kernels) are not incremental but instead apply
|
||||||
directly to the base 5.x kernel. For example, if your base kernel is 5.0
|
directly to the base 6.x kernel. For example, if your base kernel is 6.0
|
||||||
and you want to apply the 5.0.3 patch, you must not first apply the 5.0.1
|
and you want to apply the 6.0.3 patch, you must not first apply the 6.0.1
|
||||||
and 5.0.2 patches. Similarly, if you are running kernel version 5.0.2 and
|
and 6.0.2 patches. Similarly, if you are running kernel version 6.0.2 and
|
||||||
want to jump to 5.0.3, you must first reverse the 5.0.2 patch (that is,
|
want to jump to 6.0.3, you must first reverse the 6.0.2 patch (that is,
|
||||||
patch -R) **before** applying the 5.0.3 patch. You can read more on this in
|
patch -R) **before** applying the 6.0.3 patch. You can read more on this in
|
||||||
:ref:`Documentation/process/applying-patches.rst <applying_patches>`.
|
:ref:`Documentation/process/applying-patches.rst <applying_patches>`.
|
||||||
|
|
||||||
Alternatively, the script patch-kernel can be used to automate this
|
Alternatively, the script patch-kernel can be used to automate this
|
||||||
@@ -114,7 +114,7 @@ Installing the kernel source
|
|||||||
Software requirements
|
Software requirements
|
||||||
---------------------
|
---------------------
|
||||||
|
|
||||||
Compiling and running the 5.x kernels requires up-to-date
|
Compiling and running the 6.x kernels requires up-to-date
|
||||||
versions of various software packages. Consult
|
versions of various software packages. Consult
|
||||||
:ref:`Documentation/process/changes.rst <changes>` for the minimum version numbers
|
:ref:`Documentation/process/changes.rst <changes>` for the minimum version numbers
|
||||||
required and how to get updates for these packages. Beware that using
|
required and how to get updates for these packages. Beware that using
|
||||||
@@ -132,12 +132,12 @@ Build directory for the kernel
|
|||||||
place for the output files (including .config).
|
place for the output files (including .config).
|
||||||
Example::
|
Example::
|
||||||
|
|
||||||
kernel source code: /usr/src/linux-5.x
|
kernel source code: /usr/src/linux-6.x
|
||||||
build directory: /home/name/build/kernel
|
build directory: /home/name/build/kernel
|
||||||
|
|
||||||
To configure and build the kernel, use::
|
To configure and build the kernel, use::
|
||||||
|
|
||||||
cd /usr/src/linux-5.x
|
cd /usr/src/linux-6.x
|
||||||
make O=/home/name/build/kernel menuconfig
|
make O=/home/name/build/kernel menuconfig
|
||||||
make O=/home/name/build/kernel
|
make O=/home/name/build/kernel
|
||||||
sudo make O=/home/name/build/kernel modules_install install
|
sudo make O=/home/name/build/kernel modules_install install
|
||||||
|
|||||||
@@ -97,7 +97,7 @@ Under below explanation, we assume CONFIG_MEM_RES_CTRL_SWAP=y.
|
|||||||
=============
|
=============
|
||||||
|
|
||||||
Page Cache is charged at
|
Page Cache is charged at
|
||||||
- add_to_page_cache_locked().
|
- filemap_add_folio().
|
||||||
|
|
||||||
The logic is very clear. (About migration, see below)
|
The logic is very clear. (About migration, see below)
|
||||||
|
|
||||||
|
|||||||
@@ -184,6 +184,14 @@ cgroup v2 currently supports the following mount options.
|
|||||||
ignored on non-init namespace mounts. Please refer to the
|
ignored on non-init namespace mounts. Please refer to the
|
||||||
Delegation section for details.
|
Delegation section for details.
|
||||||
|
|
||||||
|
favordynmods
|
||||||
|
Reduce the latencies of dynamic cgroup modifications such as
|
||||||
|
task migrations and controller on/offs at the cost of making
|
||||||
|
hot path operations such as forks and exits more expensive.
|
||||||
|
The static usage pattern of creating a cgroup, enabling
|
||||||
|
controllers, and then seeding it with CLONE_INTO_CGROUP is
|
||||||
|
not affected by this option.
|
||||||
|
|
||||||
memory_localevents
|
memory_localevents
|
||||||
Only populate memory.events with data for the current cgroup,
|
Only populate memory.events with data for the current cgroup,
|
||||||
and not any subtrees. This is legacy behaviour, the default
|
and not any subtrees. This is legacy behaviour, the default
|
||||||
@@ -1229,6 +1237,13 @@ PAGE_SIZE multiple when read back.
|
|||||||
the target cgroup. If fewer bytes are reclaimed than the
|
the target cgroup. If fewer bytes are reclaimed than the
|
||||||
specified amount, -EAGAIN is returned.
|
specified amount, -EAGAIN is returned.
|
||||||
|
|
||||||
|
Please note that the proactive reclaim (triggered by this
|
||||||
|
interface) is not meant to indicate memory pressure on the
|
||||||
|
memory cgroup. Therefore socket memory balancing triggered by
|
||||||
|
the memory reclaim normally is not exercised in this case.
|
||||||
|
This means that the networking layer will not adapt based on
|
||||||
|
reclaim induced by memory.reclaim.
|
||||||
|
|
||||||
memory.peak
|
memory.peak
|
||||||
A read-only single value file which exists on non-root
|
A read-only single value file which exists on non-root
|
||||||
cgroups.
|
cgroups.
|
||||||
@@ -1433,6 +1448,24 @@ PAGE_SIZE multiple when read back.
|
|||||||
workingset_nodereclaim
|
workingset_nodereclaim
|
||||||
Number of times a shadow node has been reclaimed
|
Number of times a shadow node has been reclaimed
|
||||||
|
|
||||||
|
pgscan (npn)
|
||||||
|
Amount of scanned pages (in an inactive LRU list)
|
||||||
|
|
||||||
|
pgsteal (npn)
|
||||||
|
Amount of reclaimed pages
|
||||||
|
|
||||||
|
pgscan_kswapd (npn)
|
||||||
|
Amount of scanned pages by kswapd (in an inactive LRU list)
|
||||||
|
|
||||||
|
pgscan_direct (npn)
|
||||||
|
Amount of scanned pages directly (in an inactive LRU list)
|
||||||
|
|
||||||
|
pgsteal_kswapd (npn)
|
||||||
|
Amount of reclaimed pages by kswapd
|
||||||
|
|
||||||
|
pgsteal_direct (npn)
|
||||||
|
Amount of reclaimed pages directly
|
||||||
|
|
||||||
pgfault (npn)
|
pgfault (npn)
|
||||||
Total number of page faults incurred
|
Total number of page faults incurred
|
||||||
|
|
||||||
@@ -1442,12 +1475,6 @@ PAGE_SIZE multiple when read back.
|
|||||||
pgrefill (npn)
|
pgrefill (npn)
|
||||||
Amount of scanned pages (in an active LRU list)
|
Amount of scanned pages (in an active LRU list)
|
||||||
|
|
||||||
pgscan (npn)
|
|
||||||
Amount of scanned pages (in an inactive LRU list)
|
|
||||||
|
|
||||||
pgsteal (npn)
|
|
||||||
Amount of reclaimed pages
|
|
||||||
|
|
||||||
pgactivate (npn)
|
pgactivate (npn)
|
||||||
Amount of pages moved to the active LRU list
|
Amount of pages moved to the active LRU list
|
||||||
|
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ Constructor parameters:
|
|||||||
size)
|
size)
|
||||||
5. the number of optional parameters (the parameters with an argument
|
5. the number of optional parameters (the parameters with an argument
|
||||||
count as two)
|
count as two)
|
||||||
|
|
||||||
start_sector n (default: 0)
|
start_sector n (default: 0)
|
||||||
offset from the start of cache device in 512-byte sectors
|
offset from the start of cache device in 512-byte sectors
|
||||||
high_watermark n (default: 50)
|
high_watermark n (default: 50)
|
||||||
@@ -74,20 +75,21 @@ Constructor parameters:
|
|||||||
the origin volume in the last n milliseconds
|
the origin volume in the last n milliseconds
|
||||||
|
|
||||||
Status:
|
Status:
|
||||||
|
|
||||||
1. error indicator - 0 if there was no error, otherwise error number
|
1. error indicator - 0 if there was no error, otherwise error number
|
||||||
2. the number of blocks
|
2. the number of blocks
|
||||||
3. the number of free blocks
|
3. the number of free blocks
|
||||||
4. the number of blocks under writeback
|
4. the number of blocks under writeback
|
||||||
5. the number of read requests
|
5. the number of read blocks
|
||||||
6. the number of read requests that hit the cache
|
6. the number of read blocks that hit the cache
|
||||||
7. the number of write requests
|
7. the number of write blocks
|
||||||
8. the number of write requests that hit uncommitted block
|
8. the number of write blocks that hit uncommitted block
|
||||||
9. the number of write requests that hit committed block
|
9. the number of write blocks that hit committed block
|
||||||
10. the number of write requests that bypass the cache
|
10. the number of write blocks that bypass the cache
|
||||||
11. the number of write requests that are allocated in the cache
|
11. the number of write blocks that are allocated in the cache
|
||||||
12. the number of write requests that are blocked on the freelist
|
12. the number of write requests that are blocked on the freelist
|
||||||
13. the number of flush requests
|
13. the number of flush requests
|
||||||
14. the number of discard requests
|
14. the number of discarded blocks
|
||||||
|
|
||||||
Messages:
|
Messages:
|
||||||
flush
|
flush
|
||||||
|
|||||||
@@ -7,10 +7,9 @@ This list is the Linux Device List, the official registry of allocated
|
|||||||
device numbers and ``/dev`` directory nodes for the Linux operating
|
device numbers and ``/dev`` directory nodes for the Linux operating
|
||||||
system.
|
system.
|
||||||
|
|
||||||
The LaTeX version of this document is no longer maintained, nor is
|
The version of this document at lanana.org is no longer maintained. This
|
||||||
the document that used to reside at lanana.org. This version in the
|
version in the mainline Linux kernel is the master document. Updates
|
||||||
mainline Linux kernel is the master document. Updates shall be sent
|
shall be sent as patches to the kernel maintainers (see the
|
||||||
as patches to the kernel maintainers (see the
|
|
||||||
:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` document).
|
:ref:`Documentation/process/submitting-patches.rst <submittingpatches>` document).
|
||||||
Specifically explore the sections titled "CHAR and MISC DRIVERS", and
|
Specifically explore the sections titled "CHAR and MISC DRIVERS", and
|
||||||
"BLOCK LAYER" in the MAINTAINERS file to find the right maintainers
|
"BLOCK LAYER" in the MAINTAINERS file to find the right maintainers
|
||||||
|
|||||||
@@ -7,10 +7,10 @@ as a PE/COFF image, thereby convincing EFI firmware loaders to load
|
|||||||
it as an EFI executable. The code that modifies the bzImage header,
|
it as an EFI executable. The code that modifies the bzImage header,
|
||||||
along with the EFI-specific entry point that the firmware loader
|
along with the EFI-specific entry point that the firmware loader
|
||||||
jumps to are collectively known as the "EFI boot stub", and live in
|
jumps to are collectively known as the "EFI boot stub", and live in
|
||||||
arch/x86/boot/header.S and arch/x86/boot/compressed/eboot.c,
|
arch/x86/boot/header.S and drivers/firmware/efi/libstub/x86-stub.c,
|
||||||
respectively. For ARM the EFI stub is implemented in
|
respectively. For ARM the EFI stub is implemented in
|
||||||
arch/arm/boot/compressed/efi-header.S and
|
arch/arm/boot/compressed/efi-header.S and
|
||||||
arch/arm/boot/compressed/efi-stub.c. EFI stub code that is shared
|
drivers/firmware/efi/libstub/arm32-stub.c. EFI stub code that is shared
|
||||||
between architectures is in drivers/firmware/efi/libstub.
|
between architectures is in drivers/firmware/efi/libstub.
|
||||||
|
|
||||||
For arm64, there is no compressed kernel support, so the Image itself
|
For arm64, there is no compressed kernel support, so the Image itself
|
||||||
|
|||||||
@@ -230,6 +230,20 @@ The possible values in this file are:
|
|||||||
* - 'Mitigation: Clear CPU buffers'
|
* - 'Mitigation: Clear CPU buffers'
|
||||||
- The processor is vulnerable and the CPU buffer clearing mitigation is
|
- The processor is vulnerable and the CPU buffer clearing mitigation is
|
||||||
enabled.
|
enabled.
|
||||||
|
* - 'Unknown: No mitigations'
|
||||||
|
- The processor vulnerability status is unknown because it is
|
||||||
|
out of Servicing period. Mitigation is not attempted.
|
||||||
|
|
||||||
|
Definitions:
|
||||||
|
------------
|
||||||
|
|
||||||
|
Servicing period: The process of providing functional and security updates to
|
||||||
|
Intel processors or platforms, utilizing the Intel Platform Update (IPU)
|
||||||
|
process or other similar mechanisms.
|
||||||
|
|
||||||
|
End of Servicing Updates (ESU): ESU is the date at which Intel will no
|
||||||
|
longer provide Servicing, such as through IPU or other similar update
|
||||||
|
processes. ESU dates will typically be aligned to end of quarter.
|
||||||
|
|
||||||
If the processor is vulnerable then the following information is appended to
|
If the processor is vulnerable then the following information is appended to
|
||||||
the above information:
|
the above information:
|
||||||
|
|||||||
@@ -422,6 +422,14 @@ The possible values in this file are:
|
|||||||
'RSB filling' Protection of RSB on context switch enabled
|
'RSB filling' Protection of RSB on context switch enabled
|
||||||
============= ===========================================
|
============= ===========================================
|
||||||
|
|
||||||
|
- EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
|
||||||
|
|
||||||
|
=========================== =======================================================
|
||||||
|
'PBRSB-eIBRS: SW sequence' CPU is affected and protection of RSB on VMEXIT enabled
|
||||||
|
'PBRSB-eIBRS: Vulnerable' CPU is vulnerable
|
||||||
|
'PBRSB-eIBRS: Not affected' CPU is not affected by PBRSB
|
||||||
|
=========================== =======================================================
|
||||||
|
|
||||||
Full mitigation might require a microcode update from the CPU
|
Full mitigation might require a microcode update from the CPU
|
||||||
vendor. When the necessary microcode is not available, the kernel will
|
vendor. When the necessary microcode is not available, the kernel will
|
||||||
report vulnerability.
|
report vulnerability.
|
||||||
|
|||||||
@@ -556,7 +556,7 @@
|
|||||||
nosocket -- Disable socket memory accounting.
|
nosocket -- Disable socket memory accounting.
|
||||||
nokmem -- Disable kernel memory accounting.
|
nokmem -- Disable kernel memory accounting.
|
||||||
|
|
||||||
checkreqprot [SELINUX] Set initial checkreqprot flag value.
|
checkreqprot= [SELINUX] Set initial checkreqprot flag value.
|
||||||
Format: { "0" | "1" }
|
Format: { "0" | "1" }
|
||||||
See security/selinux/Kconfig help text.
|
See security/selinux/Kconfig help text.
|
||||||
0 -- check protection applied by kernel (includes
|
0 -- check protection applied by kernel (includes
|
||||||
@@ -1158,8 +1158,12 @@
|
|||||||
nopku [X86] Disable Memory Protection Keys CPU feature found
|
nopku [X86] Disable Memory Protection Keys CPU feature found
|
||||||
in some Intel CPUs.
|
in some Intel CPUs.
|
||||||
|
|
||||||
<module>.async_probe [KNL]
|
<module>.async_probe[=<bool>] [KNL]
|
||||||
Enable asynchronous probe on this module.
|
If no <bool> value is specified or if the value
|
||||||
|
specified is not a valid <bool>, enable asynchronous
|
||||||
|
probe on this module. Otherwise, enable/disable
|
||||||
|
asynchronous probe on this module as indicated by the
|
||||||
|
<bool> value. See also: module.async_probe
|
||||||
|
|
||||||
early_ioremap_debug [KNL]
|
early_ioremap_debug [KNL]
|
||||||
Enable debug messages in early_ioremap support. This
|
Enable debug messages in early_ioremap support. This
|
||||||
@@ -1445,7 +1449,7 @@
|
|||||||
(in particular on some ATI chipsets).
|
(in particular on some ATI chipsets).
|
||||||
The kernel tries to set a reasonable default.
|
The kernel tries to set a reasonable default.
|
||||||
|
|
||||||
enforcing [SELINUX] Set initial enforcing status.
|
enforcing= [SELINUX] Set initial enforcing status.
|
||||||
Format: {"0" | "1"}
|
Format: {"0" | "1"}
|
||||||
See security/selinux/Kconfig help text.
|
See security/selinux/Kconfig help text.
|
||||||
0 -- permissive (log only, no denials).
|
0 -- permissive (log only, no denials).
|
||||||
@@ -1673,6 +1677,19 @@
|
|||||||
|
|
||||||
hlt [BUGS=ARM,SH]
|
hlt [BUGS=ARM,SH]
|
||||||
|
|
||||||
|
hostname= [KNL] Set the hostname (aka UTS nodename).
|
||||||
|
Format: <string>
|
||||||
|
This allows setting the system's hostname during early
|
||||||
|
startup. This sets the name returned by gethostname.
|
||||||
|
Using this parameter to set the hostname makes it
|
||||||
|
possible to ensure the hostname is correctly set before
|
||||||
|
any userspace processes run, avoiding the possibility
|
||||||
|
that a process may call gethostname before the hostname
|
||||||
|
has been explicitly set, resulting in the calling
|
||||||
|
process getting an incorrect result. The string must
|
||||||
|
not exceed the maximum allowed hostname length (usually
|
||||||
|
64 characters) and will be truncated otherwise.
|
||||||
|
|
||||||
hpet= [X86-32,HPET] option to control HPET usage
|
hpet= [X86-32,HPET] option to control HPET usage
|
||||||
Format: { enable (default) | disable | force |
|
Format: { enable (default) | disable | force |
|
||||||
verbose }
|
verbose }
|
||||||
@@ -1718,19 +1735,22 @@
|
|||||||
hugetlb_free_vmemmap=
|
hugetlb_free_vmemmap=
|
||||||
[KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
|
[KNL] Requires CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
|
||||||
enabled.
|
enabled.
|
||||||
|
Control if HugeTLB Vmemmap Optimization (HVO) is enabled.
|
||||||
Allows heavy hugetlb users to free up some more
|
Allows heavy hugetlb users to free up some more
|
||||||
memory (7 * PAGE_SIZE for each 2MB hugetlb page).
|
memory (7 * PAGE_SIZE for each 2MB hugetlb page).
|
||||||
Format: { [oO][Nn]/Y/y/1 | [oO][Ff]/N/n/0 (default) }
|
Format: { on | off (default) }
|
||||||
|
|
||||||
[oO][Nn]/Y/y/1: enable the feature
|
on: enable HVO
|
||||||
[oO][Ff]/N/n/0: disable the feature
|
off: disable HVO
|
||||||
|
|
||||||
Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
|
Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
|
||||||
the default is on.
|
the default is on.
|
||||||
|
|
||||||
This is not compatible with memory_hotplug.memmap_on_memory.
|
Note that the vmemmap pages may be allocated from the added
|
||||||
If both parameters are enabled, hugetlb_free_vmemmap takes
|
memory block itself when memory_hotplug.memmap_on_memory is
|
||||||
precedence over memory_hotplug.memmap_on_memory.
|
enabled, those vmemmap pages cannot be optimized even if this
|
||||||
|
feature is enabled. Other vmemmap pages not allocated from
|
||||||
|
the added memory block itself are not affected.
|
||||||
|
|
||||||
hung_task_panic=
|
hung_task_panic=
|
||||||
[KNL] Should the hung task detector generate panics.
|
[KNL] Should the hung task detector generate panics.
|
||||||
@@ -2272,23 +2292,39 @@
|
|||||||
|
|
||||||
ivrs_ioapic [HW,X86-64]
|
ivrs_ioapic [HW,X86-64]
|
||||||
Provide an override to the IOAPIC-ID<->DEVICE-ID
|
Provide an override to the IOAPIC-ID<->DEVICE-ID
|
||||||
mapping provided in the IVRS ACPI table. For
|
mapping provided in the IVRS ACPI table.
|
||||||
example, to map IOAPIC-ID decimal 10 to
|
By default, PCI segment is 0, and can be omitted.
|
||||||
PCI device 00:14.0 write the parameter as:
|
For example:
|
||||||
|
* To map IOAPIC-ID decimal 10 to PCI device 00:14.0
|
||||||
|
write the parameter as:
|
||||||
ivrs_ioapic[10]=00:14.0
|
ivrs_ioapic[10]=00:14.0
|
||||||
|
* To map IOAPIC-ID decimal 10 to PCI segment 0x1 and
|
||||||
|
PCI device 00:14.0 write the parameter as:
|
||||||
|
ivrs_ioapic[10]=0001:00:14.0
|
||||||
|
|
||||||
ivrs_hpet [HW,X86-64]
|
ivrs_hpet [HW,X86-64]
|
||||||
Provide an override to the HPET-ID<->DEVICE-ID
|
Provide an override to the HPET-ID<->DEVICE-ID
|
||||||
mapping provided in the IVRS ACPI table. For
|
mapping provided in the IVRS ACPI table.
|
||||||
example, to map HPET-ID decimal 0 to
|
By default, PCI segment is 0, and can be omitted.
|
||||||
PCI device 00:14.0 write the parameter as:
|
For example:
|
||||||
|
* To map HPET-ID decimal 0 to PCI device 00:14.0
|
||||||
|
write the parameter as:
|
||||||
ivrs_hpet[0]=00:14.0
|
ivrs_hpet[0]=00:14.0
|
||||||
|
* To map HPET-ID decimal 10 to PCI segment 0x1 and
|
||||||
|
PCI device 00:14.0 write the parameter as:
|
||||||
|
ivrs_hpet[10]=0001:00:14.0
|
||||||
|
|
||||||
ivrs_acpihid [HW,X86-64]
|
ivrs_acpihid [HW,X86-64]
|
||||||
Provide an override to the ACPI-HID:UID<->DEVICE-ID
|
Provide an override to the ACPI-HID:UID<->DEVICE-ID
|
||||||
mapping provided in the IVRS ACPI table. For
|
mapping provided in the IVRS ACPI table.
|
||||||
example, to map UART-HID:UID AMD0020:0 to
|
|
||||||
PCI device 00:14.5 write the parameter as:
|
For example, to map UART-HID:UID AMD0020:0 to
|
||||||
|
PCI segment 0x1 and PCI device ID 00:14.5,
|
||||||
|
write the parameter as:
|
||||||
|
ivrs_acpihid[0001:00:14.5]=AMD0020:0
|
||||||
|
|
||||||
|
By default, PCI segment is 0, and can be omitted.
|
||||||
|
For example, for PCI device 00:14.5, write the parameter as:
|
||||||
ivrs_acpihid[00:14.5]=AMD0020:0
|
ivrs_acpihid[00:14.5]=AMD0020:0
|
||||||
|
|
||||||
js= [HW,JOY] Analog joystick
|
js= [HW,JOY] Analog joystick
|
||||||
@@ -2424,8 +2460,7 @@
|
|||||||
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
the KVM_CLEAR_DIRTY ioctl, and only for the pages being
|
||||||
cleared.
|
cleared.
|
||||||
|
|
||||||
Eager page splitting currently only supports splitting
|
Eager page splitting is only supported when kvm.tdp_mmu=Y.
|
||||||
huge pages mapped by the TDP MMU.
|
|
||||||
|
|
||||||
Default is Y (on).
|
Default is Y (on).
|
||||||
|
|
||||||
@@ -3074,10 +3109,12 @@
|
|||||||
[KNL,X86,ARM] Boolean flag to enable this feature.
|
[KNL,X86,ARM] Boolean flag to enable this feature.
|
||||||
Format: {on | off (default)}
|
Format: {on | off (default)}
|
||||||
When enabled, runtime hotplugged memory will
|
When enabled, runtime hotplugged memory will
|
||||||
allocate its internal metadata (struct pages)
|
allocate its internal metadata (struct pages,
|
||||||
from the hotadded memory which will allow to
|
those vmemmap pages cannot be optimized even
|
||||||
hotadd a lot of memory without requiring
|
if hugetlb_free_vmemmap is enabled) from the
|
||||||
additional memory to do so.
|
hotadded memory which will allow to hotadd a
|
||||||
|
lot of memory without requiring additional
|
||||||
|
memory to do so.
|
||||||
This feature is disabled by default because it
|
This feature is disabled by default because it
|
||||||
has some implication on large (e.g. GB)
|
has some implication on large (e.g. GB)
|
||||||
allocations in some configurations (e.g. small
|
allocations in some configurations (e.g. small
|
||||||
@@ -3087,10 +3124,6 @@
|
|||||||
Note that even when enabled, there are a few cases where
|
Note that even when enabled, there are a few cases where
|
||||||
the feature is not effective.
|
the feature is not effective.
|
||||||
|
|
||||||
This is not compatible with hugetlb_free_vmemmap. If
|
|
||||||
both parameters are enabled, hugetlb_free_vmemmap takes
|
|
||||||
precedence over memory_hotplug.memmap_on_memory.
|
|
||||||
|
|
||||||
memtest= [KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
|
memtest= [KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
|
||||||
Format: <integer>
|
Format: <integer>
|
||||||
default : 0 <disable>
|
default : 0 <disable>
|
||||||
@@ -3109,7 +3142,7 @@
|
|||||||
mem_encrypt=on: Activate SME
|
mem_encrypt=on: Activate SME
|
||||||
mem_encrypt=off: Do not activate SME
|
mem_encrypt=off: Do not activate SME
|
||||||
|
|
||||||
Refer to Documentation/virt/kvm/amd-memory-encryption.rst
|
Refer to Documentation/virt/kvm/x86/amd-memory-encryption.rst
|
||||||
for details on when memory encryption can be activated.
|
for details on when memory encryption can be activated.
|
||||||
|
|
||||||
mem_sleep_default= [SUSPEND] Default system suspend mode:
|
mem_sleep_default= [SUSPEND] Default system suspend mode:
|
||||||
@@ -3249,6 +3282,15 @@
|
|||||||
For details see:
|
For details see:
|
||||||
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
|
Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
|
||||||
|
|
||||||
|
module.async_probe=<bool>
|
||||||
|
[KNL] When set to true, modules will use async probing
|
||||||
|
by default. To enable/disable async probing for a
|
||||||
|
specific module, use the module specific control that
|
||||||
|
is documented under <module>.async_probe. When both
|
||||||
|
module.async_probe and <module>.async_probe are
|
||||||
|
specified, <module>.async_probe takes precedence for
|
||||||
|
the specific module.
|
||||||
|
|
||||||
module.sig_enforce
|
module.sig_enforce
|
||||||
[KNL] When CONFIG_MODULE_SIG is set, this means that
|
[KNL] When CONFIG_MODULE_SIG is set, this means that
|
||||||
modules without (valid) signatures will fail to load.
|
modules without (valid) signatures will fail to load.
|
||||||
@@ -3538,9 +3580,6 @@
|
|||||||
|
|
||||||
noautogroup Disable scheduler automatic task group creation.
|
noautogroup Disable scheduler automatic task group creation.
|
||||||
|
|
||||||
nobats [PPC] Do not use BATs for mapping kernel lowmem
|
|
||||||
on "Classic" PPC cores.
|
|
||||||
|
|
||||||
nocache [ARM]
|
nocache [ARM]
|
||||||
|
|
||||||
nodsp [SH] Disable hardware DSP at boot time.
|
nodsp [SH] Disable hardware DSP at boot time.
|
||||||
@@ -3667,6 +3706,9 @@
|
|||||||
just as if they had also been called out in the
|
just as if they had also been called out in the
|
||||||
rcu_nocbs= boot parameter.
|
rcu_nocbs= boot parameter.
|
||||||
|
|
||||||
|
Note that this argument takes precedence over
|
||||||
|
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
|
||||||
|
|
||||||
noiotrap [SH] Disables trapped I/O port accesses.
|
noiotrap [SH] Disables trapped I/O port accesses.
|
||||||
|
|
||||||
noirqdebug [X86-32] Disables the code which attempts to detect and
|
noirqdebug [X86-32] Disables the code which attempts to detect and
|
||||||
@@ -3707,9 +3749,6 @@
|
|||||||
|
|
||||||
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
|
||||||
|
|
||||||
noltlbs [PPC] Do not use large page/tlb entries for kernel
|
|
||||||
lowmem mapping on PPC40x and PPC8xx
|
|
||||||
|
|
||||||
nomca [IA-64] Disable machine check abort handling
|
nomca [IA-64] Disable machine check abort handling
|
||||||
|
|
||||||
nomce [X86-32] Disable Machine Check Exception
|
nomce [X86-32] Disable Machine Check Exception
|
||||||
@@ -3741,11 +3780,6 @@
|
|||||||
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
noreplace-smp [X86-32,SMP] Don't replace SMP instructions
|
||||||
with UP alternatives
|
with UP alternatives
|
||||||
|
|
||||||
nordrand [X86] Disable kernel use of the RDRAND and
|
|
||||||
RDSEED instructions even if they are supported
|
|
||||||
by the processor. RDRAND and RDSEED are still
|
|
||||||
available to user space applications.
|
|
||||||
|
|
||||||
noresume [SWSUSP] Disables resume and restores original swap
|
noresume [SWSUSP] Disables resume and restores original swap
|
||||||
space.
|
space.
|
||||||
|
|
||||||
@@ -4565,6 +4599,9 @@
|
|||||||
no-callback mode from boot but the mode may be
|
no-callback mode from boot but the mode may be
|
||||||
toggled at runtime via cpusets.
|
toggled at runtime via cpusets.
|
||||||
|
|
||||||
|
Note that this argument takes precedence over
|
||||||
|
the CONFIG_RCU_NOCB_CPU_DEFAULT_ALL option.
|
||||||
|
|
||||||
rcu_nocb_poll [KNL]
|
rcu_nocb_poll [KNL]
|
||||||
Rather than requiring that offloaded CPUs
|
Rather than requiring that offloaded CPUs
|
||||||
(specified by rcu_nocbs= above) explicitly
|
(specified by rcu_nocbs= above) explicitly
|
||||||
@@ -4674,6 +4711,34 @@
|
|||||||
When RCU_NOCB_CPU is set, also adjust the
|
When RCU_NOCB_CPU is set, also adjust the
|
||||||
priority of NOCB callback kthreads.
|
priority of NOCB callback kthreads.
|
||||||
|
|
||||||
|
rcutree.rcu_divisor= [KNL]
|
||||||
|
Set the shift-right count to use to compute
|
||||||
|
the callback-invocation batch limit bl from
|
||||||
|
the number of callbacks queued on this CPU.
|
||||||
|
The result will be bounded below by the value of
|
||||||
|
the rcutree.blimit kernel parameter. Every bl
|
||||||
|
callbacks, the softirq handler will exit in
|
||||||
|
order to allow the CPU to do other work.
|
||||||
|
|
||||||
|
Please note that this callback-invocation batch
|
||||||
|
limit applies only to non-offloaded callback
|
||||||
|
invocation. Offloaded callbacks are instead
|
||||||
|
invoked in the context of an rcuoc kthread, which
|
||||||
|
the scheduler will preempt as it does any other task.
|
||||||
|
|
||||||
|
rcutree.nocb_nobypass_lim_per_jiffy= [KNL]
|
||||||
|
On callback-offloaded (rcu_nocbs) CPUs,
|
||||||
|
RCU reduces the lock contention that would
|
||||||
|
otherwise be caused by callback floods through
|
||||||
|
use of the ->nocb_bypass list. However, in the
|
||||||
|
common non-flooded case, RCU queues directly to
|
||||||
|
the main ->cblist in order to avoid the extra
|
||||||
|
overhead of the ->nocb_bypass list and its lock.
|
||||||
|
But if there are too many callbacks queued during
|
||||||
|
a single jiffy, RCU pre-queues the callbacks into
|
||||||
|
the ->nocb_bypass queue. The definition of "too
|
||||||
|
many" is supplied by this kernel boot parameter.
|
||||||
|
|
||||||
rcutree.rcu_nocb_gp_stride= [KNL]
|
rcutree.rcu_nocb_gp_stride= [KNL]
|
||||||
Set the number of NOCB callback kthreads in
|
Set the number of NOCB callback kthreads in
|
||||||
each group, which defaults to the square root
|
each group, which defaults to the square root
|
||||||
@@ -5209,20 +5274,33 @@
|
|||||||
Speculative Code Execution with Return Instructions)
|
Speculative Code Execution with Return Instructions)
|
||||||
vulnerability.
|
vulnerability.
|
||||||
|
|
||||||
|
AMD-based UNRET and IBPB mitigations alone do not stop
|
||||||
|
sibling threads from influencing the predictions of other
|
||||||
|
sibling threads. For that reason, STIBP is used on pro-
|
||||||
|
cessors that support it, and mitigate SMT on processors
|
||||||
|
that don't.
|
||||||
|
|
||||||
off - no mitigation
|
off - no mitigation
|
||||||
auto - automatically select a mitigation
|
auto - automatically select a mitigation
|
||||||
auto,nosmt - automatically select a mitigation,
|
auto,nosmt - automatically select a mitigation,
|
||||||
disabling SMT if necessary for
|
disabling SMT if necessary for
|
||||||
the full mitigation (only on Zen1
|
the full mitigation (only on Zen1
|
||||||
and older without STIBP).
|
and older without STIBP).
|
||||||
ibpb - mitigate short speculation windows on
|
ibpb - On AMD, mitigate short speculation
|
||||||
basic block boundaries too. Safe, highest
|
windows on basic block boundaries too.
|
||||||
perf impact.
|
Safe, highest perf impact. It also
|
||||||
unret - force enable untrained return thunks,
|
enables STIBP if present. Not suitable
|
||||||
only effective on AMD f15h-f17h
|
on Intel.
|
||||||
based systems.
|
ibpb,nosmt - Like "ibpb" above but will disable SMT
|
||||||
unret,nosmt - like unret, will disable SMT when STIBP
|
when STIBP is not available. This is
|
||||||
is not available.
|
the alternative for systems which do not
|
||||||
|
have STIBP.
|
||||||
|
unret - Force enable untrained return thunks,
|
||||||
|
only effective on AMD f15h-f17h based
|
||||||
|
systems.
|
||||||
|
unret,nosmt - Like unret, but will disable SMT when STIBP
|
||||||
|
is not available. This is the alternative for
|
||||||
|
systems which do not have STIBP.
|
||||||
|
|
||||||
Selecting 'auto' will choose a mitigation method at run
|
Selecting 'auto' will choose a mitigation method at run
|
||||||
time according to the CPU.
|
time according to the CPU.
|
||||||
@@ -5253,6 +5331,8 @@
|
|||||||
rodata= [KNL]
|
rodata= [KNL]
|
||||||
on Mark read-only kernel memory as read-only (default).
|
on Mark read-only kernel memory as read-only (default).
|
||||||
off Leave read-only kernel memory writable for debugging.
|
off Leave read-only kernel memory writable for debugging.
|
||||||
|
full Mark read-only kernel memory and aliases as read-only
|
||||||
|
[arm64]
|
||||||
|
|
||||||
rockchip.usb_uart
|
rockchip.usb_uart
|
||||||
Enable the uart passthrough on the designated usb port
|
Enable the uart passthrough on the designated usb port
|
||||||
@@ -5474,7 +5554,7 @@
|
|||||||
cache (risks via metadata attacks are mostly
|
cache (risks via metadata attacks are mostly
|
||||||
unchanged). Debug options disable merging on their
|
unchanged). Debug options disable merging on their
|
||||||
own.
|
own.
|
||||||
For more information see Documentation/vm/slub.rst.
|
For more information see Documentation/mm/slub.rst.
|
||||||
|
|
||||||
slab_max_order= [MM, SLAB]
|
slab_max_order= [MM, SLAB]
|
||||||
Determines the maximum allowed order for slabs.
|
Determines the maximum allowed order for slabs.
|
||||||
@@ -5488,13 +5568,13 @@
|
|||||||
slub_debug can create guard zones around objects and
|
slub_debug can create guard zones around objects and
|
||||||
may poison objects when not in use. Also tracks the
|
may poison objects when not in use. Also tracks the
|
||||||
last alloc / free. For more information see
|
last alloc / free. For more information see
|
||||||
Documentation/vm/slub.rst.
|
Documentation/mm/slub.rst.
|
||||||
|
|
||||||
slub_max_order= [MM, SLUB]
|
slub_max_order= [MM, SLUB]
|
||||||
Determines the maximum allowed order for slabs.
|
Determines the maximum allowed order for slabs.
|
||||||
A high setting may cause OOMs due to memory
|
A high setting may cause OOMs due to memory
|
||||||
fragmentation. For more information see
|
fragmentation. For more information see
|
||||||
Documentation/vm/slub.rst.
|
Documentation/mm/slub.rst.
|
||||||
|
|
||||||
slub_min_objects= [MM, SLUB]
|
slub_min_objects= [MM, SLUB]
|
||||||
The minimum number of objects per slab. SLUB will
|
The minimum number of objects per slab. SLUB will
|
||||||
@@ -5503,12 +5583,12 @@
|
|||||||
the number of objects indicated. The higher the number
|
the number of objects indicated. The higher the number
|
||||||
of objects the smaller the overhead of tracking slabs
|
of objects the smaller the overhead of tracking slabs
|
||||||
and the less frequently locks need to be acquired.
|
and the less frequently locks need to be acquired.
|
||||||
For more information see Documentation/vm/slub.rst.
|
For more information see Documentation/mm/slub.rst.
|
||||||
|
|
||||||
slub_min_order= [MM, SLUB]
|
slub_min_order= [MM, SLUB]
|
||||||
Determines the minimum page order for slabs. Must be
|
Determines the minimum page order for slabs. Must be
|
||||||
lower than slub_max_order.
|
lower than slub_max_order.
|
||||||
For more information see Documentation/vm/slub.rst.
|
For more information see Documentation/mm/slub.rst.
|
||||||
|
|
||||||
slub_merge [MM, SLUB]
|
slub_merge [MM, SLUB]
|
||||||
Same with slab_merge.
|
Same with slab_merge.
|
||||||
@@ -5955,8 +6035,11 @@
|
|||||||
it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
|
it if 0 is given (See Documentation/admin-guide/cgroup-v1/memory.rst)
|
||||||
|
|
||||||
swiotlb= [ARM,IA-64,PPC,MIPS,X86]
|
swiotlb= [ARM,IA-64,PPC,MIPS,X86]
|
||||||
Format: { <int> | force | noforce }
|
Format: { <int> [,<int>] | force | noforce }
|
||||||
<int> -- Number of I/O TLB slabs
|
<int> -- Number of I/O TLB slabs
|
||||||
|
<int> -- Second integer after comma. Number of swiotlb
|
||||||
|
areas with their own lock. Will be rounded up
|
||||||
|
to a power of 2.
|
||||||
force -- force using of bounce buffers even if they
|
force -- force using of bounce buffers even if they
|
||||||
wouldn't be automatically used by the kernel
|
wouldn't be automatically used by the kernel
|
||||||
noforce -- Never use bounce buffers (for debugging)
|
noforce -- Never use bounce buffers (for debugging)
|
||||||
|
|||||||
@@ -5,9 +5,13 @@ digraph board {
|
|||||||
n00000001 [label="{{} | Sensor A\n/dev/v4l-subdev0 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
|
n00000001 [label="{{} | Sensor A\n/dev/v4l-subdev0 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
n00000001:port0 -> n00000005:port0 [style=bold]
|
n00000001:port0 -> n00000005:port0 [style=bold]
|
||||||
n00000001:port0 -> n0000000b [style=bold]
|
n00000001:port0 -> n0000000b [style=bold]
|
||||||
|
n00000001 -> n00000002
|
||||||
|
n00000002 [label="{{} | Lens A\n/dev/v4l-subdev5 | {<port0>}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
n00000003 [label="{{} | Sensor B\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
|
n00000003 [label="{{} | Sensor B\n/dev/v4l-subdev1 | {<port0> 0}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
n00000003:port0 -> n00000008:port0 [style=bold]
|
n00000003:port0 -> n00000008:port0 [style=bold]
|
||||||
n00000003:port0 -> n0000000f [style=bold]
|
n00000003:port0 -> n0000000f [style=bold]
|
||||||
|
n00000003 -> n00000004
|
||||||
|
n00000004 [label="{{} | Lens B\n/dev/v4l-subdev6 | {<port0>}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
n00000005 [label="{{<port0> 0} | Debayer A\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
n00000005 [label="{{<port0> 0} | Debayer A\n/dev/v4l-subdev2 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
n00000005:port1 -> n00000015:port0
|
n00000005:port1 -> n00000015:port0
|
||||||
n00000008 [label="{{<port0> 0} | Debayer B\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
n00000008 [label="{{<port0> 0} | Debayer B\n/dev/v4l-subdev3 | {<port1> 1}}", shape=Mrecord, style=filled, fillcolor=green]
|
||||||
|
|||||||
@@ -53,6 +53,25 @@ vimc-sensor:
|
|||||||
|
|
||||||
* 1 Pad source
|
* 1 Pad source
|
||||||
|
|
||||||
|
vimc-lens:
|
||||||
|
Ancillary lens for a sensor. Supports auto focus control. Linked to
|
||||||
|
a vimc-sensor using an ancillary link. The lens supports FOCUS_ABSOLUTE
|
||||||
|
control.
|
||||||
|
|
||||||
|
.. code-block:: bash
|
||||||
|
|
||||||
|
media-ctl -p
|
||||||
|
...
|
||||||
|
- entity 28: Lens A (0 pad, 0 link)
|
||||||
|
type V4L2 subdev subtype Lens flags 0
|
||||||
|
device node name /dev/v4l-subdev6
|
||||||
|
- entity 29: Lens B (0 pad, 0 link)
|
||||||
|
type V4L2 subdev subtype Lens flags 0
|
||||||
|
device node name /dev/v4l-subdev7
|
||||||
|
v4l2-ctl -d /dev/v4l-subdev7 -C focus_absolute
|
||||||
|
focus_absolute: 0
|
||||||
|
|
||||||
|
|
||||||
vimc-debayer:
|
vimc-debayer:
|
||||||
Transforms images in bayer format into a non-bayer format.
|
Transforms images in bayer format into a non-bayer format.
|
||||||
Exposes:
|
Exposes:
|
||||||
|
|||||||
@@ -714,6 +714,20 @@ The Test Pattern Controls are all specific to video capture.
|
|||||||
|
|
||||||
does the same for the EAV (End of Active Video) code.
|
does the same for the EAV (End of Active Video) code.
|
||||||
|
|
||||||
|
- Insert Video Guard Band
|
||||||
|
|
||||||
|
adds 4 columns of pixels with the HDMI Video Guard Band code at the
|
||||||
|
left hand side of the image. This only works with 3 or 4 byte RGB pixel
|
||||||
|
formats. The RGB pixel value 0xab/0x55/0xab turns out to be equivalent
|
||||||
|
to the HDMI Video Guard Band code that precedes each active video line
|
||||||
|
(see section 5.2.2.1 in the HDMI 1.3 Specification). To test if a video
|
||||||
|
receiver has correct HDMI Video Guard Band processing, enable this
|
||||||
|
control and then move the image to the left hand side of the screen.
|
||||||
|
That will result in video lines that start with multiple pixels that
|
||||||
|
have the same value as the Video Guard Band that precedes them.
|
||||||
|
Receivers that will just keep skipping Video Guard Band values will
|
||||||
|
now fail and either lose sync or these video lines will shift.
|
||||||
|
|
||||||
|
|
||||||
Capture Feature Selection Controls
|
Capture Feature Selection Controls
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ processor. Each bank is referred to as a `node` and for each node Linux
|
|||||||
constructs an independent memory management subsystem. A node has its
|
constructs an independent memory management subsystem. A node has its
|
||||||
own set of zones, lists of free and used pages and various statistics
|
own set of zones, lists of free and used pages and various statistics
|
||||||
counters. You can find more details about NUMA in
|
counters. You can find more details about NUMA in
|
||||||
:ref:`Documentation/vm/numa.rst <numa>` and in
|
:ref:`Documentation/mm/numa.rst <numa>` and in
|
||||||
:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`.
|
:ref:`Documentation/admin-guide/mm/numa_memory_policy.rst <numa_memory_policy>`.
|
||||||
|
|
||||||
Page cache
|
Page cache
|
||||||
|
|||||||
@@ -4,7 +4,7 @@
|
|||||||
Monitoring Data Accesses
|
Monitoring Data Accesses
|
||||||
========================
|
========================
|
||||||
|
|
||||||
:doc:`DAMON </vm/damon/index>` allows light-weight data access monitoring.
|
:doc:`DAMON </mm/damon/index>` allows light-weight data access monitoring.
|
||||||
Using DAMON, users can analyze the memory access patterns of their systems and
|
Using DAMON, users can analyze the memory access patterns of their systems and
|
||||||
optimize those.
|
optimize those.
|
||||||
|
|
||||||
@@ -14,3 +14,4 @@ optimize those.
|
|||||||
start
|
start
|
||||||
usage
|
usage
|
||||||
reclaim
|
reclaim
|
||||||
|
lru_sort
|
||||||
|
|||||||
294
Documentation/admin-guide/mm/damon/lru_sort.rst
Normal file
294
Documentation/admin-guide/mm/damon/lru_sort.rst
Normal file
@@ -0,0 +1,294 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
=============================
|
||||||
|
DAMON-based LRU-lists Sorting
|
||||||
|
=============================
|
||||||
|
|
||||||
|
DAMON-based LRU-lists Sorting (DAMON_LRU_SORT) is a static kernel module that
|
||||||
|
is aimed to be used for proactive and lightweight data access pattern based
|
||||||
|
(de)prioritization of pages on their LRU-lists for making LRU-lists a more
|
||||||
|
trustworthy data access pattern source.
|
||||||
|
|
||||||
|
Where Proactive LRU-lists Sorting is Required?
|
||||||
|
==============================================
|
||||||
|
|
||||||
|
As page-granularity access checking overhead could be significant on huge
|
||||||
|
systems, LRU lists are normally not proactively sorted but partially and
|
||||||
|
reactively sorted for special events including specific user requests, system
|
||||||
|
calls and memory pressure. As a result, LRU lists are sometimes not so
|
||||||
|
perfectly prepared to be used as a trustworthy access pattern source for some
|
||||||
|
situations including reclamation target pages selection under sudden memory
|
||||||
|
pressure.
|
||||||
|
|
||||||
|
Because DAMON can identify access patterns of best-effort accuracy while
|
||||||
|
inducing only user-specified range of overhead, proactively running
|
||||||
|
DAMON_LRU_SORT could be helpful for making LRU lists more trustworthy access
|
||||||
|
pattern source with low and controlled overhead.
|
||||||
|
|
||||||
|
How It Works?
|
||||||
|
=============
|
||||||
|
|
||||||
|
DAMON_LRU_SORT finds hot pages (pages of memory regions that are showing access
|
||||||
|
rates that are higher than a user-specified threshold) and cold pages (pages of
|
||||||
|
memory regions that are showing no access for a time that is longer than a
|
||||||
|
user-specified threshold) using DAMON, and prioritizes hot pages while
|
||||||
|
deprioritizing cold pages on their LRU-lists. To avoid it consuming too much
|
||||||
|
CPU for the prioritizations, a CPU time usage limit can be configured. Under
|
||||||
|
the limit, it prioritizes and deprioritizes more hot and cold pages first,
|
||||||
|
respectively. System administrators can also configure under what situation
|
||||||
|
this scheme should be automatically activated and deactivated with three memory
|
||||||
|
pressure watermarks.
|
||||||
|
|
||||||
|
Its default parameters for hotness/coldness thresholds and CPU quota limit are
|
||||||
|
conservatively chosen. That is, the module under its default parameters could
|
||||||
|
be widely used without harm for common situations while providing a level of
|
||||||
|
benefits for systems having clear hot/cold access patterns under memory
|
||||||
|
pressure while consuming only a limited small portion of CPU time.
|
||||||
|
|
||||||
|
Interface: Module Parameters
|
||||||
|
============================
|
||||||
|
|
||||||
|
To use this feature, you should first ensure your system is running on a kernel
|
||||||
|
that is built with ``CONFIG_DAMON_LRU_SORT=y``.
|
||||||
|
|
||||||
|
To let sysadmins enable or disable it and tune for the given system,
|
||||||
|
DAMON_LRU_SORT utilizes module parameters. That is, you can put
|
||||||
|
``damon_lru_sort.<parameter>=<value>`` on the kernel boot command line or write
|
||||||
|
proper values to ``/sys/module/damon_lru_sort/parameters/<parameter>`` files.
|
||||||
|
|
||||||
|
Below is the description of each parameter.
|
||||||
|
|
||||||
|
enabled
|
||||||
|
-------
|
||||||
|
|
||||||
|
Enable or disable DAMON_LRU_SORT.
|
||||||
|
|
||||||
|
You can enable DAMON_LRU_SORT by setting the value of this parameter as ``Y``.
|
||||||
|
Setting it as ``N`` disables DAMON_LRU_SORT. Note that DAMON_LRU_SORT could do
|
||||||
|
no real monitoring and LRU-lists sorting due to the watermarks-based activation
|
||||||
|
condition. Refer to below descriptions for the watermarks parameter for this.
|
||||||
|
|
||||||
|
commit_inputs
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Make DAMON_LRU_SORT read the input parameters again, except ``enabled``.
|
||||||
|
|
||||||
|
Input parameters that are updated while DAMON_LRU_SORT is running are not applied
|
||||||
|
by default. Once this parameter is set as ``Y``, DAMON_LRU_SORT reads values
|
||||||
|
of parameters except ``enabled`` again. Once the re-reading is done, this
|
||||||
|
parameter is set as ``N``. If invalid parameters are found while the
|
||||||
|
re-reading, DAMON_LRU_SORT will be disabled.
|
||||||
|
|
||||||
|
hot_thres_access_freq
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Access frequency threshold for hot memory regions identification in permil.
|
||||||
|
|
||||||
|
If a memory region is accessed in frequency of this or higher, DAMON_LRU_SORT
|
||||||
|
identifies the region as hot, and marks it as accessed on the LRU list, so that
|
||||||
|
it could not be reclaimed under memory pressure. 50% by default.
|
||||||
|
|
||||||
|
cold_min_age
|
||||||
|
------------
|
||||||
|
|
||||||
|
Time threshold for cold memory regions identification in microseconds.
|
||||||
|
|
||||||
|
If a memory region is not accessed for this or longer time, DAMON_LRU_SORT
|
||||||
|
identifies the region as cold, and marks it as unaccessed on the LRU list, so
|
||||||
|
that it could be reclaimed first under memory pressure. 120 seconds by
|
||||||
|
default.
|
||||||
|
|
||||||
|
quota_ms
|
||||||
|
--------
|
||||||
|
|
||||||
|
Limit of time for trying the LRU lists sorting in milliseconds.
|
||||||
|
|
||||||
|
DAMON_LRU_SORT tries to use only up to this time within a time window
|
||||||
|
(quota_reset_interval_ms) for trying LRU lists sorting. This can be used
|
||||||
|
for limiting CPU consumption of DAMON_LRU_SORT. If the value is zero, the
|
||||||
|
limit is disabled.
|
||||||
|
|
||||||
|
10 ms by default.
|
||||||
|
|
||||||
|
quota_reset_interval_ms
|
||||||
|
-----------------------
|
||||||
|
|
||||||
|
The time quota charge reset interval in milliseconds.
|
||||||
|
|
||||||
|
The charge reset interval for the quota of time (quota_ms). That is,
|
||||||
|
DAMON_LRU_SORT does not try LRU-lists sorting for more than quota_ms
|
||||||
|
milliseconds or quota_sz bytes within quota_reset_interval_ms milliseconds.
|
||||||
|
|
||||||
|
1 second by default.
|
||||||
|
|
||||||
|
wmarks_interval
|
||||||
|
---------------
|
||||||
|
|
||||||
|
The watermarks check time interval in microseconds.
|
||||||
|
|
||||||
|
Minimal time to wait before checking the watermarks, when DAMON_LRU_SORT is
|
||||||
|
enabled but inactive due to its watermarks rule. 5 seconds by default.
|
||||||
|
|
||||||
|
wmarks_high
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Free memory rate (per thousand) for the high watermark.
|
||||||
|
|
||||||
|
If free memory of the system in bytes per thousand bytes is higher than this,
|
||||||
|
DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks the
|
||||||
|
watermarks. 200 (20%) by default.
|
||||||
|
|
||||||
|
wmarks_mid
|
||||||
|
----------
|
||||||
|
|
||||||
|
Free memory rate (per thousand) for the middle watermark.
|
||||||
|
|
||||||
|
If free memory of the system in bytes per thousand bytes is between this and
|
||||||
|
the low watermark, DAMON_LRU_SORT becomes active, so starts the monitoring and
|
||||||
|
the LRU-lists sorting. 150 (15%) by default.
|
||||||
|
|
||||||
|
wmarks_low
|
||||||
|
----------
|
||||||
|
|
||||||
|
Free memory rate (per thousand) for the low watermark.
|
||||||
|
|
||||||
|
If free memory of the system in bytes per thousand bytes is lower than this,
|
||||||
|
DAMON_LRU_SORT becomes inactive, so it does nothing but periodically checks the
|
||||||
|
watermarks. 50 (5%) by default.
|
||||||
|
|
||||||
|
sample_interval
|
||||||
|
---------------
|
||||||
|
|
||||||
|
Sampling interval for the monitoring in microseconds.
|
||||||
|
|
||||||
|
The sampling interval of DAMON for the cold memory monitoring. Please refer to
|
||||||
|
the DAMON documentation (:doc:`usage`) for more detail. 5ms by default.
|
||||||
|
|
||||||
|
aggr_interval
|
||||||
|
-------------
|
||||||
|
|
||||||
|
Aggregation interval for the monitoring in microseconds.
|
||||||
|
|
||||||
|
The aggregation interval of DAMON for the cold memory monitoring. Please
|
||||||
|
refer to the DAMON documentation (:doc:`usage`) for more detail. 100ms by
|
||||||
|
default.
|
||||||
|
|
||||||
|
min_nr_regions
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Minimum number of monitoring regions.
|
||||||
|
|
||||||
|
The minimal number of monitoring regions of DAMON for the cold memory
|
||||||
|
monitoring. This can be used to set lower-bound of the monitoring quality.
|
||||||
|
But, setting this too high could result in increased monitoring overhead.
|
||||||
|
Please refer to the DAMON documentation (:doc:`usage`) for more detail. 10 by
|
||||||
|
default.
|
||||||
|
|
||||||
|
max_nr_regions
|
||||||
|
--------------
|
||||||
|
|
||||||
|
Maximum number of monitoring regions.
|
||||||
|
|
||||||
|
The maximum number of monitoring regions of DAMON for the cold memory
|
||||||
|
monitoring. This can be used to set upper-bound of the monitoring overhead.
|
||||||
|
However, setting this too low could result in bad monitoring quality. Please
|
||||||
|
refer to the DAMON documentation (:doc:`usage`) for more detail. 1000 by
|
||||||
|
default.
|
||||||
|
|
||||||
|
monitor_region_start
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Start of target memory region in physical address.
|
||||||
|
|
||||||
|
The start physical address of memory region that DAMON_LRU_SORT will do work
|
||||||
|
against. By default, biggest System RAM is used as the region.
|
||||||
|
|
||||||
|
monitor_region_end
|
||||||
|
------------------
|
||||||
|
|
||||||
|
End of target memory region in physical address.
|
||||||
|
|
||||||
|
The end physical address of memory region that DAMON_LRU_SORT will do work
|
||||||
|
against. By default, biggest System RAM is used as the region.
|
||||||
|
|
||||||
|
kdamond_pid
|
||||||
|
-----------
|
||||||
|
|
||||||
|
PID of the DAMON thread.
|
||||||
|
|
||||||
|
If DAMON_LRU_SORT is enabled, this becomes the PID of the worker thread. Else,
|
||||||
|
-1.
|
||||||
|
|
||||||
|
nr_lru_sort_tried_hot_regions
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Number of hot memory regions that tried to be LRU-sorted.
|
||||||
|
|
||||||
|
bytes_lru_sort_tried_hot_regions
|
||||||
|
--------------------------------
|
||||||
|
|
||||||
|
Total bytes of hot memory regions that tried to be LRU-sorted.
|
||||||
|
|
||||||
|
nr_lru_sorted_hot_regions
|
||||||
|
-------------------------
|
||||||
|
|
||||||
|
Number of hot memory regions that were successfully LRU-sorted.
|
||||||
|
|
||||||
|
bytes_lru_sorted_hot_regions
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
Total bytes of hot memory regions that were successfully LRU-sorted.
|
||||||
|
|
||||||
|
nr_hot_quota_exceeds
|
||||||
|
--------------------
|
||||||
|
|
||||||
|
Number of times that the time quota limit for hot regions has been exceeded.
|
||||||
|
|
||||||
|
nr_lru_sort_tried_cold_regions
|
||||||
|
------------------------------
|
||||||
|
|
||||||
|
Number of cold memory regions that tried to be LRU-sorted.
|
||||||
|
|
||||||
|
bytes_lru_sort_tried_cold_regions
|
||||||
|
---------------------------------
|
||||||
|
|
||||||
|
Total bytes of cold memory regions that tried to be LRU-sorted.
|
||||||
|
|
||||||
|
nr_lru_sorted_cold_regions
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Number of cold memory regions that were successfully LRU-sorted.
|
||||||
|
|
||||||
|
bytes_lru_sorted_cold_regions
|
||||||
|
-----------------------------
|
||||||
|
|
||||||
|
Total bytes of cold memory regions that were successfully LRU-sorted.
|
||||||
|
|
||||||
|
nr_cold_quota_exceeds
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
Number of times that the time quota limit for cold regions has been exceeded.
|
||||||
|
|
||||||
|
Example
|
||||||
|
=======
|
||||||
|
|
||||||
|
Below runtime example commands make DAMON_LRU_SORT find memory regions
|
||||||
|
having >=50% access frequency and LRU-prioritize while LRU-deprioritizing
|
||||||
|
memory regions that are not accessed for 120 seconds. The prioritization and
|
||||||
|
deprioritization is limited to be done using only up to 1% CPU time to avoid
|
||||||
|
DAMON_LRU_SORT consuming too much CPU time for the (de)prioritization. It also
|
||||||
|
asks DAMON_LRU_SORT to do nothing if the system's free memory rate is more than
|
||||||
|
50%, but start the real works if it becomes lower than 40%. If DAMON_RECLAIM
|
||||||
|
doesn't make progress and therefore the free memory rate becomes lower than
|
||||||
|
20%, it asks DAMON_LRU_SORT to do nothing again, so that we can fall back to
|
||||||
|
the LRU-list based page granularity reclamation. ::
|
||||||
|
|
||||||
|
# cd /sys/module/damon_lru_sort/parameters
|
||||||
|
# echo 500 > hot_thres_access_freq
|
||||||
|
# echo 120000000 > cold_min_age
|
||||||
|
# echo 10 > quota_ms
|
||||||
|
# echo 1000 > quota_reset_interval_ms
|
||||||
|
# echo 500 > wmarks_high
|
||||||
|
# echo 400 > wmarks_mid
|
||||||
|
# echo 200 > wmarks_low
|
||||||
|
# echo Y > enabled
|
||||||
@@ -48,12 +48,6 @@ DAMON_RECLAIM utilizes module parameters. That is, you can put
|
|||||||
``damon_reclaim.<parameter>=<value>`` on the kernel boot command line or write
|
``damon_reclaim.<parameter>=<value>`` on the kernel boot command line or write
|
||||||
proper values to ``/sys/module/damon_reclaim/parameters/<parameter>`` files.
|
proper values to ``/sys/module/damon_reclaim/parameters/<parameter>`` files.
|
||||||
|
|
||||||
Note that the parameter values except ``enabled`` are applied only when
|
|
||||||
DAMON_RECLAIM starts. Therefore, if you want to apply new parameter values in
|
|
||||||
runtime and DAMON_RECLAIM is already enabled, you should disable and re-enable
|
|
||||||
it via ``enabled`` parameter file. Writing of the new values to proper
|
|
||||||
parameter values should be done before the re-enablement.
|
|
||||||
|
|
||||||
Below is the description of each parameter.
|
Below is the description of each parameter.
|
||||||
|
|
||||||
enabled
|
enabled
|
||||||
@@ -268,4 +262,4 @@ granularity reclamation. ::
|
|||||||
|
|
||||||
.. [1] https://research.google/pubs/pub48551/
|
.. [1] https://research.google/pubs/pub48551/
|
||||||
.. [2] https://lwn.net/Articles/787611/
|
.. [2] https://lwn.net/Articles/787611/
|
||||||
.. [3] https://www.kernel.org/doc/html/latest/vm/free_page_reporting.html
|
.. [3] https://www.kernel.org/doc/html/latest/mm/free_page_reporting.html
|
||||||
|
|||||||
@@ -30,11 +30,11 @@ DAMON provides below interfaces for different users.
|
|||||||
<sysfs_interface>`. This will be removed after next LTS kernel is released,
|
<sysfs_interface>`. This will be removed after next LTS kernel is released,
|
||||||
so users should move to the :ref:`sysfs interface <sysfs_interface>`.
|
so users should move to the :ref:`sysfs interface <sysfs_interface>`.
|
||||||
- *Kernel Space Programming Interface.*
|
- *Kernel Space Programming Interface.*
|
||||||
:doc:`This </vm/damon/api>` is for kernel space programmers. Using this,
|
:doc:`This </mm/damon/api>` is for kernel space programmers. Using this,
|
||||||
users can utilize every feature of DAMON most flexibly and efficiently by
|
users can utilize every feature of DAMON most flexibly and efficiently by
|
||||||
writing kernel space DAMON application programs for you. You can even extend
|
writing kernel space DAMON application programs for you. You can even extend
|
||||||
DAMON for various address spaces. For detail, please refer to the interface
|
DAMON for various address spaces. For detail, please refer to the interface
|
||||||
:doc:`document </vm/damon/api>`.
|
:doc:`document </mm/damon/api>`.
|
||||||
|
|
||||||
.. _sysfs_interface:
|
.. _sysfs_interface:
|
||||||
|
|
||||||
@@ -50,10 +50,10 @@ For a short example, users can monitor the virtual address space of a given
|
|||||||
workload as below. ::
|
workload as below. ::
|
||||||
|
|
||||||
# cd /sys/kernel/mm/damon/admin/
|
# cd /sys/kernel/mm/damon/admin/
|
||||||
# echo 1 > kdamonds/nr && echo 1 > kdamonds/0/contexts/nr
|
# echo 1 > kdamonds/nr_kdamonds && echo 1 > kdamonds/0/contexts/nr_contexts
|
||||||
# echo vaddr > kdamonds/0/contexts/0/operations
|
# echo vaddr > kdamonds/0/contexts/0/operations
|
||||||
# echo 1 > kdamonds/0/contexts/0/targets/nr
|
# echo 1 > kdamonds/0/contexts/0/targets/nr_targets
|
||||||
# echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid
|
# echo $(pidof <workload>) > kdamonds/0/contexts/0/targets/0/pid_target
|
||||||
# echo on > kdamonds/0/state
|
# echo on > kdamonds/0/state
|
||||||
|
|
||||||
Files Hierarchy
|
Files Hierarchy
|
||||||
@@ -185,7 +185,7 @@ controls the monitoring overhead, exist. You can set and get the values by
|
|||||||
writing to and reading from the files.
|
writing to and reading from the files.
|
||||||
|
|
||||||
For more details about the intervals and monitoring regions range, please refer
|
For more details about the intervals and monitoring regions range, please refer
|
||||||
to the Design document (:doc:`/vm/damon/design`).
|
to the Design document (:doc:`/mm/damon/design`).
|
||||||
|
|
||||||
contexts/<N>/targets/
|
contexts/<N>/targets/
|
||||||
---------------------
|
---------------------
|
||||||
@@ -264,6 +264,8 @@ that can be written to and read from the file and their meaning are as below.
|
|||||||
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
|
- ``pageout``: Call ``madvise()`` for the region with ``MADV_PAGEOUT``
|
||||||
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
|
- ``hugepage``: Call ``madvise()`` for the region with ``MADV_HUGEPAGE``
|
||||||
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
- ``nohugepage``: Call ``madvise()`` for the region with ``MADV_NOHUGEPAGE``
|
||||||
|
- ``lru_prio``: Prioritize the region on its LRU lists.
|
||||||
|
- ``lru_deprio``: Deprioritize the region on its LRU lists.
|
||||||
- ``stat``: Do nothing but count the statistics
|
- ``stat``: Do nothing but count the statistics
|
||||||
|
|
||||||
schemes/<N>/access_pattern/
|
schemes/<N>/access_pattern/
|
||||||
@@ -364,12 +366,12 @@ memory rate becomes larger than 60%, or lower than 30%". ::
|
|||||||
# echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
|
# echo 1 > kdamonds/0/contexts/0/schemes/nr_schemes
|
||||||
# cd kdamonds/0/contexts/0/schemes/0
|
# cd kdamonds/0/contexts/0/schemes/0
|
||||||
# # set the basic access pattern and the action
|
# # set the basic access pattern and the action
|
||||||
# echo 4096 > access_patterns/sz/min
|
# echo 4096 > access_pattern/sz/min
|
||||||
# echo 8192 > access_patterns/sz/max
|
# echo 8192 > access_pattern/sz/max
|
||||||
# echo 0 > access_patterns/nr_accesses/min
|
# echo 0 > access_pattern/nr_accesses/min
|
||||||
# echo 5 > access_patterns/nr_accesses/max
|
# echo 5 > access_pattern/nr_accesses/max
|
||||||
# echo 10 > access_patterns/age/min
|
# echo 10 > access_pattern/age/min
|
||||||
# echo 20 > access_patterns/age/max
|
# echo 20 > access_pattern/age/max
|
||||||
# echo pageout > action
|
# echo pageout > action
|
||||||
# # set quotas
|
# # set quotas
|
||||||
# echo 10 > quotas/ms
|
# echo 10 > quotas/ms
|
||||||
@@ -402,7 +404,7 @@ Attributes
|
|||||||
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
Users can get and set the ``sampling interval``, ``aggregation interval``,
|
||||||
``update interval``, and min/max number of monitoring target regions by
|
``update interval``, and min/max number of monitoring target regions by
|
||||||
reading from and writing to the ``attrs`` file. To know about the monitoring
|
reading from and writing to the ``attrs`` file. To know about the monitoring
|
||||||
attributes in detail, please refer to the :doc:`/vm/damon/design`. For
|
attributes in detail, please refer to the :doc:`/mm/damon/design`. For
|
||||||
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
example, below commands set those values to 5 ms, 100 ms, 1,000 ms, 10 and
|
||||||
1000, and then check it again::
|
1000, and then check it again::
|
||||||
|
|
||||||
|
|||||||
@@ -164,8 +164,8 @@ default_hugepagesz
|
|||||||
will all result in 256 2M huge pages being allocated. Valid default
|
will all result in 256 2M huge pages being allocated. Valid default
|
||||||
huge page size is architecture dependent.
|
huge page size is architecture dependent.
|
||||||
hugetlb_free_vmemmap
|
hugetlb_free_vmemmap
|
||||||
When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables optimizing
|
When CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP is set, this enables HugeTLB
|
||||||
unused vmemmap pages associated with each HugeTLB page.
|
Vmemmap Optimization (HVO).
|
||||||
|
|
||||||
When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
|
When multiple huge page sizes are supported, ``/proc/sys/vm/nr_hugepages``
|
||||||
indicates the current number of pre-allocated huge pages of the default size.
|
indicates the current number of pre-allocated huge pages of the default size.
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ the Linux memory management.
|
|||||||
numa_memory_policy
|
numa_memory_policy
|
||||||
numaperf
|
numaperf
|
||||||
pagemap
|
pagemap
|
||||||
|
shrinker_debugfs
|
||||||
soft-dirty
|
soft-dirty
|
||||||
swap_numa
|
swap_numa
|
||||||
transhuge
|
transhuge
|
||||||
|
|||||||
@@ -653,8 +653,8 @@ block might fail:
|
|||||||
- Concurrent activity that operates on the same physical memory area, such as
|
- Concurrent activity that operates on the same physical memory area, such as
|
||||||
allocating gigantic pages, can result in temporary offlining failures.
|
allocating gigantic pages, can result in temporary offlining failures.
|
||||||
|
|
||||||
- Out of memory when dissolving huge pages, especially when freeing unused
|
- Out of memory when dissolving huge pages, especially when HugeTLB Vmemmap
|
||||||
vmemmap pages associated with each hugetlb page is enabled.
|
Optimization (HVO) is enabled.
|
||||||
|
|
||||||
Offlining code may be able to migrate huge page contents, but may not be able
|
Offlining code may be able to migrate huge page contents, but may not be able
|
||||||
to dissolve the source huge page because it fails allocating (unmovable) pages
|
to dissolve the source huge page because it fails allocating (unmovable) pages
|
||||||
|
|||||||
135
Documentation/admin-guide/mm/shrinker_debugfs.rst
Normal file
135
Documentation/admin-guide/mm/shrinker_debugfs.rst
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
.. _shrinker_debugfs:
|
||||||
|
|
||||||
|
==========================
|
||||||
|
Shrinker Debugfs Interface
|
||||||
|
==========================
|
||||||
|
|
||||||
|
The shrinker debugfs interface provides visibility into the kernel memory
|
||||||
|
shrinkers subsystem and allows getting information about individual shrinkers
|
||||||
|
and interact with them.
|
||||||
|
|
||||||
|
For each shrinker registered in the system a directory in **<debugfs>/shrinker/**
|
||||||
|
is created. The directory's name is composed from the shrinker's name and a
|
||||||
|
unique id: e.g. *kfree_rcu-0* or *sb-xfs:vda1-36*.
|
||||||
|
|
||||||
|
Each shrinker directory contains **count** and **scan** files, which allow to
|
||||||
|
trigger *count_objects()* and *scan_objects()* callbacks for each memcg and
|
||||||
|
numa node (if applicable).
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
------
|
||||||
|
|
||||||
|
1. *List registered shrinkers*
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
$ cd /sys/kernel/debug/shrinker/
|
||||||
|
$ ls
|
||||||
|
dquota-cache-16 sb-devpts-28 sb-proc-47 sb-tmpfs-42
|
||||||
|
mm-shadow-18 sb-devtmpfs-5 sb-proc-48 sb-tmpfs-43
|
||||||
|
mm-zspool:zram0-34 sb-hugetlbfs-17 sb-pstore-31 sb-tmpfs-44
|
||||||
|
rcu-kfree-0 sb-hugetlbfs-33 sb-rootfs-2 sb-tmpfs-49
|
||||||
|
sb-aio-20 sb-iomem-12 sb-securityfs-6 sb-tracefs-13
|
||||||
|
sb-anon_inodefs-15 sb-mqueue-21 sb-selinuxfs-22 sb-xfs:vda1-36
|
||||||
|
sb-bdev-3 sb-nsfs-4 sb-sockfs-8 sb-zsmalloc-19
|
||||||
|
sb-bpf-32 sb-pipefs-14 sb-sysfs-26 thp-deferred_split-10
|
||||||
|
sb-btrfs:vda2-24 sb-proc-25 sb-tmpfs-1 thp-zero-9
|
||||||
|
sb-cgroup2-30 sb-proc-39 sb-tmpfs-27 xfs-buf:vda1-37
|
||||||
|
sb-configfs-23 sb-proc-41 sb-tmpfs-29 xfs-inodegc:vda1-38
|
||||||
|
sb-dax-11 sb-proc-45 sb-tmpfs-35
|
||||||
|
sb-debugfs-7 sb-proc-46 sb-tmpfs-40
|
||||||
|
|
||||||
|
2. *Get information about a specific shrinker*
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
$ cd sb-btrfs\:vda2-24/
|
||||||
|
$ ls
|
||||||
|
count scan
|
||||||
|
|
||||||
|
3. *Count objects*
|
||||||
|
|
||||||
|
Each line in the output has the following format::
|
||||||
|
|
||||||
|
<cgroup inode id> <nr of objects on node 0> <nr of objects on node 1> ...
|
||||||
|
<cgroup inode id> <nr of objects on node 0> <nr of objects on node 1> ...
|
||||||
|
...
|
||||||
|
|
||||||
|
If there are no objects on all numa nodes, a line is omitted. If there
|
||||||
|
are no objects at all, the output might be empty.
|
||||||
|
|
||||||
|
If the shrinker is not memcg-aware or CONFIG_MEMCG is off, 0 is printed
|
||||||
|
as cgroup inode id. If the shrinker is not numa-aware, 0's are printed
|
||||||
|
for all nodes except the first one.
|
||||||
|
::
|
||||||
|
|
||||||
|
$ cat count
|
||||||
|
1 224 2
|
||||||
|
21 98 0
|
||||||
|
55 818 10
|
||||||
|
2367 2 0
|
||||||
|
2401 30 0
|
||||||
|
225 13 0
|
||||||
|
599 35 0
|
||||||
|
939 124 0
|
||||||
|
1041 3 0
|
||||||
|
1075 1 0
|
||||||
|
1109 1 0
|
||||||
|
1279 60 0
|
||||||
|
1313 7 0
|
||||||
|
1347 39 0
|
||||||
|
1381 3 0
|
||||||
|
1449 14 0
|
||||||
|
1483 63 0
|
||||||
|
1517 53 0
|
||||||
|
1551 6 0
|
||||||
|
1585 1 0
|
||||||
|
1619 6 0
|
||||||
|
1653 40 0
|
||||||
|
1687 11 0
|
||||||
|
1721 8 0
|
||||||
|
1755 4 0
|
||||||
|
1789 52 0
|
||||||
|
1823 888 0
|
||||||
|
1857 1 0
|
||||||
|
1925 2 0
|
||||||
|
1959 32 0
|
||||||
|
2027 22 0
|
||||||
|
2061 9 0
|
||||||
|
2469 799 0
|
||||||
|
2537 861 0
|
||||||
|
2639 1 0
|
||||||
|
2707 70 0
|
||||||
|
2775 4 0
|
||||||
|
2877 84 0
|
||||||
|
293 1 0
|
||||||
|
735 8 0
|
||||||
|
|
||||||
|
4. *Scan objects*
|
||||||
|
|
||||||
|
The expected input format::
|
||||||
|
|
||||||
|
<cgroup inode id> <numa id> <number of objects to scan>
|
||||||
|
|
||||||
|
For a non-memcg-aware shrinker or on a system with no memory
|
||||||
|
cgroups, **0** should be passed as the cgroup id.
|
||||||
|
::
|
||||||
|
|
||||||
|
$ cd /sys/kernel/debug/shrinker/
|
||||||
|
$ cd sb-btrfs\:vda2-24/
|
||||||
|
|
||||||
|
$ cat count | head -n 5
|
||||||
|
1 212 0
|
||||||
|
21 97 0
|
||||||
|
55 802 5
|
||||||
|
2367 2 0
|
||||||
|
225 13 0
|
||||||
|
|
||||||
|
$ echo "55 0 200" > scan
|
||||||
|
|
||||||
|
$ cat count | head -n 5
|
||||||
|
1 212 0
|
||||||
|
21 96 0
|
||||||
|
55 752 5
|
||||||
|
2367 2 0
|
||||||
|
225 13 0
|
||||||
@@ -38,8 +38,8 @@ acct
|
|||||||
|
|
||||||
If BSD-style process accounting is enabled these values control
|
If BSD-style process accounting is enabled these values control
|
||||||
its behaviour. If free space on filesystem where the log lives
|
its behaviour. If free space on filesystem where the log lives
|
||||||
goes below ``lowwater``% accounting suspends. If free space gets
|
goes below ``lowwater``\ % accounting suspends. If free space gets
|
||||||
above ``highwater``% accounting resumes. ``frequency`` determines
|
above ``highwater``\ % accounting resumes. ``frequency`` determines
|
||||||
how often do we check the amount of free space (value is in
|
how often do we check the amount of free space (value is in
|
||||||
seconds). Default:
|
seconds). Default:
|
||||||
|
|
||||||
@@ -592,6 +592,18 @@ to the guest kernel command line (see
|
|||||||
Documentation/admin-guide/kernel-parameters.rst).
|
Documentation/admin-guide/kernel-parameters.rst).
|
||||||
|
|
||||||
|
|
||||||
|
nmi_wd_lpm_factor (PPC only)
|
||||||
|
============================
|
||||||
|
|
||||||
|
Factor to apply to the NMI watchdog timeout (only when ``nmi_watchdog`` is
|
||||||
|
set to 1). This factor represents the percentage added to
|
||||||
|
``watchdog_thresh`` when calculating the NMI watchdog timeout during an
|
||||||
|
LPM. The soft lockup timeout is not impacted.
|
||||||
|
|
||||||
|
A value of 0 means no change. The default value is 200 meaning the NMI
|
||||||
|
watchdog is set to 30s (based on ``watchdog_thresh`` equal to 10).
|
||||||
|
|
||||||
|
|
||||||
numa_balancing
|
numa_balancing
|
||||||
==============
|
==============
|
||||||
|
|
||||||
|
|||||||
@@ -391,6 +391,18 @@ GRO has decided not to coalesce, it is placed on a per-NAPI list. This
|
|||||||
list is then passed to the stack when the number of segments reaches the
|
list is then passed to the stack when the number of segments reaches the
|
||||||
gro_normal_batch limit.
|
gro_normal_batch limit.
|
||||||
|
|
||||||
|
high_order_alloc_disable
|
||||||
|
------------------------
|
||||||
|
|
||||||
|
By default the allocator for page frags tries to use high order pages (order-3
|
||||||
|
on x86). While the default behavior gives good results in most cases, some users
|
||||||
|
might have hit a contention in page allocations/freeing. This was especially
|
||||||
|
true on older kernels (< 5.14) when high-order pages were not stored on per-cpu
|
||||||
|
lists. This allows opting in to order-0 allocation instead, but is now mostly of
|
||||||
|
historical importance.
|
||||||
|
|
||||||
|
Default: 0
|
||||||
|
|
||||||
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
2. /proc/sys/net/unix - Parameters for Unix domain sockets
|
||||||
----------------------------------------------------------
|
----------------------------------------------------------
|
||||||
|
|
||||||
|
|||||||
@@ -565,13 +565,11 @@ See Documentation/admin-guide/mm/hugetlbpage.rst
|
|||||||
hugetlb_optimize_vmemmap
|
hugetlb_optimize_vmemmap
|
||||||
========================
|
========================
|
||||||
|
|
||||||
This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter)
|
This knob is not available when the size of 'struct page' (a structure defined
|
||||||
is configured or the size of 'struct page' (a structure defined in
|
in include/linux/mm_types.h) is not a power of two (an unusual system config could
|
||||||
include/linux/mm_types.h) is not power of two (an unusual system config could
|
|
||||||
result in this).
|
result in this).
|
||||||
|
|
||||||
Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages
|
Enable (set to 1) or disable (set to 0) HugeTLB Vmemmap Optimization (HVO).
|
||||||
associated with each HugeTLB page.
|
|
||||||
|
|
||||||
Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
|
Once enabled, the vmemmap pages of subsequent allocation of HugeTLB pages from
|
||||||
buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages
|
buddy allocator will be optimized (7 pages per 2MB HugeTLB page and 4095 pages
|
||||||
@@ -760,7 +758,7 @@ and don't use much of it.
|
|||||||
|
|
||||||
The default value is 0.
|
The default value is 0.
|
||||||
|
|
||||||
See Documentation/vm/overcommit-accounting.rst and
|
See Documentation/mm/overcommit-accounting.rst and
|
||||||
mm/util.c::__vm_enough_memory() for more information.
|
mm/util.c::__vm_enough_memory() for more information.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -100,6 +100,7 @@ Bit Log Number Reason that got the kernel tainted
|
|||||||
15 _/K 32768 kernel has been live patched
|
15 _/K 32768 kernel has been live patched
|
||||||
16 _/X 65536 auxiliary taint, defined for and used by distros
|
16 _/X 65536 auxiliary taint, defined for and used by distros
|
||||||
17 _/T 131072 kernel was built with the struct randomization plugin
|
17 _/T 131072 kernel was built with the struct randomization plugin
|
||||||
|
18 _/N 262144 an in-kernel test has been run
|
||||||
=== === ====== ========================================================
|
=== === ====== ========================================================
|
||||||
|
|
||||||
Note: The character ``_`` is representing a blank in this table to make reading
|
Note: The character ``_`` is representing a blank in this table to make reading
|
||||||
|
|||||||
69
Documentation/arm/google/chromebook-boot-flow.rst
Normal file
69
Documentation/arm/google/chromebook-boot-flow.rst
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
======================================
|
||||||
|
Chromebook Boot Flow
|
||||||
|
======================================
|
||||||
|
|
||||||
|
Most recent Chromebooks that use device tree are using the opensource
|
||||||
|
depthcharge_ bootloader. Depthcharge_ expects the OS to be packaged as a `FIT
|
||||||
|
Image`_ which contains an OS image as well as a collection of device trees. It
|
||||||
|
is up to depthcharge_ to pick the right device tree from the `FIT Image`_ and
|
||||||
|
provide it to the OS.
|
||||||
|
|
||||||
|
The scheme that depthcharge_ uses to pick the device tree takes into account
|
||||||
|
three variables:
|
||||||
|
|
||||||
|
- Board name, specified at depthcharge_ compile time. This is $(BOARD) below.
|
||||||
|
- Board revision number, determined at runtime (perhaps by reading GPIO
|
||||||
|
strappings, perhaps via some other method). This is $(REV) below.
|
||||||
|
- SKU number, read from GPIO strappings at boot time. This is $(SKU) below.
|
||||||
|
|
||||||
|
For recent Chromebooks, depthcharge_ creates a match list that looks like this:
|
||||||
|
|
||||||
|
- google,$(BOARD)-rev$(REV)-sku$(SKU)
|
||||||
|
- google,$(BOARD)-rev$(REV)
|
||||||
|
- google,$(BOARD)-sku$(SKU)
|
||||||
|
- google,$(BOARD)
|
||||||
|
|
||||||
|
Note that some older Chromebooks use a slightly different list that may
|
||||||
|
not include SKU matching or may prioritize SKU/rev differently.
|
||||||
|
|
||||||
|
Note that for some boards there may be extra board-specific logic to inject
|
||||||
|
extra compatibles into the list, but this is uncommon.
|
||||||
|
|
||||||
|
Depthcharge_ will look through all device trees in the `FIT Image`_ trying to
|
||||||
|
find one that matches the most specific compatible. It will then look
|
||||||
|
through all device trees in the `FIT Image`_ trying to find the one that
|
||||||
|
matches the *second most* specific compatible, etc.
|
||||||
|
|
||||||
|
When searching for a device tree, depthcharge_ doesn't care where the
|
||||||
|
compatible string falls within a device tree's root compatible string array.
|
||||||
|
As an example, if we're on board "lazor", rev 4, SKU 0 and we have two device
|
||||||
|
trees:
|
||||||
|
|
||||||
|
- "google,lazor-rev5-sku0", "google,lazor-rev4-sku0", "qcom,sc7180"
|
||||||
|
- "google,lazor", "qcom,sc7180"
|
||||||
|
|
||||||
|
Then depthcharge_ will pick the first device tree even though
|
||||||
|
"google,lazor-rev4-sku0" was the second compatible listed in that device tree.
|
||||||
|
This is because it is a more specific compatible than "google,lazor".
|
||||||
|
|
||||||
|
It should be noted that depthcharge_ does not have any smarts to try to
|
||||||
|
match board or SKU revisions that are "close by". That is to say that
|
||||||
|
if depthcharge_ knows it's on "rev4" of a board but there is no "rev4"
|
||||||
|
device tree then depthcharge_ *won't* look for a "rev3" device tree.
|
||||||
|
|
||||||
|
In general when any significant changes are made to a board the board
|
||||||
|
revision number is increased even if none of those changes need to
|
||||||
|
be reflected in the device tree. Thus it's fairly common to see device
|
||||||
|
trees with multiple revisions.
|
||||||
|
|
||||||
|
It should be noted that, taking into account the above system that
|
||||||
|
depthcharge_ has, the most flexibility is achieved if the device tree
|
||||||
|
supporting the newest revision(s) of a board omits the "-rev{REV}"
|
||||||
|
compatible strings. When this is done then if you get a new board
|
||||||
|
revision and try to run old software on it then we'll at least pick the
|
||||||
|
newest device tree we know about.
|
||||||
|
|
||||||
|
.. _depthcharge: https://source.chromium.org/chromiumos/chromiumos/codesearch/+/main:src/platform/depthcharge/
|
||||||
|
.. _`FIT Image`: https://doc.coreboot.org/lib/payloads/fit.html
|
||||||
@@ -31,6 +31,8 @@ SoC-specific documents
|
|||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|
||||||
|
google/chromebook-boot-flow
|
||||||
|
|
||||||
ixp4xx
|
ixp4xx
|
||||||
|
|
||||||
marvell
|
marvell
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
|
||||||
=======================
|
=======================
|
||||||
S3C24XX CPUfreq support
|
S3C24XX CPUfreq support
|
||||||
=======================
|
=======================
|
||||||
@@ -73,4 +75,3 @@ Document Author
|
|||||||
---------------
|
---------------
|
||||||
|
|
||||||
Ben Dooks, Copyright 2009 Simtec Electronics
|
Ben Dooks, Copyright 2009 Simtec Electronics
|
||||||
Licensed under GPLv2
|
|
||||||
|
|||||||
@@ -171,138 +171,105 @@ HWCAP_PACG
|
|||||||
Documentation/arm64/pointer-authentication.rst.
|
Documentation/arm64/pointer-authentication.rst.
|
||||||
|
|
||||||
HWCAP2_DCPODP
|
HWCAP2_DCPODP
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
||||||
|
|
||||||
HWCAP2_SVE2
|
HWCAP2_SVE2
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEAES
|
HWCAP2_SVEAES
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEPMULL
|
HWCAP2_SVEPMULL
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
|
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||||
|
|
||||||
HWCAP2_SVEBITPERM
|
HWCAP2_SVEBITPERM
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVESHA3
|
HWCAP2_SVESHA3
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVESM4
|
HWCAP2_SVESM4
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||||
|
|
||||||
HWCAP2_FLAGM2
|
HWCAP2_FLAGM2
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
|
Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010.
|
||||||
|
|
||||||
HWCAP2_FRINT
|
HWCAP2_FRINT
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
|
Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEI8MM
|
HWCAP2_SVEI8MM
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.I8MM == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEF32MM
|
HWCAP2_SVEF32MM
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.F32MM == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEF64MM
|
HWCAP2_SVEF64MM
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.F64MM == 0b0001.
|
||||||
|
|
||||||
HWCAP2_SVEBF16
|
HWCAP2_SVEBF16
|
||||||
|
|
||||||
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
Functionality implied by ID_AA64ZFR0_EL1.BF16 == 0b0001.
|
||||||
|
|
||||||
HWCAP2_I8MM
|
HWCAP2_I8MM
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
|
Functionality implied by ID_AA64ISAR1_EL1.I8MM == 0b0001.
|
||||||
|
|
||||||
HWCAP2_BF16
|
HWCAP2_BF16
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001.
|
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0001.
|
||||||
|
|
||||||
HWCAP2_DGH
|
HWCAP2_DGH
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.DGH == 0b0001.
|
Functionality implied by ID_AA64ISAR1_EL1.DGH == 0b0001.
|
||||||
|
|
||||||
HWCAP2_RNG
|
HWCAP2_RNG
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001.
|
Functionality implied by ID_AA64ISAR0_EL1.RNDR == 0b0001.
|
||||||
|
|
||||||
HWCAP2_BTI
|
HWCAP2_BTI
|
||||||
|
|
||||||
Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001.
|
Functionality implied by ID_AA64PFR1_EL1.BT == 0b0001.
|
||||||
|
|
||||||
HWCAP2_MTE
|
HWCAP2_MTE
|
||||||
|
|
||||||
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
|
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0010, as described
|
||||||
by Documentation/arm64/memory-tagging-extension.rst.
|
by Documentation/arm64/memory-tagging-extension.rst.
|
||||||
|
|
||||||
HWCAP2_ECV
|
HWCAP2_ECV
|
||||||
|
|
||||||
Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
|
Functionality implied by ID_AA64MMFR0_EL1.ECV == 0b0001.
|
||||||
|
|
||||||
HWCAP2_AFP
|
HWCAP2_AFP
|
||||||
|
|
||||||
Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001.
|
Functionality implied by ID_AA64MMFR1_EL1.AFP == 0b0001.
|
||||||
|
|
||||||
HWCAP2_RPRES
|
HWCAP2_RPRES
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
Functionality implied by ID_AA64ISAR2_EL1.RPRES == 0b0001.
|
||||||
|
|
||||||
HWCAP2_MTE3
|
HWCAP2_MTE3
|
||||||
|
|
||||||
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
Functionality implied by ID_AA64PFR1_EL1.MTE == 0b0011, as described
|
||||||
by Documentation/arm64/memory-tagging-extension.rst.
|
by Documentation/arm64/memory-tagging-extension.rst.
|
||||||
|
|
||||||
HWCAP2_SME
|
HWCAP2_SME
|
||||||
|
|
||||||
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
|
Functionality implied by ID_AA64PFR1_EL1.SME == 0b0001, as described
|
||||||
by Documentation/arm64/sme.rst.
|
by Documentation/arm64/sme.rst.
|
||||||
|
|
||||||
HWCAP2_SME_I16I64
|
HWCAP2_SME_I16I64
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
|
Functionality implied by ID_AA64SMFR0_EL1.I16I64 == 0b1111.
|
||||||
|
|
||||||
HWCAP2_SME_F64F64
|
HWCAP2_SME_F64F64
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
|
Functionality implied by ID_AA64SMFR0_EL1.F64F64 == 0b1.
|
||||||
|
|
||||||
HWCAP2_SME_I8I32
|
HWCAP2_SME_I8I32
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
|
Functionality implied by ID_AA64SMFR0_EL1.I8I32 == 0b1111.
|
||||||
|
|
||||||
HWCAP2_SME_F16F32
|
HWCAP2_SME_F16F32
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
|
Functionality implied by ID_AA64SMFR0_EL1.F16F32 == 0b1.
|
||||||
|
|
||||||
HWCAP2_SME_B16F32
|
HWCAP2_SME_B16F32
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
|
Functionality implied by ID_AA64SMFR0_EL1.B16F32 == 0b1.
|
||||||
|
|
||||||
HWCAP2_SME_F32F32
|
HWCAP2_SME_F32F32
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
|
Functionality implied by ID_AA64SMFR0_EL1.F32F32 == 0b1.
|
||||||
|
|
||||||
HWCAP2_SME_FA64
|
HWCAP2_SME_FA64
|
||||||
|
|
||||||
Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
|
Functionality implied by ID_AA64SMFR0_EL1.FA64 == 0b1.
|
||||||
|
|
||||||
HWCAP2_WFXT
|
HWCAP2_WFXT
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
|
Functionality implied by ID_AA64ISAR2_EL1.WFXT == 0b0010.
|
||||||
|
|
||||||
HWCAP2_EBF16
|
HWCAP2_EBF16
|
||||||
|
|
||||||
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
|
Functionality implied by ID_AA64ISAR1_EL1.BF16 == 0b0010.
|
||||||
|
|
||||||
4. Unused AT_HWCAP bits
|
4. Unused AT_HWCAP bits
|
||||||
|
|||||||
@@ -52,6 +52,8 @@ stable kernels.
|
|||||||
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
| Allwinner | A64/R18 | UNKNOWN1 | SUN50I_ERRATUM_UNKNOWN1 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
|
| ARM | Cortex-A510 | #2457168 | ARM64_ERRATUM_2457168 |
|
||||||
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
|
| ARM | Cortex-A510 | #2064142 | ARM64_ERRATUM_2064142 |
|
||||||
+----------------+-----------------+-----------------+-----------------------------+
|
+----------------+-----------------+-----------------+-----------------------------+
|
||||||
| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
|
| ARM | Cortex-A510 | #2038923 | ARM64_ERRATUM_2038923 |
|
||||||
|
|||||||
@@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is:
|
|||||||
|
|
||||||
- RMW operations that have a return value are fully ordered.
|
- RMW operations that have a return value are fully ordered.
|
||||||
|
|
||||||
- RMW operations that are conditional are unordered on FAILURE,
|
- RMW operations that are conditional are fully ordered.
|
||||||
otherwise the above rules apply. In the case of test_and_{}_bit() operations,
|
|
||||||
if the bit in memory is unchanged by the operation then it is deemed to have
|
|
||||||
failed.
|
|
||||||
|
|
||||||
Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
|
Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics,
|
||||||
clear_bit_unlock() which has RELEASE semantics.
|
clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has
|
||||||
|
ACQUIRE semantics.
|
||||||
|
|
||||||
Since a platform only has a single means of achieving atomic operations
|
Since a platform only has a single means of achieving atomic operations
|
||||||
the same barriers as for atomic_t are used, see atomic_t.txt.
|
the same barriers as for atomic_t are used, see atomic_t.txt.
|
||||||
|
|||||||
@@ -23,3 +23,4 @@ Block
|
|||||||
stat
|
stat
|
||||||
switching-sched
|
switching-sched
|
||||||
writeback_cache_control
|
writeback_cache_control
|
||||||
|
ublk
|
||||||
|
|||||||
@@ -72,6 +72,28 @@ submit_queues=[1..nr_cpus]: Default: 1
|
|||||||
hw_queue_depth=[0..qdepth]: Default: 64
|
hw_queue_depth=[0..qdepth]: Default: 64
|
||||||
The hardware queue depth of the device.
|
The hardware queue depth of the device.
|
||||||
|
|
||||||
|
memory_backed=[0/1]: Default: 0
|
||||||
|
Whether or not to use a memory buffer to respond to IO requests
|
||||||
|
|
||||||
|
= =============================================
|
||||||
|
0 Transfer no data in response to IO requests
|
||||||
|
1 Use a memory buffer to respond to IO requests
|
||||||
|
= =============================================
|
||||||
|
|
||||||
|
discard=[0/1]: Default: 0
|
||||||
|
Support discard operations (requires memory-backed null_blk device).
|
||||||
|
|
||||||
|
= =====================================
|
||||||
|
0 Do not support discard operations
|
||||||
|
1 Enable support for discard operations
|
||||||
|
= =====================================
|
||||||
|
|
||||||
|
cache_size=[Size in MB]: Default: 0
|
||||||
|
Cache size in MB for memory-backed device.
|
||||||
|
|
||||||
|
mbps=[Maximum bandwidth in MB/s]: Default: 0 (no limit)
|
||||||
|
Bandwidth limit for device performance.
|
||||||
|
|
||||||
Multi-queue specific parameters
|
Multi-queue specific parameters
|
||||||
-------------------------------
|
-------------------------------
|
||||||
|
|
||||||
|
|||||||
253
Documentation/block/ublk.rst
Normal file
253
Documentation/block/ublk.rst
Normal file
@@ -0,0 +1,253 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
===========================================
|
||||||
|
Userspace block device driver (ublk driver)
|
||||||
|
===========================================
|
||||||
|
|
||||||
|
Overview
|
||||||
|
========
|
||||||
|
|
||||||
|
ublk is a generic framework for implementing block device logic from userspace.
|
||||||
|
The motivation behind it is that moving virtual block drivers into userspace,
|
||||||
|
such as loop, nbd and similar can be very helpful. It can help to implement
|
||||||
|
new virtual block device such as ublk-qcow2 (there are several attempts of
|
||||||
|
implementing qcow2 driver in kernel).
|
||||||
|
|
||||||
|
Userspace block devices are attractive because:
|
||||||
|
|
||||||
|
- They can be written in many programming languages.
|
||||||
|
- They can use libraries that are not available in the kernel.
|
||||||
|
- They can be debugged with tools familiar to application developers.
|
||||||
|
- Crashes do not kernel panic the machine.
|
||||||
|
- Bugs are likely to have a lower security impact than bugs in kernel
|
||||||
|
code.
|
||||||
|
- They can be installed and updated independently of the kernel.
|
||||||
|
- They can be used to simulate block device easily with user specified
|
||||||
|
parameters/setting for test/debug purpose
|
||||||
|
|
||||||
|
ublk block device (``/dev/ublkb*``) is added by ublk driver. Any IO request
|
||||||
|
on the device will be forwarded to ublk userspace program. For convenience,
|
||||||
|
in this document, ``ublk server`` refers to generic ublk userspace
|
||||||
|
program. ``ublksrv`` [#userspace]_ is one of such implementation. It
|
||||||
|
provides ``libublksrv`` [#userspace_lib]_ library for developing specific
|
||||||
|
user block device conveniently, while also generic type block device is
|
||||||
|
included, such as loop and null. Richard W.M. Jones wrote userspace nbd device
|
||||||
|
``nbdublk`` [#userspace_nbdublk]_ based on ``libublksrv`` [#userspace_lib]_.
|
||||||
|
|
||||||
|
After the IO is handled by userspace, the result is committed back to the
|
||||||
|
driver, thus completing the request cycle. This way, any specific IO handling
|
||||||
|
logic is totally done by userspace, such as loop's IO handling, NBD's IO
|
||||||
|
communication, or qcow2's IO mapping.
|
||||||
|
|
||||||
|
``/dev/ublkb*`` is driven by blk-mq request-based driver. Each request is
|
||||||
|
assigned by one queue wide unique tag. ublk server assigns unique tag to each
|
||||||
|
IO too, which is 1:1 mapped with IO of ``/dev/ublkb*``.
|
||||||
|
|
||||||
|
Both the IO request forward and IO handling result committing are done via
|
||||||
|
``io_uring`` passthrough command; that is why ublk is also one io_uring based
|
||||||
|
block driver. It has been observed that using io_uring passthrough command can
|
||||||
|
give better IOPS than block IO; which is why ublk is one of high performance
|
||||||
|
implementation of userspace block device: not only IO request communication is
|
||||||
|
done by io_uring, but also the preferred IO handling in ublk server is io_uring
|
||||||
|
based approach too.
|
||||||
|
|
||||||
|
ublk provides control interface to set/get ublk block device parameters.
|
||||||
|
The interface is extendable and kabi compatible: basically any ublk request
|
||||||
|
queue's parameter or ublk generic feature parameters can be set/get via the
|
||||||
|
interface. Thus, ublk is generic userspace block device framework.
|
||||||
|
For example, it is easy to setup a ublk device with specified block
|
||||||
|
parameters from userspace.
|
||||||
|
|
||||||
|
Using ublk
|
||||||
|
==========
|
||||||
|
|
||||||
|
ublk requires userspace ublk server to handle real block device logic.
|
||||||
|
|
||||||
|
Below is example of using ``ublksrv`` to provide ublk-based loop device.
|
||||||
|
|
||||||
|
- add a device::
|
||||||
|
|
||||||
|
ublk add -t loop -f ublk-loop.img
|
||||||
|
|
||||||
|
- format with xfs, then use it::
|
||||||
|
|
||||||
|
mkfs.xfs /dev/ublkb0
|
||||||
|
mount /dev/ublkb0 /mnt
|
||||||
|
# do anything. all IOs are handled by io_uring
|
||||||
|
...
|
||||||
|
umount /mnt
|
||||||
|
|
||||||
|
- list the devices with their info::
|
||||||
|
|
||||||
|
ublk list
|
||||||
|
|
||||||
|
- delete the device::
|
||||||
|
|
||||||
|
ublk del -a
|
||||||
|
ublk del -n $ublk_dev_id
|
||||||
|
|
||||||
|
See usage details in README of ``ublksrv`` [#userspace_readme]_.
|
||||||
|
|
||||||
|
Design
|
||||||
|
======
|
||||||
|
|
||||||
|
Control plane
|
||||||
|
-------------
|
||||||
|
|
||||||
|
ublk driver provides global misc device node (``/dev/ublk-control``) for
|
||||||
|
managing and controlling ublk devices with help of several control commands:
|
||||||
|
|
||||||
|
- ``UBLK_CMD_ADD_DEV``
|
||||||
|
|
||||||
|
Add a ublk char device (``/dev/ublkc*``) which is talked with ublk server
|
||||||
|
WRT IO command communication. Basic device info is sent together with this
|
||||||
|
command. It sets UAPI structure of ``ublksrv_ctrl_dev_info``,
|
||||||
|
such as ``nr_hw_queues``, ``queue_depth``, and max IO request buffer size,
|
||||||
|
for which the info is negotiated with the driver and sent back to the server.
|
||||||
|
When this command is completed, the basic device info is immutable.
|
||||||
|
|
||||||
|
- ``UBLK_CMD_SET_PARAMS`` / ``UBLK_CMD_GET_PARAMS``
|
||||||
|
|
||||||
|
Set or get parameters of the device, which can be either generic feature
|
||||||
|
related, or request queue limit related, but can't be IO logic specific,
|
||||||
|
because the driver does not handle any IO logic. This command has to be
|
||||||
|
sent before sending ``UBLK_CMD_START_DEV``.
|
||||||
|
|
||||||
|
- ``UBLK_CMD_START_DEV``
|
||||||
|
|
||||||
|
After the server prepares userspace resources (such as creating per-queue
|
||||||
|
pthread & io_uring for handling ublk IO), this command is sent to the
|
||||||
|
driver for allocating & exposing ``/dev/ublkb*``. Parameters set via
|
||||||
|
``UBLK_CMD_SET_PARAMS`` are applied for creating the device.
|
||||||
|
|
||||||
|
- ``UBLK_CMD_STOP_DEV``
|
||||||
|
|
||||||
|
Halt IO on ``/dev/ublkb*`` and remove the device. When this command returns,
|
||||||
|
ublk server will release resources (such as destroying per-queue pthread &
|
||||||
|
io_uring).
|
||||||
|
|
||||||
|
- ``UBLK_CMD_DEL_DEV``
|
||||||
|
|
||||||
|
Remove ``/dev/ublkc*``. When this command returns, the allocated ublk device
|
||||||
|
number can be reused.
|
||||||
|
|
||||||
|
- ``UBLK_CMD_GET_QUEUE_AFFINITY``
|
||||||
|
|
||||||
|
When ``/dev/ublkc`` is added, the driver creates block layer tagset, so
|
||||||
|
that each queue's affinity info is available. The server sends
|
||||||
|
``UBLK_CMD_GET_QUEUE_AFFINITY`` to retrieve queue affinity info. It can
|
||||||
|
set up the per-queue context efficiently, such as bind affine CPUs with IO
|
||||||
|
pthread and try to allocate buffers in IO thread context.
|
||||||
|
|
||||||
|
- ``UBLK_CMD_GET_DEV_INFO``
|
||||||
|
|
||||||
|
For retrieving device info via ``ublksrv_ctrl_dev_info``. It is the server's
|
||||||
|
responsibility to save IO target specific info in userspace.
|
||||||
|
|
||||||
|
Data plane
|
||||||
|
----------
|
||||||
|
|
||||||
|
ublk server needs to create per-queue IO pthread & io_uring for handling IO
|
||||||
|
commands via io_uring passthrough. The per-queue IO pthread
|
||||||
|
focuses on IO handling and shouldn't handle any control & management
|
||||||
|
tasks.
|
||||||
|
|
||||||
|
Each IO is assigned a unique tag, which is 1:1 mapped with the IO
|
||||||
|
request of ``/dev/ublkb*``.
|
||||||
|
|
||||||
|
UAPI structure of ``ublksrv_io_desc`` is defined for describing each IO from
|
||||||
|
the driver. A fixed mmaped area (array) on ``/dev/ublkc*`` is provided for
|
||||||
|
exporting IO info to the server; such as IO offset, length, OP/flags and
|
||||||
|
buffer address. Each ``ublksrv_io_desc`` instance can be indexed via queue id
|
||||||
|
and IO tag directly.
|
||||||
|
|
||||||
|
The following IO commands are communicated via io_uring passthrough command,
|
||||||
|
and each command is only for forwarding the IO and committing the result
|
||||||
|
with specified IO tag in the command data:
|
||||||
|
|
||||||
|
- ``UBLK_IO_FETCH_REQ``
|
||||||
|
|
||||||
|
Sent from the server IO pthread for fetching future incoming IO requests
|
||||||
|
destined to ``/dev/ublkb*``. This command is sent only once from the server
|
||||||
|
IO pthread for ublk driver to setup IO forward environment.
|
||||||
|
|
||||||
|
- ``UBLK_IO_COMMIT_AND_FETCH_REQ``
|
||||||
|
|
||||||
|
When an IO request is destined to ``/dev/ublkb*``, the driver stores
|
||||||
|
the IO's ``ublksrv_io_desc`` to the specified mapped area; then the
|
||||||
|
previous received IO command of this IO tag (either ``UBLK_IO_FETCH_REQ``
|
||||||
|
or ``UBLK_IO_COMMIT_AND_FETCH_REQ``) is completed, so the server gets
|
||||||
|
the IO notification via io_uring.
|
||||||
|
|
||||||
|
After the server handles the IO, its result is committed back to the
|
||||||
|
driver by sending ``UBLK_IO_COMMIT_AND_FETCH_REQ`` back. Once ublkdrv
|
||||||
|
received this command, it parses the result and completes the request to
|
||||||
|
``/dev/ublkb*``. In the meantime setup environment for fetching future
|
||||||
|
requests with the same IO tag. That is, ``UBLK_IO_COMMIT_AND_FETCH_REQ``
|
||||||
|
is reused for both fetching request and committing back IO result.
|
||||||
|
|
||||||
|
- ``UBLK_IO_NEED_GET_DATA``
|
||||||
|
|
||||||
|
With ``UBLK_F_NEED_GET_DATA`` enabled, the WRITE request will be firstly
|
||||||
|
issued to ublk server without data copy. Then, IO backend of ublk server
|
||||||
|
receives the request and it can allocate data buffer and embed its addr
|
||||||
|
inside this new io command. After the kernel driver gets the command,
|
||||||
|
data copy is done from request pages to this backend's buffer. Finally,
|
||||||
|
backend receives the request again with data to be written and it can
|
||||||
|
truly handle the request.
|
||||||
|
|
||||||
|
``UBLK_IO_NEED_GET_DATA`` adds one additional round-trip and one
|
||||||
|
io_uring_enter() syscall. Any user who thinks that it may lower performance
|
||||||
|
should not enable UBLK_F_NEED_GET_DATA. ublk server pre-allocates IO
|
||||||
|
buffer for each IO by default. Any new project should try to use this
|
||||||
|
buffer to communicate with ublk driver. However, existing project may
|
||||||
|
break or not be able to consume the new buffer interface; that's why this
|
||||||
|
command is added for backwards compatibility so that existing projects
|
||||||
|
can still consume existing buffers.
|
||||||
|
|
||||||
|
- data copy between ublk server IO buffer and ublk block IO request
|
||||||
|
|
||||||
|
The driver needs to copy the block IO request pages into the server buffer
|
||||||
|
(pages) first for WRITE before notifying the server of the coming IO, so
|
||||||
|
that the server can handle WRITE request.
|
||||||
|
|
||||||
|
When the server handles READ request and sends
|
||||||
|
``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the driver, ublkdrv needs to copy
|
||||||
|
the server buffer (pages) read to the IO request pages.
|
||||||
|
|
||||||
|
Future development
|
||||||
|
==================
|
||||||
|
|
||||||
|
Container-aware ublk device
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
ublk driver doesn't handle any IO logic. Its function is well defined
|
||||||
|
for now and very limited userspace interfaces are needed, which is also
|
||||||
|
well defined too. It is possible to make ublk devices container-aware block
|
||||||
|
devices in future as Stefan Hajnoczi suggested [#stefan]_, by removing
|
||||||
|
ADMIN privilege.
|
||||||
|
|
||||||
|
Zero copy
|
||||||
|
---------
|
||||||
|
|
||||||
|
Zero copy is a generic requirement for nbd, fuse or similar drivers. A
|
||||||
|
problem [#xiaoguang]_ Xiaoguang mentioned is that pages mapped to userspace
|
||||||
|
can't be remapped any more in kernel with existing mm interfaces. This can
|
||||||
|
occur when destining direct IO to ``/dev/ublkb*``. Also, he reported that
|
||||||
|
big requests (IO size >= 256 KB) may benefit a lot from zero copy.
|
||||||
|
|
||||||
|
|
||||||
|
References
|
||||||
|
==========
|
||||||
|
|
||||||
|
.. [#userspace] https://github.com/ming1/ubdsrv
|
||||||
|
|
||||||
|
.. [#userspace_lib] https://github.com/ming1/ubdsrv/tree/master/lib
|
||||||
|
|
||||||
|
.. [#userspace_nbdublk] https://gitlab.com/rwmjones/libnbd/-/tree/nbdublk
|
||||||
|
|
||||||
|
.. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README
|
||||||
|
|
||||||
|
.. [#stefan] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
|
||||||
|
|
||||||
|
.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/
|
||||||
@@ -214,6 +214,12 @@ A: NO. Tracepoints are tied to internal implementation details hence they are
|
|||||||
subject to change and can break with newer kernels. BPF programs need to change
|
subject to change and can break with newer kernels. BPF programs need to change
|
||||||
accordingly when this happens.
|
accordingly when this happens.
|
||||||
|
|
||||||
|
Q: Are places where kprobes can attach part of the stable ABI?
|
||||||
|
--------------------------------------------------------------
|
||||||
|
A: NO. The places to which kprobes can attach are internal implementation
|
||||||
|
details, which means that they are subject to change and can break with
|
||||||
|
newer kernels. BPF programs need to change accordingly when this happens.
|
||||||
|
|
||||||
Q: How much stack space a BPF program uses?
|
Q: How much stack space a BPF program uses?
|
||||||
-------------------------------------------
|
-------------------------------------------
|
||||||
A: Currently all program types are limited to 512 bytes of stack
|
A: Currently all program types are limited to 512 bytes of stack
|
||||||
@@ -273,3 +279,22 @@ cc (congestion-control) implementations. If any of these kernel
|
|||||||
functions has changed, both the in-tree and out-of-tree kernel tcp cc
|
functions has changed, both the in-tree and out-of-tree kernel tcp cc
|
||||||
implementations have to be changed. The same goes for the bpf
|
implementations have to be changed. The same goes for the bpf
|
||||||
programs and they have to be adjusted accordingly.
|
programs and they have to be adjusted accordingly.
|
||||||
|
|
||||||
|
Q: Attaching to arbitrary kernel functions is an ABI?
|
||||||
|
-----------------------------------------------------
|
||||||
|
Q: BPF programs can be attached to many kernel functions. Do these
|
||||||
|
kernel functions become part of the ABI?
|
||||||
|
|
||||||
|
A: NO.
|
||||||
|
|
||||||
|
The kernel function prototypes will change, and BPF programs attaching to
|
||||||
|
them will need to change. The BPF compile-once-run-everywhere (CO-RE)
|
||||||
|
should be used in order to make it easier to adapt your BPF programs to
|
||||||
|
different versions of the kernel.
|
||||||
|
|
||||||
|
Q: Marking a function with BTF_ID makes that function an ABI?
|
||||||
|
-------------------------------------------------------------
|
||||||
|
A: NO.
|
||||||
|
|
||||||
|
The BTF_ID macro does not cause a function to become part of the ABI
|
||||||
|
any more than does the EXPORT_SYMBOL_GPL macro.
|
||||||
|
|||||||
@@ -74,7 +74,7 @@ sequentially and type id is assigned to each recognized type starting from id
|
|||||||
#define BTF_KIND_ARRAY 3 /* Array */
|
#define BTF_KIND_ARRAY 3 /* Array */
|
||||||
#define BTF_KIND_STRUCT 4 /* Struct */
|
#define BTF_KIND_STRUCT 4 /* Struct */
|
||||||
#define BTF_KIND_UNION 5 /* Union */
|
#define BTF_KIND_UNION 5 /* Union */
|
||||||
#define BTF_KIND_ENUM 6 /* Enumeration */
|
#define BTF_KIND_ENUM 6 /* Enumeration up to 32-bit values */
|
||||||
#define BTF_KIND_FWD 7 /* Forward */
|
#define BTF_KIND_FWD 7 /* Forward */
|
||||||
#define BTF_KIND_TYPEDEF 8 /* Typedef */
|
#define BTF_KIND_TYPEDEF 8 /* Typedef */
|
||||||
#define BTF_KIND_VOLATILE 9 /* Volatile */
|
#define BTF_KIND_VOLATILE 9 /* Volatile */
|
||||||
@@ -87,6 +87,7 @@ sequentially and type id is assigned to each recognized type starting from id
|
|||||||
#define BTF_KIND_FLOAT 16 /* Floating point */
|
#define BTF_KIND_FLOAT 16 /* Floating point */
|
||||||
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
|
#define BTF_KIND_DECL_TAG 17 /* Decl Tag */
|
||||||
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
|
#define BTF_KIND_TYPE_TAG 18 /* Type Tag */
|
||||||
|
#define BTF_KIND_ENUM64 19 /* Enumeration up to 64-bit values */
|
||||||
|
|
||||||
Note that the type section encodes debug info, not just pure types.
|
Note that the type section encodes debug info, not just pure types.
|
||||||
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
|
``BTF_KIND_FUNC`` is not a type, and it represents a defined subprogram.
|
||||||
@@ -101,10 +102,10 @@ Each type contains the following common data::
|
|||||||
* bits 24-28: kind (e.g. int, ptr, array...etc)
|
* bits 24-28: kind (e.g. int, ptr, array...etc)
|
||||||
* bits 29-30: unused
|
* bits 29-30: unused
|
||||||
* bit 31: kind_flag, currently used by
|
* bit 31: kind_flag, currently used by
|
||||||
* struct, union and fwd
|
* struct, union, fwd, enum and enum64.
|
||||||
*/
|
*/
|
||||||
__u32 info;
|
__u32 info;
|
||||||
/* "size" is used by INT, ENUM, STRUCT and UNION.
|
/* "size" is used by INT, ENUM, STRUCT, UNION and ENUM64.
|
||||||
* "size" tells the size of the type it is describing.
|
* "size" tells the size of the type it is describing.
|
||||||
*
|
*
|
||||||
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
* "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT,
|
||||||
@@ -281,10 +282,10 @@ modes exist:
|
|||||||
|
|
||||||
``struct btf_type`` encoding requirement:
|
``struct btf_type`` encoding requirement:
|
||||||
* ``name_off``: 0 or offset to a valid C identifier
|
* ``name_off``: 0 or offset to a valid C identifier
|
||||||
* ``info.kind_flag``: 0
|
* ``info.kind_flag``: 0 for unsigned, 1 for signed
|
||||||
* ``info.kind``: BTF_KIND_ENUM
|
* ``info.kind``: BTF_KIND_ENUM
|
||||||
* ``info.vlen``: number of enum values
|
* ``info.vlen``: number of enum values
|
||||||
* ``size``: 4
|
* ``size``: 1/2/4/8
|
||||||
|
|
||||||
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
|
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum``.::
|
||||||
|
|
||||||
@@ -297,6 +298,10 @@ The ``btf_enum`` encoding:
|
|||||||
* ``name_off``: offset to a valid C identifier
|
* ``name_off``: offset to a valid C identifier
|
||||||
* ``val``: any value
|
* ``val``: any value
|
||||||
|
|
||||||
|
If the original enum value is signed and the size is less than 4,
|
||||||
|
that value will be sign extended into 4 bytes. If the size is 8,
|
||||||
|
the value will be truncated into 4 bytes.
|
||||||
|
|
||||||
2.2.7 BTF_KIND_FWD
|
2.2.7 BTF_KIND_FWD
|
||||||
~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@@ -364,7 +369,8 @@ No additional type data follow ``btf_type``.
|
|||||||
* ``name_off``: offset to a valid C identifier
|
* ``name_off``: offset to a valid C identifier
|
||||||
* ``info.kind_flag``: 0
|
* ``info.kind_flag``: 0
|
||||||
* ``info.kind``: BTF_KIND_FUNC
|
* ``info.kind``: BTF_KIND_FUNC
|
||||||
* ``info.vlen``: 0
|
* ``info.vlen``: linkage information (BTF_FUNC_STATIC, BTF_FUNC_GLOBAL
|
||||||
|
or BTF_FUNC_EXTERN)
|
||||||
* ``type``: a BTF_KIND_FUNC_PROTO type
|
* ``type``: a BTF_KIND_FUNC_PROTO type
|
||||||
|
|
||||||
No additional type data follow ``btf_type``.
|
No additional type data follow ``btf_type``.
|
||||||
@@ -375,6 +381,9 @@ type. The BTF_KIND_FUNC may in turn be referenced by a func_info in the
|
|||||||
:ref:`BTF_Ext_Section` (ELF) or in the arguments to :ref:`BPF_Prog_Load`
|
:ref:`BTF_Ext_Section` (ELF) or in the arguments to :ref:`BPF_Prog_Load`
|
||||||
(ABI).
|
(ABI).
|
||||||
|
|
||||||
|
Currently, only linkage values of BTF_FUNC_STATIC and BTF_FUNC_GLOBAL are
|
||||||
|
supported in the kernel.
|
||||||
|
|
||||||
2.2.13 BTF_KIND_FUNC_PROTO
|
2.2.13 BTF_KIND_FUNC_PROTO
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
@@ -493,7 +502,7 @@ the attribute is applied to a ``struct``/``union`` member or
|
|||||||
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
|
a ``func`` argument, and ``btf_decl_tag.component_idx`` should be a
|
||||||
valid index (starting from 0) pointing to a member or an argument.
|
valid index (starting from 0) pointing to a member or an argument.
|
||||||
|
|
||||||
2.2.17 BTF_KIND_TYPE_TAG
|
2.2.18 BTF_KIND_TYPE_TAG
|
||||||
~~~~~~~~~~~~~~~~~~~~~~~~
|
~~~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
``struct btf_type`` encoding requirement:
|
``struct btf_type`` encoding requirement:
|
||||||
@@ -516,6 +525,32 @@ type_tag, then zero or more const/volatile/restrict/typedef
|
|||||||
and finally the base type. The base type is one of
|
and finally the base type. The base type is one of
|
||||||
int, ptr, array, struct, union, enum, func_proto and float types.
|
int, ptr, array, struct, union, enum, func_proto and float types.
|
||||||
|
|
||||||
|
2.2.19 BTF_KIND_ENUM64
|
||||||
|
~~~~~~~~~~~~~~~~~~~~~~
|
||||||
|
|
||||||
|
``struct btf_type`` encoding requirement:
|
||||||
|
* ``name_off``: 0 or offset to a valid C identifier
|
||||||
|
* ``info.kind_flag``: 0 for unsigned, 1 for signed
|
||||||
|
* ``info.kind``: BTF_KIND_ENUM64
|
||||||
|
* ``info.vlen``: number of enum values
|
||||||
|
* ``size``: 1/2/4/8
|
||||||
|
|
||||||
|
``btf_type`` is followed by ``info.vlen`` number of ``struct btf_enum64``.::
|
||||||
|
|
||||||
|
struct btf_enum64 {
|
||||||
|
__u32 name_off;
|
||||||
|
__u32 val_lo32;
|
||||||
|
__u32 val_hi32;
|
||||||
|
};
|
||||||
|
|
||||||
|
The ``btf_enum64`` encoding:
|
||||||
|
* ``name_off``: offset to a valid C identifier
|
||||||
|
* ``val_lo32``: lower 32-bit value for a 64-bit value
|
||||||
|
* ``val_hi32``: high 32-bit value for a 64-bit value
|
||||||
|
|
||||||
|
If the original enum value is signed and the size is less than 8,
|
||||||
|
that value will be sign extended into 8 bytes.
|
||||||
|
|
||||||
3. BTF Kernel API
|
3. BTF Kernel API
|
||||||
=================
|
=================
|
||||||
|
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ that goes into great technical depth about the BPF Architecture.
|
|||||||
faq
|
faq
|
||||||
syscall_api
|
syscall_api
|
||||||
helpers
|
helpers
|
||||||
|
kfuncs
|
||||||
programs
|
programs
|
||||||
maps
|
maps
|
||||||
bpf_prog_run
|
bpf_prog_run
|
||||||
|
|||||||
@@ -127,7 +127,7 @@ BPF_XOR | BPF_K | BPF_ALU64 means::
|
|||||||
Byte swap instructions
|
Byte swap instructions
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
The byte swap instructions use an instruction class of ``BFP_ALU`` and a 4-bit
|
The byte swap instructions use an instruction class of ``BPF_ALU`` and a 4-bit
|
||||||
code field of ``BPF_END``.
|
code field of ``BPF_END``.
|
||||||
|
|
||||||
The byte swap instructions operate on the destination register
|
The byte swap instructions operate on the destination register
|
||||||
@@ -351,7 +351,7 @@ These instructions have seven implicit operands:
|
|||||||
* Register R0 is an implicit output which contains the data fetched from
|
* Register R0 is an implicit output which contains the data fetched from
|
||||||
the packet.
|
the packet.
|
||||||
* Registers R1-R5 are scratch registers that are clobbered after a call to
|
* Registers R1-R5 are scratch registers that are clobbered after a call to
|
||||||
``BPF_ABS | BPF_LD`` or ``BPF_IND`` | BPF_LD instructions.
|
``BPF_ABS | BPF_LD`` or ``BPF_IND | BPF_LD`` instructions.
|
||||||
|
|
||||||
These instructions have an implicit program exit condition as well. When an
|
These instructions have an implicit program exit condition as well. When an
|
||||||
eBPF program is trying to access the data beyond the packet boundary, the
|
eBPF program is trying to access the data beyond the packet boundary, the
|
||||||
|
|||||||
170
Documentation/bpf/kfuncs.rst
Normal file
170
Documentation/bpf/kfuncs.rst
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
=============================
|
||||||
|
BPF Kernel Functions (kfuncs)
|
||||||
|
=============================
|
||||||
|
|
||||||
|
1. Introduction
|
||||||
|
===============
|
||||||
|
|
||||||
|
BPF Kernel Functions or more commonly known as kfuncs are functions in the Linux
|
||||||
|
kernel which are exposed for use by BPF programs. Unlike normal BPF helpers,
|
||||||
|
kfuncs do not have a stable interface and can change from one kernel release to
|
||||||
|
another. Hence, BPF programs need to be updated in response to changes in the
|
||||||
|
kernel.
|
||||||
|
|
||||||
|
2. Defining a kfunc
|
||||||
|
===================
|
||||||
|
|
||||||
|
There are two ways to expose a kernel function to BPF programs, either make an
|
||||||
|
existing function in the kernel visible, or add a new wrapper for BPF. In both
|
||||||
|
cases, care must be taken that BPF program can only call such function in a
|
||||||
|
valid context. To enforce this, visibility of a kfunc can be per program type.
|
||||||
|
|
||||||
|
If you are not creating a BPF wrapper for existing kernel function, skip ahead
|
||||||
|
to :ref:`BPF_kfunc_nodef`.
|
||||||
|
|
||||||
|
2.1 Creating a wrapper kfunc
|
||||||
|
----------------------------
|
||||||
|
|
||||||
|
When defining a wrapper kfunc, the wrapper function should have extern linkage.
|
||||||
|
This prevents the compiler from optimizing away dead code, as this wrapper kfunc
|
||||||
|
is not invoked anywhere in the kernel itself. It is not necessary to provide a
|
||||||
|
prototype in a header for the wrapper kfunc.
|
||||||
|
|
||||||
|
An example is given below::
|
||||||
|
|
||||||
|
/* Disables missing prototype warnings */
|
||||||
|
__diag_push();
|
||||||
|
__diag_ignore_all("-Wmissing-prototypes",
|
||||||
|
"Global kfuncs as their definitions will be in BTF");
|
||||||
|
|
||||||
|
struct task_struct *bpf_find_get_task_by_vpid(pid_t nr)
|
||||||
|
{
|
||||||
|
return find_get_task_by_vpid(nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
__diag_pop();
|
||||||
|
|
||||||
|
A wrapper kfunc is often needed when we need to annotate parameters of the
|
||||||
|
kfunc. Otherwise one may directly make the kfunc visible to the BPF program by
|
||||||
|
registering it with the BPF subsystem. See :ref:`BPF_kfunc_nodef`.
|
||||||
|
|
||||||
|
2.2 Annotating kfunc parameters
|
||||||
|
-------------------------------
|
||||||
|
|
||||||
|
Similar to BPF helpers, there is sometimes a need for additional context required
|
||||||
|
by the verifier to make the usage of kernel functions safer and more useful.
|
||||||
|
Hence, we can annotate a parameter by suffixing the name of the argument of the
|
||||||
|
kfunc with a __tag, where tag may be one of the supported annotations.
|
||||||
|
|
||||||
|
2.2.1 __sz Annotation
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
This annotation is used to indicate a memory and size pair in the argument list.
|
||||||
|
An example is given below::
|
||||||
|
|
||||||
|
void bpf_memzero(void *mem, int mem__sz)
|
||||||
|
{
|
||||||
|
...
|
||||||
|
}
|
||||||
|
|
||||||
|
Here, the verifier will treat first argument as a PTR_TO_MEM, and second
|
||||||
|
argument as its size. By default, without __sz annotation, the size of the type
|
||||||
|
of the pointer is used. Without __sz annotation, a kfunc cannot accept a void
|
||||||
|
pointer.
|
||||||
|
|
||||||
|
.. _BPF_kfunc_nodef:
|
||||||
|
|
||||||
|
2.3 Using an existing kernel function
|
||||||
|
-------------------------------------
|
||||||
|
|
||||||
|
When an existing function in the kernel is fit for consumption by BPF programs,
|
||||||
|
it can be directly registered with the BPF subsystem. However, care must still
|
||||||
|
be taken to review the context in which it will be invoked by the BPF program
|
||||||
|
and whether it is safe to do so.
|
||||||
|
|
||||||
|
2.4 Annotating kfuncs
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
In addition to kfuncs' arguments, verifier may need more information about the
|
||||||
|
type of kfunc(s) being registered with the BPF subsystem. To do so, we define
|
||||||
|
flags on a set of kfuncs as follows::
|
||||||
|
|
||||||
|
BTF_SET8_START(bpf_task_set)
|
||||||
|
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
|
||||||
|
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
|
||||||
|
BTF_SET8_END(bpf_task_set)
|
||||||
|
|
||||||
|
This set encodes the BTF ID of each kfunc listed above, and encodes the flags
|
||||||
|
along with it. Of course, it is also allowed to specify no flags.
|
||||||
|
|
||||||
|
2.4.1 KF_ACQUIRE flag
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The KF_ACQUIRE flag is used to indicate that the kfunc returns a pointer to a
|
||||||
|
refcounted object. The verifier will then ensure that the pointer to the object
|
||||||
|
is eventually released using a release kfunc, or transferred to a map using a
|
||||||
|
referenced kptr (by invoking bpf_kptr_xchg). If not, the verifier fails the
|
||||||
|
loading of the BPF program until no lingering references remain in all possible
|
||||||
|
explored states of the program.
|
||||||
|
|
||||||
|
2.4.2 KF_RET_NULL flag
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The KF_RET_NULL flag is used to indicate that the pointer returned by the kfunc
|
||||||
|
may be NULL. Hence, it forces the user to do a NULL check on the pointer
|
||||||
|
returned from the kfunc before making use of it (dereferencing or passing to
|
||||||
|
another helper). This flag is often used in pairing with KF_ACQUIRE flag, but
|
||||||
|
both are orthogonal to each other.
|
||||||
|
|
||||||
|
2.4.3 KF_RELEASE flag
|
||||||
|
---------------------
|
||||||
|
|
||||||
|
The KF_RELEASE flag is used to indicate that the kfunc releases the pointer
|
||||||
|
passed in to it. There can be only one referenced pointer that can be passed in.
|
||||||
|
All copies of the pointer being released are invalidated as a result of invoking
|
||||||
|
kfunc with this flag.
|
||||||
|
|
||||||
|
2.4.4 KF_KPTR_GET flag
|
||||||
|
----------------------
|
||||||
|
|
||||||
|
The KF_KPTR_GET flag is used to indicate that the kfunc takes the first argument
|
||||||
|
as a pointer to kptr, safely increments the refcount of the object it points to,
|
||||||
|
and returns a reference to the user. The rest of the arguments may be normal
|
||||||
|
arguments of a kfunc. The KF_KPTR_GET flag should be used in conjunction with
|
||||||
|
KF_ACQUIRE and KF_RET_NULL flags.
|
||||||
|
|
||||||
|
2.4.5 KF_TRUSTED_ARGS flag
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
The KF_TRUSTED_ARGS flag is used for kfuncs taking pointer arguments. It
|
||||||
|
indicates that all pointer arguments will always be refcounted, and have
|
||||||
|
their offset set to 0. It can be used to enforce that a pointer to a refcounted
|
||||||
|
object acquired from a kfunc or BPF helper is passed as an argument to this
|
||||||
|
kfunc without any modifications (e.g. pointer arithmetic) such that it is
|
||||||
|
trusted and points to the original object. This flag is often used for kfuncs
|
||||||
|
that operate (change some property, perform some operation) on an object that
|
||||||
|
was obtained using an acquire kfunc. Such kfuncs need an unchanged pointer to
|
||||||
|
ensure the integrity of the operation being performed on the expected object.
|
||||||
|
|
||||||
|
2.5 Registering the kfuncs
|
||||||
|
--------------------------
|
||||||
|
|
||||||
|
Once the kfunc is prepared for use, the final step to making it visible is
|
||||||
|
registering it with the BPF subsystem. Registration is done per BPF program
|
||||||
|
type. An example is shown below::
|
||||||
|
|
||||||
|
BTF_SET8_START(bpf_task_set)
|
||||||
|
BTF_ID_FLAGS(func, bpf_get_task_pid, KF_ACQUIRE | KF_RET_NULL)
|
||||||
|
BTF_ID_FLAGS(func, bpf_put_pid, KF_RELEASE)
|
||||||
|
BTF_SET8_END(bpf_task_set)
|
||||||
|
|
||||||
|
static const struct btf_kfunc_id_set bpf_task_kfunc_set = {
|
||||||
|
.owner = THIS_MODULE,
|
||||||
|
.set = &bpf_task_set,
|
||||||
|
};
|
||||||
|
|
||||||
|
static int init_subsystem(void)
|
||||||
|
{
|
||||||
|
return register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_task_kfunc_set);
|
||||||
|
}
|
||||||
|
late_initcall(init_subsystem);
|
||||||
@@ -9,8 +9,8 @@ described here. It's recommended to follow these conventions whenever a
|
|||||||
new function or type is added to keep libbpf API clean and consistent.
|
new function or type is added to keep libbpf API clean and consistent.
|
||||||
|
|
||||||
All types and functions provided by libbpf API should have one of the
|
All types and functions provided by libbpf API should have one of the
|
||||||
following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``,
|
following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``btf_dump_``,
|
||||||
``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``.
|
``ring_buffer_``, ``perf_buffer_``.
|
||||||
|
|
||||||
System call wrappers
|
System call wrappers
|
||||||
--------------------
|
--------------------
|
||||||
@@ -59,15 +59,6 @@ Auxiliary functions and types that don't fit well in any of categories
|
|||||||
described above should have ``libbpf_`` prefix, e.g.
|
described above should have ``libbpf_`` prefix, e.g.
|
||||||
``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
|
``libbpf_get_error`` or ``libbpf_prog_type_by_name``.
|
||||||
|
|
||||||
AF_XDP functions
|
|
||||||
-------------------
|
|
||||||
|
|
||||||
AF_XDP functions should have an ``xsk_`` prefix, e.g.
|
|
||||||
``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists
|
|
||||||
of both low-level ring access functions and high-level configuration
|
|
||||||
functions. These can be mixed and matched. Note that these functions
|
|
||||||
are not reentrant for performance reasons.
|
|
||||||
|
|
||||||
ABI
|
ABI
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
185
Documentation/bpf/map_hash.rst
Normal file
185
Documentation/bpf/map_hash.rst
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
.. SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
.. Copyright (C) 2022 Red Hat, Inc.
|
||||||
|
|
||||||
|
===============================================
|
||||||
|
BPF_MAP_TYPE_HASH, with PERCPU and LRU Variants
|
||||||
|
===============================================
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
- ``BPF_MAP_TYPE_HASH`` was introduced in kernel version 3.19
|
||||||
|
- ``BPF_MAP_TYPE_PERCPU_HASH`` was introduced in version 4.6
|
||||||
|
- Both ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
|
||||||
|
were introduced in version 4.10
|
||||||
|
|
||||||
|
``BPF_MAP_TYPE_HASH`` and ``BPF_MAP_TYPE_PERCPU_HASH`` provide general
|
||||||
|
purpose hash map storage. Both the key and the value can be structs,
|
||||||
|
allowing for composite keys and values.
|
||||||
|
|
||||||
|
The kernel is responsible for allocating and freeing key/value pairs, up
|
||||||
|
to the max_entries limit that you specify. Hash maps use pre-allocation
|
||||||
|
of hash table elements by default. The ``BPF_F_NO_PREALLOC`` flag can be
|
||||||
|
used to disable pre-allocation when it is too memory expensive.
|
||||||
|
|
||||||
|
``BPF_MAP_TYPE_PERCPU_HASH`` provides a separate value slot per
|
||||||
|
CPU. The per-cpu values are stored internally in an array.
|
||||||
|
|
||||||
|
The ``BPF_MAP_TYPE_LRU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
|
||||||
|
variants add LRU semantics to their respective hash tables. An LRU hash
|
||||||
|
will automatically evict the least recently used entries when the hash
|
||||||
|
table reaches capacity. An LRU hash maintains an internal LRU list that
|
||||||
|
is used to select elements for eviction. This internal LRU list is
|
||||||
|
shared across CPUs but it is possible to request a per CPU LRU list with
|
||||||
|
the ``BPF_F_NO_COMMON_LRU`` flag when calling ``bpf_map_create``.
|
||||||
|
|
||||||
|
Usage
|
||||||
|
=====
|
||||||
|
|
||||||
|
.. c:function::
|
||||||
|
long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
|
||||||
|
|
||||||
|
Hash entries can be added or updated using the ``bpf_map_update_elem()``
|
||||||
|
helper. This helper replaces existing elements atomically. The ``flags``
|
||||||
|
parameter can be used to control the update behaviour:
|
||||||
|
|
||||||
|
- ``BPF_ANY`` will create a new element or update an existing element
|
||||||
|
- ``BPF_NOEXIST`` will create a new element only if one did not already
|
||||||
|
exist
|
||||||
|
- ``BPF_EXIST`` will update an existing element
|
||||||
|
|
||||||
|
``bpf_map_update_elem()`` returns 0 on success, or negative error in
|
||||||
|
case of failure.
|
||||||
|
|
||||||
|
.. c:function::
|
||||||
|
void *bpf_map_lookup_elem(struct bpf_map *map, const void *key)
|
||||||
|
|
||||||
|
Hash entries can be retrieved using the ``bpf_map_lookup_elem()``
|
||||||
|
helper. This helper returns a pointer to the value associated with
|
||||||
|
``key``, or ``NULL`` if no entry was found.
|
||||||
|
|
||||||
|
.. c:function::
|
||||||
|
long bpf_map_delete_elem(struct bpf_map *map, const void *key)
|
||||||
|
|
||||||
|
Hash entries can be deleted using the ``bpf_map_delete_elem()``
|
||||||
|
helper. This helper will return 0 on success, or negative error in case
|
||||||
|
of failure.
|
||||||
|
|
||||||
|
Per CPU Hashes
|
||||||
|
--------------
|
||||||
|
|
||||||
|
For ``BPF_MAP_TYPE_PERCPU_HASH`` and ``BPF_MAP_TYPE_LRU_PERCPU_HASH``
|
||||||
|
the ``bpf_map_update_elem()`` and ``bpf_map_lookup_elem()`` helpers
|
||||||
|
automatically access the hash slot for the current CPU.
|
||||||
|
|
||||||
|
.. c:function::
|
||||||
|
void *bpf_map_lookup_percpu_elem(struct bpf_map *map, const void *key, u32 cpu)
|
||||||
|
|
||||||
|
The ``bpf_map_lookup_percpu_elem()`` helper can be used to lookup the
|
||||||
|
value in the hash slot for a specific CPU. Returns value associated with
|
||||||
|
``key`` on ``cpu``, or ``NULL`` if no entry was found or ``cpu`` is
|
||||||
|
invalid.
|
||||||
|
|
||||||
|
Concurrency
|
||||||
|
-----------
|
||||||
|
|
||||||
|
Values stored in ``BPF_MAP_TYPE_HASH`` can be accessed concurrently by
|
||||||
|
programs running on different CPUs. Since Kernel version 5.1, the BPF
|
||||||
|
infrastructure provides ``struct bpf_spin_lock`` to synchronise access.
|
||||||
|
See ``tools/testing/selftests/bpf/progs/test_spin_lock.c``.
|
||||||
|
|
||||||
|
Userspace
|
||||||
|
---------
|
||||||
|
|
||||||
|
.. c:function::
|
||||||
|
int bpf_map_get_next_key(int fd, const void *cur_key, void *next_key)
|
||||||
|
|
||||||
|
In userspace, it is possible to iterate through the keys of a hash using
|
||||||
|
libbpf's ``bpf_map_get_next_key()`` function. The first key can be fetched by
|
||||||
|
calling ``bpf_map_get_next_key()`` with ``cur_key`` set to
|
||||||
|
``NULL``. Subsequent calls will fetch the next key that follows the
|
||||||
|
current key. ``bpf_map_get_next_key()`` returns 0 on success, -ENOENT if
|
||||||
|
cur_key is the last key in the hash, or negative error in case of
|
||||||
|
failure.
|
||||||
|
|
||||||
|
Note that if ``cur_key`` gets deleted then ``bpf_map_get_next_key()``
|
||||||
|
will instead return the *first* key in the hash table which is
|
||||||
|
undesirable. It is recommended to use batched lookup if there is going
|
||||||
|
to be key deletion intermixed with ``bpf_map_get_next_key()``.
|
||||||
|
|
||||||
|
Examples
|
||||||
|
========
|
||||||
|
|
||||||
|
Please see the ``tools/testing/selftests/bpf`` directory for functional
|
||||||
|
examples. The code snippets below demonstrate API usage.
|
||||||
|
|
||||||
|
This example shows how to declare an LRU Hash with a struct key and a
|
||||||
|
struct value.
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
#include <linux/bpf.h>
|
||||||
|
#include <bpf/bpf_helpers.h>
|
||||||
|
|
||||||
|
struct key {
|
||||||
|
__u32 srcip;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct value {
|
||||||
|
__u64 packets;
|
||||||
|
__u64 bytes;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct {
|
||||||
|
__uint(type, BPF_MAP_TYPE_LRU_HASH);
|
||||||
|
__uint(max_entries, 32);
|
||||||
|
__type(key, struct key);
|
||||||
|
__type(value, struct value);
|
||||||
|
} packet_stats SEC(".maps");
|
||||||
|
|
||||||
|
This example shows how to create or update hash values using atomic
|
||||||
|
instructions:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
static void update_stats(__u32 srcip, int bytes)
|
||||||
|
{
|
||||||
|
struct key key = {
|
||||||
|
.srcip = srcip,
|
||||||
|
};
|
||||||
|
struct value *value = bpf_map_lookup_elem(&packet_stats, &key);
|
||||||
|
|
||||||
|
if (value) {
|
||||||
|
__sync_fetch_and_add(&value->packets, 1);
|
||||||
|
__sync_fetch_and_add(&value->bytes, bytes);
|
||||||
|
} else {
|
||||||
|
struct value newval = { 1, bytes };
|
||||||
|
|
||||||
|
bpf_map_update_elem(&packet_stats, &key, &newval, BPF_NOEXIST);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Userspace walking the map elements from the map declared above:
|
||||||
|
|
||||||
|
.. code-block:: c
|
||||||
|
|
||||||
|
#include <bpf/libbpf.h>
|
||||||
|
#include <bpf/bpf.h>
|
||||||
|
|
||||||
|
static void walk_hash_elements(int map_fd)
|
||||||
|
{
|
||||||
|
struct key *cur_key = NULL;
|
||||||
|
struct key next_key;
|
||||||
|
struct value value;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
err = bpf_map_get_next_key(map_fd, cur_key, &next_key);
|
||||||
|
if (err)
|
||||||
|
break;
|
||||||
|
|
||||||
|
bpf_map_lookup_elem(map_fd, &next_key, &value);
|
||||||
|
|
||||||
|
// Use key and value here
|
||||||
|
|
||||||
|
cur_key = &next_key;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -86,6 +86,7 @@ if major >= 3:
|
|||||||
"__used",
|
"__used",
|
||||||
"__weak",
|
"__weak",
|
||||||
"noinline",
|
"noinline",
|
||||||
|
"__fix_address",
|
||||||
|
|
||||||
# include/linux/memblock.h:
|
# include/linux/memblock.h:
|
||||||
"__init_memblock",
|
"__init_memblock",
|
||||||
|
|||||||
@@ -1,220 +0,0 @@
|
|||||||
==========================================================
|
|
||||||
How to access I/O mapped memory from within device drivers
|
|
||||||
==========================================================
|
|
||||||
|
|
||||||
:Author: Linus
|
|
||||||
|
|
||||||
.. warning::
|
|
||||||
|
|
||||||
The virt_to_bus() and bus_to_virt() functions have been
|
|
||||||
superseded by the functionality provided by the PCI DMA interface
|
|
||||||
(see Documentation/core-api/dma-api-howto.rst). They continue
|
|
||||||
to be documented below for historical purposes, but new code
|
|
||||||
must not use them. --davidm 00/12/12
|
|
||||||
|
|
||||||
::
|
|
||||||
|
|
||||||
[ This is a mail message in response to a query on IO mapping, thus the
|
|
||||||
strange format for a "document" ]
|
|
||||||
|
|
||||||
The AHA-1542 is a bus-master device, and your patch makes the driver give the
|
|
||||||
controller the physical address of the buffers, which is correct on x86
|
|
||||||
(because all bus master devices see the physical memory mappings directly).
|
|
||||||
|
|
||||||
However, on many setups, there are actually **three** different ways of looking
|
|
||||||
at memory addresses, and in this case we actually want the third, the
|
|
||||||
so-called "bus address".
|
|
||||||
|
|
||||||
Essentially, the three ways of addressing memory are (this is "real memory",
|
|
||||||
that is, normal RAM--see later about other details):
|
|
||||||
|
|
||||||
- CPU untranslated. This is the "physical" address. Physical address
|
|
||||||
0 is what the CPU sees when it drives zeroes on the memory bus.
|
|
||||||
|
|
||||||
- CPU translated address. This is the "virtual" address, and is
|
|
||||||
completely internal to the CPU itself with the CPU doing the appropriate
|
|
||||||
translations into "CPU untranslated".
|
|
||||||
|
|
||||||
- bus address. This is the address of memory as seen by OTHER devices,
|
|
||||||
not the CPU. Now, in theory there could be many different bus
|
|
||||||
addresses, with each device seeing memory in some device-specific way, but
|
|
||||||
happily most hardware designers aren't actually actively trying to make
|
|
||||||
things any more complex than necessary, so you can assume that all
|
|
||||||
external hardware sees the memory the same way.
|
|
||||||
|
|
||||||
Now, on normal PCs the bus address is exactly the same as the physical
|
|
||||||
address, and things are very simple indeed. However, they are that simple
|
|
||||||
because the memory and the devices share the same address space, and that is
|
|
||||||
not generally necessarily true on other PCI/ISA setups.
|
|
||||||
|
|
||||||
Now, just as an example, on the PReP (PowerPC Reference Platform), the
|
|
||||||
CPU sees a memory map something like this (this is from memory)::
|
|
||||||
|
|
||||||
0-2 GB "real memory"
|
|
||||||
2 GB-3 GB "system IO" (inb/out and similar accesses on x86)
|
|
||||||
3 GB-4 GB "IO memory" (shared memory over the IO bus)
|
|
||||||
|
|
||||||
Now, that looks simple enough. However, when you look at the same thing from
|
|
||||||
the viewpoint of the devices, you have the reverse, and the physical memory
|
|
||||||
address 0 actually shows up as address 2 GB for any IO master.
|
|
||||||
|
|
||||||
So when the CPU wants any bus master to write to physical memory 0, it
|
|
||||||
has to give the master address 0x80000000 as the memory address.
|
|
||||||
|
|
||||||
So, for example, depending on how the kernel is actually mapped on the
|
|
||||||
PPC, you can end up with a setup like this::
|
|
||||||
|
|
||||||
physical address: 0
|
|
||||||
virtual address: 0xC0000000
|
|
||||||
bus address: 0x80000000
|
|
||||||
|
|
||||||
where all the addresses actually point to the same thing. It's just seen
|
|
||||||
through different translations..
|
|
||||||
|
|
||||||
Similarly, on the Alpha, the normal translation is::
|
|
||||||
|
|
||||||
physical address: 0
|
|
||||||
virtual address: 0xfffffc0000000000
|
|
||||||
bus address: 0x40000000
|
|
||||||
|
|
||||||
(but there are also Alphas where the physical address and the bus address
|
|
||||||
are the same).
|
|
||||||
|
|
||||||
Anyway, the way to look up all these translations, you do::
|
|
||||||
|
|
||||||
#include <asm/io.h>
|
|
||||||
|
|
||||||
phys_addr = virt_to_phys(virt_addr);
|
|
||||||
virt_addr = phys_to_virt(phys_addr);
|
|
||||||
bus_addr = virt_to_bus(virt_addr);
|
|
||||||
virt_addr = bus_to_virt(bus_addr);
|
|
||||||
|
|
||||||
Now, when do you need these?
|
|
||||||
|
|
||||||
You want the **virtual** address when you are actually going to access that
|
|
||||||
pointer from the kernel. So you can have something like this::
|
|
||||||
|
|
||||||
/*
|
|
||||||
* this is the hardware "mailbox" we use to communicate with
|
|
||||||
* the controller. The controller sees this directly.
|
|
||||||
*/
|
|
||||||
struct mailbox {
|
|
||||||
__u32 status;
|
|
||||||
__u32 bufstart;
|
|
||||||
__u32 buflen;
|
|
||||||
..
|
|
||||||
} mbox;
|
|
||||||
|
|
||||||
unsigned char * retbuffer;
|
|
||||||
|
|
||||||
/* get the address from the controller */
|
|
||||||
retbuffer = bus_to_virt(mbox.bufstart);
|
|
||||||
switch (retbuffer[0]) {
|
|
||||||
case STATUS_OK:
|
|
||||||
...
|
|
||||||
|
|
||||||
on the other hand, you want the bus address when you have a buffer that
|
|
||||||
you want to give to the controller::
|
|
||||||
|
|
||||||
/* ask the controller to read the sense status into "sense_buffer" */
|
|
||||||
mbox.bufstart = virt_to_bus(&sense_buffer);
|
|
||||||
mbox.buflen = sizeof(sense_buffer);
|
|
||||||
mbox.status = 0;
|
|
||||||
notify_controller(&mbox);
|
|
||||||
|
|
||||||
And you generally **never** want to use the physical address, because you can't
|
|
||||||
use that from the CPU (the CPU only uses translated virtual addresses), and
|
|
||||||
you can't use it from the bus master.
|
|
||||||
|
|
||||||
So why do we care about the physical address at all? We do need the physical
|
|
||||||
address in some cases, it's just not very often in normal code. The physical
|
|
||||||
address is needed if you use memory mappings, for example, because the
|
|
||||||
"remap_pfn_range()" mm function wants the physical address of the memory to
|
|
||||||
be remapped as measured in units of pages, a.k.a. the pfn (the memory
|
|
||||||
management layer doesn't know about devices outside the CPU, so it
|
|
||||||
shouldn't need to know about "bus addresses" etc).
|
|
||||||
|
|
||||||
.. note::
|
|
||||||
|
|
||||||
The above is only one part of the whole equation. The above
|
|
||||||
only talks about "real memory", that is, CPU memory (RAM).
|
|
||||||
|
|
||||||
There is a completely different type of memory too, and that's the "shared
|
|
||||||
memory" on the PCI or ISA bus. That's generally not RAM (although in the case
|
|
||||||
of a video graphics card it can be normal DRAM that is just used for a frame
|
|
||||||
buffer), but can be things like a packet buffer in a network card etc.
|
|
||||||
|
|
||||||
This memory is called "PCI memory" or "shared memory" or "IO memory" or
|
|
||||||
whatever, and there is only one way to access it: the readb/writeb and
|
|
||||||
related functions. You should never take the address of such memory, because
|
|
||||||
there is really nothing you can do with such an address: it's not
|
|
||||||
conceptually in the same memory space as "real memory" at all, so you cannot
|
|
||||||
just dereference a pointer. (Sadly, on x86 it **is** in the same memory space,
|
|
||||||
so on x86 it actually works to just dereference a pointer, but it's not
|
|
||||||
portable).
|
|
||||||
|
|
||||||
For such memory, you can do things like:
|
|
||||||
|
|
||||||
- reading::
|
|
||||||
|
|
||||||
/*
|
|
||||||
* read first 32 bits from ISA memory at 0xC0000, aka
|
|
||||||
* C000:0000 in DOS terms
|
|
||||||
*/
|
|
||||||
unsigned int signature = isa_readl(0xC0000);
|
|
||||||
|
|
||||||
- remapping and writing::
|
|
||||||
|
|
||||||
/*
|
|
||||||
* remap framebuffer PCI memory area at 0xFC000000,
|
|
||||||
* size 1MB, so that we can access it: We can directly
|
|
||||||
* access only the 640k-1MB area, so anything else
|
|
||||||
* has to be remapped.
|
|
||||||
*/
|
|
||||||
void __iomem *baseptr = ioremap(0xFC000000, 1024*1024);
|
|
||||||
|
|
||||||
/* write a 'A' to the offset 10 of the area */
|
|
||||||
writeb('A',baseptr+10);
|
|
||||||
|
|
||||||
/* unmap when we unload the driver */
|
|
||||||
iounmap(baseptr);
|
|
||||||
|
|
||||||
- copying and clearing::
|
|
||||||
|
|
||||||
/* get the 6-byte Ethernet address at ISA address E000:0040 */
|
|
||||||
memcpy_fromio(kernel_buffer, 0xE0040, 6);
|
|
||||||
/* write a packet to the driver */
|
|
||||||
memcpy_toio(0xE1000, skb->data, skb->len);
|
|
||||||
/* clear the frame buffer */
|
|
||||||
memset_io(0xA0000, 0, 0x10000);
|
|
||||||
|
|
||||||
OK, that just about covers the basics of accessing IO portably. Questions?
|
|
||||||
Comments? You may think that all the above is overly complex, but one day you
|
|
||||||
might find yourself with a 500 MHz Alpha in front of you, and then you'll be
|
|
||||||
happy that your driver works ;)
|
|
||||||
|
|
||||||
Note that kernel versions 2.0.x (and earlier) mistakenly called the
|
|
||||||
ioremap() function "vremap()". ioremap() is the proper name, but I
|
|
||||||
didn't think straight when I wrote it originally. People who have to
|
|
||||||
support both can do something like::
|
|
||||||
|
|
||||||
/* support old naming silliness */
|
|
||||||
#if LINUX_VERSION_CODE < 0x020100
|
|
||||||
#define ioremap vremap
|
|
||||||
#define iounmap vfree
|
|
||||||
#endif
|
|
||||||
|
|
||||||
at the top of their source files, and then they can use the right names
|
|
||||||
even on 2.0.x systems.
|
|
||||||
|
|
||||||
And the above sounds worse than it really is. Most real drivers really
|
|
||||||
don't do all that complex things (or rather: the complexity is not so
|
|
||||||
much in the actual IO accesses as in error handling and timeouts etc).
|
|
||||||
It's generally not hard to fix drivers, and in many cases the code
|
|
||||||
actually looks better afterwards::
|
|
||||||
|
|
||||||
unsigned long signature = *(unsigned int *) 0xC0000;
|
|
||||||
vs
|
|
||||||
unsigned long signature = readl(0xC0000);
|
|
||||||
|
|
||||||
I think the second version actually is more readable, no?
|
|
||||||
@@ -707,20 +707,6 @@ to use the dma_sync_*() interfaces::
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Drivers converted fully to this interface should not use virt_to_bus() any
|
|
||||||
longer, nor should they use bus_to_virt(). Some drivers have to be changed a
|
|
||||||
little bit, because there is no longer an equivalent to bus_to_virt() in the
|
|
||||||
dynamic DMA mapping scheme - you have to always store the DMA addresses
|
|
||||||
returned by the dma_alloc_coherent(), dma_pool_alloc(), and dma_map_single()
|
|
||||||
calls (dma_map_sg() stores them in the scatterlist itself if the platform
|
|
||||||
supports dynamic DMA mapping in hardware) in your driver structures and/or
|
|
||||||
in the card registers.
|
|
||||||
|
|
||||||
All drivers should be using these interfaces with no exceptions. It
|
|
||||||
is planned to completely remove virt_to_bus() and bus_to_virt() as
|
|
||||||
they are entirely deprecated. Some ports already do not provide these
|
|
||||||
as it is impossible to correctly support them.
|
|
||||||
|
|
||||||
Handling Errors
|
Handling Errors
|
||||||
===============
|
===============
|
||||||
|
|
||||||
|
|||||||
@@ -204,6 +204,20 @@ Returns the maximum size of a mapping for the device. The size parameter
|
|||||||
of the mapping functions like dma_map_single(), dma_map_page() and
|
of the mapping functions like dma_map_single(), dma_map_page() and
|
||||||
others should not be larger than the returned value.
|
others should not be larger than the returned value.
|
||||||
|
|
||||||
|
::
|
||||||
|
|
||||||
|
size_t
|
||||||
|
dma_opt_mapping_size(struct device *dev);
|
||||||
|
|
||||||
|
Returns the maximum optimal size of a mapping for the device.
|
||||||
|
|
||||||
|
Mapping larger buffers may take much longer in certain scenarios. In
|
||||||
|
addition, for high-rate short-lived streaming mappings, the upfront time
|
||||||
|
spent on the mapping may account for an appreciable part of the total
|
||||||
|
request lifetime. As such, if splitting larger requests incurs no
|
||||||
|
significant performance penalty, then device drivers are advised to
|
||||||
|
limit total DMA streaming mappings length to the returned value.
|
||||||
|
|
||||||
::
|
::
|
||||||
|
|
||||||
bool
|
bool
|
||||||
|
|||||||
@@ -17,6 +17,9 @@ solution to the problem to avoid everybody inventing their own. The IDR
|
|||||||
provides the ability to map an ID to a pointer, while the IDA provides
|
provides the ability to map an ID to a pointer, while the IDA provides
|
||||||
only ID allocation, and as a result is much more memory-efficient.
|
only ID allocation, and as a result is much more memory-efficient.
|
||||||
|
|
||||||
|
The IDR interface is deprecated; please use the :doc:`XArray <xarray>`
|
||||||
|
instead.
|
||||||
|
|
||||||
IDR usage
|
IDR usage
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
|||||||
@@ -41,7 +41,6 @@ Library functionality that is used throughout the kernel.
|
|||||||
rbtree
|
rbtree
|
||||||
generic-radix-tree
|
generic-radix-tree
|
||||||
packing
|
packing
|
||||||
bus-virt-phys-mapping
|
|
||||||
this_cpu_ops
|
this_cpu_ops
|
||||||
timekeeping
|
timekeeping
|
||||||
errseq
|
errseq
|
||||||
@@ -87,7 +86,7 @@ Memory management
|
|||||||
=================
|
=================
|
||||||
|
|
||||||
How to allocate and use memory in the kernel. Note that there is a lot
|
How to allocate and use memory in the kernel. Note that there is a lot
|
||||||
more memory-management documentation in Documentation/vm/index.rst.
|
more memory-management documentation in Documentation/mm/index.rst.
|
||||||
|
|
||||||
.. toctree::
|
.. toctree::
|
||||||
:maxdepth: 1
|
:maxdepth: 1
|
||||||
|
|||||||
@@ -22,16 +22,16 @@ Memory Allocation Controls
|
|||||||
.. kernel-doc:: include/linux/gfp.h
|
.. kernel-doc:: include/linux/gfp.h
|
||||||
:internal:
|
:internal:
|
||||||
|
|
||||||
.. kernel-doc:: include/linux/gfp.h
|
.. kernel-doc:: include/linux/gfp_types.h
|
||||||
:doc: Page mobility and placement hints
|
:doc: Page mobility and placement hints
|
||||||
|
|
||||||
.. kernel-doc:: include/linux/gfp.h
|
.. kernel-doc:: include/linux/gfp_types.h
|
||||||
:doc: Watermark modifiers
|
:doc: Watermark modifiers
|
||||||
|
|
||||||
.. kernel-doc:: include/linux/gfp.h
|
.. kernel-doc:: include/linux/gfp_types.h
|
||||||
:doc: Reclaim modifiers
|
:doc: Reclaim modifiers
|
||||||
|
|
||||||
.. kernel-doc:: include/linux/gfp.h
|
.. kernel-doc:: include/linux/gfp_types.h
|
||||||
:doc: Useful GFP flag combinations
|
:doc: Useful GFP flag combinations
|
||||||
|
|
||||||
The Slab Cache
|
The Slab Cache
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ The wiki documentation always refers to the linux-next version of the script.
|
|||||||
|
|
||||||
For Semantic Patch Language(SmPL) grammar documentation refer to:
|
For Semantic Patch Language(SmPL) grammar documentation refer to:
|
||||||
|
|
||||||
http://coccinelle.lip6.fr/documentation.php
|
https://coccinelle.gitlabpages.inria.fr/website/docs/main_grammar.html
|
||||||
|
|
||||||
Using Coccinelle on the Linux kernel
|
Using Coccinelle on the Linux kernel
|
||||||
------------------------------------
|
------------------------------------
|
||||||
|
|||||||
@@ -174,7 +174,6 @@ mapping:
|
|||||||
|
|
||||||
- ``kmemleak_alloc_phys``
|
- ``kmemleak_alloc_phys``
|
||||||
- ``kmemleak_free_part_phys``
|
- ``kmemleak_free_part_phys``
|
||||||
- ``kmemleak_not_leak_phys``
|
|
||||||
- ``kmemleak_ignore_phys``
|
- ``kmemleak_ignore_phys``
|
||||||
|
|
||||||
Dealing with false positives/negatives
|
Dealing with false positives/negatives
|
||||||
|
|||||||
@@ -208,6 +208,14 @@ In general, the rules for selftests are
|
|||||||
Contributing new tests (details)
|
Contributing new tests (details)
|
||||||
================================
|
================================
|
||||||
|
|
||||||
|
* In your Makefile, use facilities from lib.mk by including it instead of
|
||||||
|
reinventing the wheel. Specify flags and binary generation flags on an
|
||||||
|
as-needed basis before including lib.mk. ::
|
||||||
|
|
||||||
|
CFLAGS = $(KHDR_INCLUDES)
|
||||||
|
TEST_GEN_PROGS := close_range_test
|
||||||
|
include ../lib.mk
|
||||||
|
|
||||||
* Use TEST_GEN_XXX if such binaries or files are generated during
|
* Use TEST_GEN_XXX if such binaries or files are generated during
|
||||||
compiling.
|
compiling.
|
||||||
|
|
||||||
@@ -230,13 +238,30 @@ Contributing new tests (details)
|
|||||||
* First use the headers inside the kernel source and/or git repo, and then the
|
* First use the headers inside the kernel source and/or git repo, and then the
|
||||||
system headers. Headers for the kernel release as opposed to headers
|
system headers. Headers for the kernel release as opposed to headers
|
||||||
installed by the distro on the system should be the primary focus to be able
|
installed by the distro on the system should be the primary focus to be able
|
||||||
to find regressions.
|
to find regressions. Use KHDR_INCLUDES in Makefile to include headers from
|
||||||
|
the kernel source.
|
||||||
|
|
||||||
* If a test needs specific kernel config options enabled, add a config file in
|
* If a test needs specific kernel config options enabled, add a config file in
|
||||||
the test directory to enable them.
|
the test directory to enable them.
|
||||||
|
|
||||||
e.g: tools/testing/selftests/android/config
|
e.g: tools/testing/selftests/android/config
|
||||||
|
|
||||||
|
* Create a .gitignore file inside test directory and add all generated objects
|
||||||
|
in it.
|
||||||
|
|
||||||
|
* Add new test name in TARGETS in selftests/Makefile::
|
||||||
|
|
||||||
|
TARGETS += android
|
||||||
|
|
||||||
|
* All changes should pass::
|
||||||
|
|
||||||
|
kselftest-{all,install,clean,gen_tar}
|
||||||
|
kselftest-{all,install,clean,gen_tar} O=abs_path
|
||||||
|
kselftest-{all,install,clean,gen_tar} O=rel_path
|
||||||
|
make -C tools/testing/selftests {all,install,clean,gen_tar}
|
||||||
|
make -C tools/testing/selftests {all,install,clean,gen_tar} O=abs_path
|
||||||
|
make -C tools/testing/selftests {all,install,clean,gen_tar} O=rel_path
|
||||||
|
|
||||||
Test Module
|
Test Module
|
||||||
===========
|
===========
|
||||||
|
|
||||||
@@ -250,6 +275,14 @@ assist writing kernel modules that are for use with kselftest:
|
|||||||
- ``tools/testing/selftests/kselftest_module.h``
|
- ``tools/testing/selftests/kselftest_module.h``
|
||||||
- ``tools/testing/selftests/kselftest/module.sh``
|
- ``tools/testing/selftests/kselftest/module.sh``
|
||||||
|
|
||||||
|
Note that test modules should taint the kernel with TAINT_TEST. This will
|
||||||
|
happen automatically for modules which are in the ``tools/testing/``
|
||||||
|
directory, or for modules which use the ``kselftest_module.h`` header above.
|
||||||
|
Otherwise, you'll need to add ``MODULE_INFO(test, "Y")`` to your module
|
||||||
|
source. selftests which do not load modules typically should not taint the
|
||||||
|
kernel, but in cases where a non-test module is loaded, TAINT_TEST can be
|
||||||
|
applied from userspace by writing to ``/proc/sys/kernel/tainted``.
|
||||||
|
|
||||||
How to use
|
How to use
|
||||||
----------
|
----------
|
||||||
|
|
||||||
@@ -308,6 +341,7 @@ A bare bones test module might look like this:
|
|||||||
KSTM_MODULE_LOADERS(test_foo);
|
KSTM_MODULE_LOADERS(test_foo);
|
||||||
MODULE_AUTHOR("John Developer <jd@fooman.org>");
|
MODULE_AUTHOR("John Developer <jd@fooman.org>");
|
||||||
MODULE_LICENSE("GPL");
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_INFO(test, "Y");
|
||||||
|
|
||||||
Example test script
|
Example test script
|
||||||
-------------------
|
-------------------
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user