mirror of
https://github.com/torvalds/linux.git
synced 2024-11-13 23:51:39 +00:00
Merge branch 'for-next/perf' into for-next/core
* for-next/perf: (21 commits) arm_pmu: Drop redundant armpmu->map_event() in armpmu_event_init() drivers/perf: hisi: Add TLP filter support Documentation: perf: Indent filter options list of hisi-pcie-pmu docs: perf: Fix PMU instance name of hisi-pcie-pmu drivers/perf: hisi: Fix some event id for hisi-pcie-pmu arm64/perf: Replace PMU version number '0' with ID_AA64DFR0_EL1_PMUVer_NI perf/amlogic: Remove unused header inclusions of <linux/version.h> perf/amlogic: Fix build error for x86_64 allmodconfig dt-binding: perf: Add Amlogic DDR PMU docs/perf: Add documentation for the Amlogic G12 DDR PMU perf/amlogic: Add support for Amlogic meson G12 SoC DDR PMU driver MAINTAINERS: Update HiSilicon PMU maintainers perf: arm_cspmu: Fix module cyclic dependency perf: arm_cspmu: Fix build failure on x86_64 perf: arm_cspmu: Fix modular builds due to missing MODULE_LICENSE()s perf: arm_cspmu: Add support for NVIDIA SCF and MCF attribute perf: arm_cspmu: Add support for ARM CoreSight PMU driver perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init() perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init() drivers: perf: marvell_cn10k: Fix hotplug callback leak in tad_pmu_init() ...
This commit is contained in:
commit
10162e78ea
@ -15,10 +15,10 @@ HiSilicon PCIe PMU driver
|
||||
The PCIe PMU driver registers a perf PMU with the name of its sicl-id and PCIe
|
||||
Core id.::
|
||||
|
||||
/sys/bus/event_source/hisi_pcie<sicl>_<core>
|
||||
/sys/bus/event_source/devices/hisi_pcie<sicl>_core<core>
|
||||
|
||||
PMU driver provides description of available events and filter options in sysfs,
|
||||
see /sys/bus/event_source/devices/hisi_pcie<sicl>_<core>.
|
||||
see /sys/bus/event_source/devices/hisi_pcie<sicl>_core<core>.
|
||||
|
||||
The "format" directory describes all formats of the config (events) and config1
|
||||
(filter options) fields of the perf_event_attr structure. The "events" directory
|
||||
@ -33,13 +33,13 @@ monitored by PMU.
|
||||
Example usage of perf::
|
||||
|
||||
$# perf list
|
||||
hisi_pcie0_0/rx_mwr_latency/ [kernel PMU event]
|
||||
hisi_pcie0_0/rx_mwr_cnt/ [kernel PMU event]
|
||||
hisi_pcie0_core0/rx_mwr_latency/ [kernel PMU event]
|
||||
hisi_pcie0_core0/rx_mwr_cnt/ [kernel PMU event]
|
||||
------------------------------------------
|
||||
|
||||
$# perf stat -e hisi_pcie0_0/rx_mwr_latency/
|
||||
$# perf stat -e hisi_pcie0_0/rx_mwr_cnt/
|
||||
$# perf stat -g -e hisi_pcie0_0/rx_mwr_latency/ -e hisi_pcie0_0/rx_mwr_cnt/
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mwr_latency/
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mwr_cnt/
|
||||
$# perf stat -g -e hisi_pcie0_core0/rx_mwr_latency/ -e hisi_pcie0_core0/rx_mwr_cnt/
|
||||
|
||||
The current driver does not support sampling. So "perf record" is unsupported.
|
||||
Attaching to a task is also unsupported for the PCIe PMU.
|
||||
@ -48,59 +48,83 @@ Filter options
|
||||
--------------
|
||||
|
||||
1. Target filter
|
||||
PMU could only monitor the performance of traffic downstream target Root Ports
|
||||
or downstream target Endpoint. PCIe PMU driver support "port" and "bdf"
|
||||
interfaces for users, and these two interfaces aren't supported at the same
|
||||
time.
|
||||
|
||||
-port
|
||||
"port" filter can be used in all PCIe PMU events, target Root Port can be
|
||||
selected by configuring the 16-bits-bitmap "port". Multi ports can be selected
|
||||
for AP-layer-events, and only one port can be selected for TL/DL-layer-events.
|
||||
PMU can only monitor the performance of traffic downstream target Root
|
||||
Ports or downstream target Endpoint. PCIe PMU driver supports "port" and
|
||||
"bdf" interfaces for users, and these two interfaces aren't supported at the
|
||||
same time.
|
||||
|
||||
For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of bitmap
|
||||
should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4 lanes),
|
||||
bit8 is set, port=0x100; if these two Root Ports are both monitored, port=0x101.
|
||||
- port
|
||||
|
||||
Example usage of perf::
|
||||
"port" filter can be used in all PCIe PMU events, target Root Port can be
|
||||
selected by configuring the 16-bits-bitmap "port". Multi ports can be
|
||||
selected for AP-layer-events, and only one port can be selected for
|
||||
TL/DL-layer-events.
|
||||
|
||||
$# perf stat -e hisi_pcie0_0/rx_mwr_latency,port=0x1/ sleep 5
|
||||
For example, if target Root Port is 0000:00:00.0 (x8 lanes), bit0 of
|
||||
bitmap should be set, port=0x1; if target Root Port is 0000:00:04.0 (x4
|
||||
lanes), bit8 is set, port=0x100; if these two Root Ports are both
|
||||
monitored, port=0x101.
|
||||
|
||||
-bdf
|
||||
Example usage of perf::
|
||||
|
||||
"bdf" filter can only be used in bandwidth events, target Endpoint is selected
|
||||
by configuring BDF to "bdf". Counter only counts the bandwidth of message
|
||||
requested by target Endpoint.
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mwr_latency,port=0x1/ sleep 5
|
||||
|
||||
For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
|
||||
- bdf
|
||||
|
||||
Example usage of perf::
|
||||
"bdf" filter can only be used in bandwidth events, target Endpoint is
|
||||
selected by configuring BDF to "bdf". Counter only counts the bandwidth of
|
||||
message requested by target Endpoint.
|
||||
|
||||
$# perf stat -e hisi_pcie0_0/rx_mrd_flux,bdf=0x3900/ sleep 5
|
||||
For example, "bdf=0x3900" means BDF of target Endpoint is 0000:39:00.0.
|
||||
|
||||
Example usage of perf::
|
||||
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,bdf=0x3900/ sleep 5
|
||||
|
||||
2. Trigger filter
|
||||
Event statistics start when the first time TLP length is greater/smaller
|
||||
than trigger condition. You can set the trigger condition by writing "trig_len",
|
||||
and set the trigger mode by writing "trig_mode". This filter can only be used
|
||||
in bandwidth events.
|
||||
|
||||
For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
|
||||
means statistics start when TLP length > trigger condition, "trig_mode=1"
|
||||
means start when TLP length < condition.
|
||||
Event statistics start the first time the TLP length is greater/smaller
|
||||
than trigger condition. You can set the trigger condition by writing
|
||||
"trig_len", and set the trigger mode by writing "trig_mode". This filter can
|
||||
only be used in bandwidth events.
|
||||
|
||||
Example usage of perf::
|
||||
For example, "trig_len=4" means trigger condition is 2^4 DW, "trig_mode=0"
|
||||
means statistics start when TLP length > trigger condition, "trig_mode=1"
|
||||
means start when TLP length < condition.
|
||||
|
||||
$# perf stat -e hisi_pcie0_0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
|
||||
Example usage of perf::
|
||||
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,trig_len=0x4,trig_mode=1/ sleep 5
|
||||
|
||||
3. Threshold filter
|
||||
Counter counts when TLP length within the specified range. You can set the
|
||||
threshold by writing "thr_len", and set the threshold mode by writing
|
||||
"thr_mode". This filter can only be used in bandwidth events.
|
||||
|
||||
For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
|
||||
counter counts when TLP length >= threshold, and "thr_mode=1" means counts
|
||||
when TLP length < threshold.
|
||||
Counter counts when TLP length is within the specified range. You can set the
|
||||
threshold by writing "thr_len", and set the threshold mode by writing
|
||||
"thr_mode". This filter can only be used in bandwidth events.
|
||||
|
||||
Example usage of perf::
|
||||
For example, "thr_len=4" means threshold is 2^4 DW, "thr_mode=0" means
|
||||
counter counts when TLP length >= threshold, and "thr_mode=1" means counts
|
||||
when TLP length < threshold.
|
||||
|
||||
$# perf stat -e hisi_pcie0_0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
|
||||
Example usage of perf::
|
||||
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,thr_len=0x4,thr_mode=1/ sleep 5
|
||||
|
||||
4. TLP Length filter
|
||||
|
||||
When counting bandwidth, the data can be composed of certain parts of TLP
|
||||
packets. You can specify it through "len_mode":
|
||||
|
||||
- 2'b00: Reserved (Do not use this since the behaviour is undefined)
|
||||
- 2'b01: Bandwidth of TLP payloads
|
||||
- 2'b10: Bandwidth of TLP headers
|
||||
- 2'b11: Bandwidth of both TLP payloads and headers
|
||||
|
||||
For example, "len_mode=2" means only counting the bandwidth of TLP headers
|
||||
and "len_mode=3" means the final bandwidth data is composed of both TLP
|
||||
headers and payloads. Default value if not specified is 2'b11.
|
||||
|
||||
Example usage of perf::
|
||||
|
||||
$# perf stat -e hisi_pcie0_core0/rx_mrd_flux,len_mode=0x1/ sleep 5
|
||||
|
@ -19,3 +19,5 @@ Performance monitor support
|
||||
arm_dsu_pmu
|
||||
thunderx2-pmu
|
||||
alibaba_pmu
|
||||
nvidia-pmu
|
||||
meson-ddr-pmu
|
||||
|
70
Documentation/admin-guide/perf/meson-ddr-pmu.rst
Normal file
70
Documentation/admin-guide/perf/meson-ddr-pmu.rst
Normal file
@ -0,0 +1,70 @@
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================================================
|
||||
Amlogic SoC DDR Bandwidth Performance Monitoring Unit (PMU)
|
||||
===========================================================
|
||||
|
||||
The Amlogic Meson G12 SoC contains a bandwidth monitor inside DRAM controller.
|
||||
The monitor includes 4 channels. Each channel can count the request accessing
|
||||
DRAM. The channel can count up to 3 AXI ports simultaneously. It can be helpful
|
||||
to show if the performance bottleneck is on DDR bandwidth.
|
||||
|
||||
Currently, this driver supports the following 5 perf events:
|
||||
|
||||
+ meson_ddr_bw/total_rw_bytes/
|
||||
+ meson_ddr_bw/chan_1_rw_bytes/
|
||||
+ meson_ddr_bw/chan_2_rw_bytes/
|
||||
+ meson_ddr_bw/chan_3_rw_bytes/
|
||||
+ meson_ddr_bw/chan_4_rw_bytes/
|
||||
|
||||
meson_ddr_bw/chan_{1,2,3,4}_rw_bytes/ events are channel-specific events.
|
||||
Each channel supports filtering, which lets the channel monitor an
|
||||
individual IP module in the SoC.
|
||||
|
||||
Below are DDR access request event filter keywords:
|
||||
|
||||
+ arm - from CPU
|
||||
+ vpu_read1 - from OSD + VPP read
|
||||
+ gpu - from 3D GPU
|
||||
+ pcie - from PCIe controller
|
||||
+ hdcp - from HDCP controller
|
||||
+ hevc_front - from HEVC codec front end
|
||||
+ usb3_0 - from USB3.0 controller
|
||||
+ hevc_back - from HEVC codec back end
|
||||
+ h265enc - from HEVC encoder
|
||||
+ vpu_read2 - from DI read
|
||||
+ vpu_write1 - from VDIN write
|
||||
+ vpu_write2 - from di write
|
||||
+ vdec - from legacy codec video decoder
|
||||
+ hcodec - from H264 encoder
|
||||
+ ge2d - from ge2d
|
||||
+ spicc1 - from SPI controller 1
|
||||
+ usb0 - from USB2.0 controller 0
|
||||
+ dma - from system DMA controller 1
|
||||
+ arb0 - from arb0
|
||||
+ sd_emmc_b - from SD eMMC b controller
|
||||
+ usb1 - from USB2.0 controller 1
|
||||
+ audio - from Audio module
|
||||
+ sd_emmc_c - from SD eMMC c controller
|
||||
+ spicc2 - from SPI controller 2
|
||||
+ ethernet - from Ethernet controller
|
||||
|
||||
|
||||
Examples:
|
||||
|
||||
+ Show the total DDR bandwidth per second:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
perf stat -a -e meson_ddr_bw/total_rw_bytes/ -I 1000 sleep 10
|
||||
|
||||
|
||||
+ Show individual DDR bandwidth from CPU and GPU respectively, as well as
|
||||
sum of them:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
perf stat -a -e meson_ddr_bw/chan_1_rw_bytes,arm=1/ -I 1000 sleep 10
|
||||
perf stat -a -e meson_ddr_bw/chan_2_rw_bytes,gpu=1/ -I 1000 sleep 10
|
||||
perf stat -a -e meson_ddr_bw/chan_3_rw_bytes,arm=1,gpu=1/ -I 1000 sleep 10
|
||||
|
299
Documentation/admin-guide/perf/nvidia-pmu.rst
Normal file
299
Documentation/admin-guide/perf/nvidia-pmu.rst
Normal file
@ -0,0 +1,299 @@
|
||||
=========================================================
|
||||
NVIDIA Tegra SoC Uncore Performance Monitoring Unit (PMU)
|
||||
=========================================================
|
||||
|
||||
The NVIDIA Tegra SoC includes various system PMUs to measure key performance
|
||||
metrics like memory bandwidth, latency, and utilization:
|
||||
|
||||
* Scalable Coherency Fabric (SCF)
|
||||
* NVLink-C2C0
|
||||
* NVLink-C2C1
|
||||
* CNVLink
|
||||
* PCIE
|
||||
|
||||
PMU Driver
|
||||
----------
|
||||
|
||||
The PMUs in this document are based on ARM CoreSight PMU Architecture as
|
||||
described in document: ARM IHI 0091. Since this is a standard architecture, the
|
||||
PMUs are managed by a common driver "arm-cs-arch-pmu". This driver describes
|
||||
the available events and configuration of each PMU in sysfs. Please see the
|
||||
sections below to get the sysfs path of each PMU. Like other uncore PMU drivers,
|
||||
the driver provides "cpumask" sysfs attribute to show the CPU id used to handle
|
||||
the PMU event. There is also "associated_cpus" sysfs attribute, which contains a
|
||||
list of CPUs associated with the PMU instance.
|
||||
|
||||
.. _SCF_PMU_Section:
|
||||
|
||||
SCF PMU
|
||||
-------
|
||||
|
||||
The SCF PMU monitors system level cache events, CPU traffic, and
|
||||
strongly-ordered (SO) PCIE write traffic to local/remote memory. Please see
|
||||
:ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about the PMU
|
||||
traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_scf_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 in socket 0::
|
||||
|
||||
perf stat -a -e nvidia_scf_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x0 in socket 1::
|
||||
|
||||
perf stat -a -e nvidia_scf_pmu_1/event=0x0/
|
||||
|
||||
NVLink-C2C0 PMU
|
||||
--------------------
|
||||
|
||||
The NVLink-C2C0 PMU monitors incoming traffic from a GPU/CPU connected with
|
||||
NVLink-C2C (Chip-2-Chip) interconnect. The type of traffic captured by this PMU
|
||||
varies depending on the chip configuration:
|
||||
|
||||
* NVIDIA Grace Hopper Superchip: Hopper GPU is connected with Grace SoC.
|
||||
|
||||
In this config, the PMU captures GPU ATS translated or EGM traffic from the GPU.
|
||||
|
||||
* NVIDIA Grace CPU Superchip: two Grace CPU SoCs are connected.
|
||||
|
||||
In this config, the PMU captures read and relaxed ordered (RO) writes from
|
||||
PCIE device of the remote SoC.
|
||||
|
||||
Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c0_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from the GPU/CPU connected with socket 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU/CPU connected with socket 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_1/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU/CPU connected with socket 2::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_2/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU/CPU connected with socket 3::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c0_pmu_3/event=0x0/
|
||||
|
||||
NVLink-C2C1 PMU
|
||||
-------------------
|
||||
|
||||
The NVLink-C2C1 PMU monitors incoming traffic from a GPU connected with
|
||||
NVLink-C2C (Chip-2-Chip) interconnect. This PMU captures untranslated GPU
|
||||
traffic, in contrast with NVLink-C2C0 PMU that captures ATS translated traffic.
|
||||
Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section` for more info about
|
||||
the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_nvlink_c2c1_pmu_<socket-id>.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 0::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_0/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 1::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_1/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 2::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_2/event=0x0/
|
||||
|
||||
* Count event id 0x0 from the GPU connected with socket 3::
|
||||
|
||||
perf stat -a -e nvidia_nvlink_c2c1_pmu_3/event=0x0/
|
||||
|
||||
CNVLink PMU
|
||||
---------------
|
||||
|
||||
The CNVLink PMU monitors traffic from GPU and PCIE device on remote sockets
|
||||
to local memory. For PCIE traffic, this PMU captures read and relaxed ordered
|
||||
(RO) write traffic. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can be connected to one or more sockets via CNVLink. The user can
|
||||
use "rem_socket" bitmap parameter to select the remote socket(s) to monitor.
|
||||
Each bit represents the socket number, e.g. "rem_socket=0xE" corresponds to
|
||||
socket 1 to 3.
|
||||
/sys/bus/event_source/devices/nvidia_cnvlink_pmu_<socket-id>/format/rem_socket
|
||||
shows the valid bits that can be set in the "rem_socket" parameter.
|
||||
|
||||
The PMU can not distinguish the remote traffic initiator, therefore it does not
|
||||
provide filter to select the traffic source to monitor. It reports combined
|
||||
traffic from remote GPU and PCIE devices.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 for the traffic from remote socket 1, 2, and 3 to socket 0::
|
||||
|
||||
perf stat -a -e nvidia_cnvlink_pmu_0/event=0x0,rem_socket=0xE/
|
||||
|
||||
* Count event id 0x0 for the traffic from remote socket 0, 2, and 3 to socket 1::
|
||||
|
||||
perf stat -a -e nvidia_cnvlink_pmu_1/event=0x0,rem_socket=0xD/
|
||||
|
||||
* Count event id 0x0 for the traffic from remote socket 0, 1, and 3 to socket 2::
|
||||
|
||||
perf stat -a -e nvidia_cnvlink_pmu_2/event=0x0,rem_socket=0xB/
|
||||
|
||||
* Count event id 0x0 for the traffic from remote socket 0, 1, and 2 to socket 3::
|
||||
|
||||
perf stat -a -e nvidia_cnvlink_pmu_3/event=0x0,rem_socket=0x7/
|
||||
|
||||
|
||||
PCIE PMU
|
||||
------------
|
||||
|
||||
The PCIE PMU monitors all read/write traffic from PCIE root ports to
|
||||
local/remote memory. Please see :ref:`NVIDIA_Uncore_PMU_Traffic_Coverage_Section`
|
||||
for more info about the PMU traffic coverage.
|
||||
|
||||
The events and configuration options of this PMU device are described in sysfs,
|
||||
see /sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>.
|
||||
|
||||
Each SoC socket can support multiple root ports. The user can use
|
||||
"root_port" bitmap parameter to select the port(s) to monitor, e.g.
|
||||
"root_port=0xF" corresponds to root port 0 to 3.
|
||||
/sys/bus/event_source/devices/nvidia_pcie_pmu_<socket-id>/format/root_port
|
||||
shows the valid bits that can be set in the "root_port" parameter.
|
||||
|
||||
Example usage:
|
||||
|
||||
* Count event id 0x0 from root port 0 and 1 of socket 0::
|
||||
|
||||
perf stat -a -e nvidia_pcie_pmu_0/event=0x0,root_port=0x3/
|
||||
|
||||
* Count event id 0x0 from root port 0 and 1 of socket 1::
|
||||
|
||||
perf stat -a -e nvidia_pcie_pmu_1/event=0x0,root_port=0x3/
|
||||
|
||||
.. _NVIDIA_Uncore_PMU_Traffic_Coverage_Section:
|
||||
|
||||
Traffic Coverage
|
||||
----------------
|
||||
|
||||
The PMU traffic coverage may vary depending on the chip configuration:
|
||||
|
||||
* **NVIDIA Grace Hopper Superchip**: Hopper GPU is connected with Grace SoC.
|
||||
|
||||
Example configuration with two Grace SoCs::
|
||||
|
||||
********************************* *********************************
|
||||
* SOCKET-A * * SOCKET-B *
|
||||
* * * *
|
||||
* :::::::: * * :::::::: *
|
||||
* : PCIE : * * : PCIE : *
|
||||
* :::::::: * * :::::::: *
|
||||
* | * * | *
|
||||
* | * * | *
|
||||
* ::::::: ::::::::: * * ::::::::: ::::::: *
|
||||
* : : : : * * : : : : *
|
||||
* : GPU :<--NVLink-->: Grace :<---CNVLink--->: Grace :<--NVLink-->: GPU : *
|
||||
* : : C2C : SoC : * * : SoC : C2C : : *
|
||||
* ::::::: ::::::::: * * ::::::::: ::::::: *
|
||||
* | | * * | | *
|
||||
* | | * * | | *
|
||||
* &&&&&&&& &&&&&&&& * * &&&&&&&& &&&&&&&& *
|
||||
* & GMEM & & CMEM & * * & CMEM & & GMEM & *
|
||||
* &&&&&&&& &&&&&&&& * * &&&&&&&& &&&&&&&& *
|
||||
* * * *
|
||||
********************************* *********************************
|
||||
|
||||
GMEM = GPU Memory (e.g. HBM)
|
||||
CMEM = CPU Memory (e.g. LPDDR5X)
|
||||
|
||||
|
|
||||
| Following table contains traffic coverage of Grace SoC PMU in socket-A:
|
||||
|
||||
::
|
||||
|
||||
+--------------+-------+-----------+-----------+-----+----------+----------+
|
||||
| | Source |
|
||||
+ +-------+-----------+-----------+-----+----------+----------+
|
||||
| Destination | |GPU ATS |GPU Not-ATS| | Socket-B | Socket-B |
|
||||
| |PCI R/W|Translated,|Translated | CPU | CPU/PCIE1| GPU/PCIE2|
|
||||
| | |EGM | | | | |
|
||||
+==============+=======+===========+===========+=====+==========+==========+
|
||||
| Local | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | SCF PMU | CNVLink |
|
||||
| SYSRAM/CMEM | PMU |PMU |PMU | PMU | | PMU |
|
||||
+--------------+-------+-----------+-----------+-----+----------+----------+
|
||||
| Local GMEM | PCIE | N/A |NVLink-C2C1| SCF | SCF PMU | CNVLink |
|
||||
| | PMU | |PMU | PMU | | PMU |
|
||||
+--------------+-------+-----------+-----------+-----+----------+----------+
|
||||
| Remote | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | | |
|
||||
| SYSRAM/CMEM | PMU |PMU |PMU | PMU | N/A | N/A |
|
||||
| over CNVLink | | | | | | |
|
||||
+--------------+-------+-----------+-----------+-----+----------+----------+
|
||||
| Remote GMEM | PCIE |NVLink-C2C0|NVLink-C2C1| SCF | | |
|
||||
| over CNVLink | PMU |PMU |PMU | PMU | N/A | N/A |
|
||||
+--------------+-------+-----------+-----------+-----+----------+----------+
|
||||
|
||||
PCIE1 traffic represents strongly ordered (SO) writes.
|
||||
PCIE2 traffic represents reads and relaxed ordered (RO) writes.
|
||||
|
||||
* **NVIDIA Grace CPU Superchip**: two Grace CPU SoCs are connected.
|
||||
|
||||
Example configuration with two Grace SoCs::
|
||||
|
||||
******************* *******************
|
||||
* SOCKET-A * * SOCKET-B *
|
||||
* * * *
|
||||
* :::::::: * * :::::::: *
|
||||
* : PCIE : * * : PCIE : *
|
||||
* :::::::: * * :::::::: *
|
||||
* | * * | *
|
||||
* | * * | *
|
||||
* ::::::::: * * ::::::::: *
|
||||
* : : * * : : *
|
||||
* : Grace :<--------NVLink------->: Grace : *
|
||||
* : SoC : * C2C * : SoC : *
|
||||
* ::::::::: * * ::::::::: *
|
||||
* | * * | *
|
||||
* | * * | *
|
||||
* &&&&&&&& * * &&&&&&&& *
|
||||
* & CMEM & * * & CMEM & *
|
||||
* &&&&&&&& * * &&&&&&&& *
|
||||
* * * *
|
||||
******************* *******************
|
||||
|
||||
GMEM = GPU Memory (e.g. HBM)
|
||||
CMEM = CPU Memory (e.g. LPDDR5X)
|
||||
|
||||
|
|
||||
| Following table contains traffic coverage of Grace SoC PMU in socket-A:
|
||||
|
||||
::
|
||||
|
||||
+-----------------+-----------+---------+----------+-------------+
|
||||
| | Source |
|
||||
+ +-----------+---------+----------+-------------+
|
||||
| Destination | | | Socket-B | Socket-B |
|
||||
| | PCI R/W | CPU | CPU/PCIE1| PCIE2 |
|
||||
| | | | | |
|
||||
+=================+===========+=========+==========+=============+
|
||||
| Local | PCIE PMU | SCF PMU | SCF PMU | NVLink-C2C0 |
|
||||
| SYSRAM/CMEM | | | | PMU |
|
||||
+-----------------+-----------+---------+----------+-------------+
|
||||
| Remote | | | | |
|
||||
| SYSRAM/CMEM | PCIE PMU | SCF PMU | N/A | N/A |
|
||||
| over NVLink-C2C | | | | |
|
||||
+-----------------+-----------+---------+----------+-------------+
|
||||
|
||||
PCIE1 traffic represents strongly ordered (SO) writes.
|
||||
PCIE2 traffic represents reads and relaxed ordered (RO) writes.
|
@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
|
||||
%YAML 1.2
|
||||
---
|
||||
$id: http://devicetree.org/schemas/perf/amlogic,g12-ddr-pmu.yaml#
|
||||
$schema: http://devicetree.org/meta-schemas/core.yaml#
|
||||
|
||||
title: Amlogic G12 DDR performance monitor
|
||||
|
||||
maintainers:
|
||||
- Jiucheng Xu <jiucheng.xu@amlogic.com>
|
||||
|
||||
description: |
|
||||
Amlogic G12 series SoCs integrate a DDR bandwidth monitor.
|
||||
It contains a timer that can generate an interrupt on timeout.
|
||||
The bandwidth is counted in the timer ISR. Different platforms
|
||||
have different subsets of event format attributes.
|
||||
|
||||
properties:
|
||||
compatible:
|
||||
enum:
|
||||
- amlogic,g12a-ddr-pmu
|
||||
- amlogic,g12b-ddr-pmu
|
||||
- amlogic,sm1-ddr-pmu
|
||||
|
||||
reg:
|
||||
items:
|
||||
- description: DMC bandwidth register space.
|
||||
- description: DMC PLL register space.
|
||||
|
||||
interrupts:
|
||||
items:
|
||||
- description: The IRQ of the inside timer timeout.
|
||||
|
||||
required:
|
||||
- compatible
|
||||
- reg
|
||||
- interrupts
|
||||
|
||||
additionalProperties: false
|
||||
|
||||
examples:
|
||||
- |
|
||||
#include <dt-bindings/interrupt-controller/arm-gic.h>
|
||||
pmu {
|
||||
#address-cells=<2>;
|
||||
#size-cells=<2>;
|
||||
|
||||
pmu@ff638000 {
|
||||
compatible = "amlogic,g12a-ddr-pmu";
|
||||
reg = <0x0 0xff638000 0x0 0x100>,
|
||||
<0x0 0xff638c00 0x0 0x100>;
|
||||
interrupts = <GIC_SPI 52 IRQ_TYPE_EDGE_RISING>;
|
||||
};
|
||||
};
|
12
MAINTAINERS
12
MAINTAINERS
@ -1093,6 +1093,16 @@ S: Maintained
|
||||
F: Documentation/hid/amd-sfh*
|
||||
F: drivers/hid/amd-sfh-hid/
|
||||
|
||||
AMLOGIC DDR PMU DRIVER
|
||||
M: Jiucheng Xu <jiucheng.xu@amlogic.com>
|
||||
L: linux-amlogic@lists.infradead.org
|
||||
S: Supported
|
||||
W: http://www.amlogic.com
|
||||
F: Documentation/admin-guide/perf/meson-ddr-pmu.rst
|
||||
F: Documentation/devicetree/bindings/perf/amlogic,g12-ddr-pmu.yaml
|
||||
F: drivers/perf/amlogic/
|
||||
F: include/soc/amlogic/
|
||||
|
||||
AMPHION VPU CODEC V4L2 DRIVER
|
||||
M: Ming Qian <ming.qian@nxp.com>
|
||||
M: Shijie Qin <shijie.qin@nxp.com>
|
||||
@ -9248,7 +9258,7 @@ F: drivers/misc/hisi_hikey_usb.c
|
||||
|
||||
HISILICON PMU DRIVER
|
||||
M: Shaokun Zhang <zhangshaokun@hisilicon.com>
|
||||
M: Qi Liu <liuqi115@huawei.com>
|
||||
M: Jonathan Cameron <jonathan.cameron@huawei.com>
|
||||
S: Supported
|
||||
W: http://www.hisilicon.com
|
||||
F: Documentation/admin-guide/perf/hisi-pcie-pmu.rst
|
||||
|
@ -1146,7 +1146,8 @@ static void __armv8pmu_probe_pmu(void *info)
|
||||
dfr0 = read_sysreg(id_aa64dfr0_el1);
|
||||
pmuver = cpuid_feature_extract_unsigned_field(dfr0,
|
||||
ID_AA64DFR0_EL1_PMUVer_SHIFT);
|
||||
if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF || pmuver == 0)
|
||||
if (pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF ||
|
||||
pmuver == ID_AA64DFR0_EL1_PMUVer_NI)
|
||||
return;
|
||||
|
||||
cpu_pmu->pmuver = pmuver;
|
||||
|
@ -199,4 +199,8 @@ config MARVELL_CN10K_DDR_PMU
|
||||
Enable perf support for Marvell DDR Performance monitoring
|
||||
event on CN10K platform.
|
||||
|
||||
source "drivers/perf/arm_cspmu/Kconfig"
|
||||
|
||||
source "drivers/perf/amlogic/Kconfig"
|
||||
|
||||
endmenu
|
||||
|
@ -21,3 +21,5 @@ obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
|
||||
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
|
||||
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o
|
||||
obj-$(CONFIG_ALIBABA_UNCORE_DRW_PMU) += alibaba_uncore_drw_pmu.o
|
||||
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu/
|
||||
obj-$(CONFIG_MESON_DDR_PMU) += amlogic/
|
||||
|
10
drivers/perf/amlogic/Kconfig
Normal file
10
drivers/perf/amlogic/Kconfig
Normal file
@ -0,0 +1,10 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
config MESON_DDR_PMU
|
||||
tristate "Amlogic DDR Bandwidth Performance Monitor"
|
||||
depends on ARCH_MESON || COMPILE_TEST
|
||||
help
|
||||
Provides support for the DDR performance monitor
|
||||
in Amlogic SoCs, which can give information about
|
||||
memory throughput and other related events. It
|
||||
supports multiple channels to monitor the memory
|
||||
bandwidth simultaneously.
|
5
drivers/perf/amlogic/Makefile
Normal file
5
drivers/perf/amlogic/Makefile
Normal file
@ -0,0 +1,5 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
|
||||
obj-$(CONFIG_MESON_DDR_PMU) += meson_ddr_pmu_g12.o
|
||||
|
||||
meson_ddr_pmu_g12-y := meson_ddr_pmu_core.o meson_g12_ddr_pmu.o
|
561
drivers/perf/amlogic/meson_ddr_pmu_core.c
Normal file
561
drivers/perf/amlogic/meson_ddr_pmu_core.c
Normal file
@ -0,0 +1,561 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2022 Amlogic, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/irqreturn.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/of_device.h>
|
||||
#include <linux/of_irq.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/sysfs.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <soc/amlogic/meson_ddr_pmu.h>
|
||||
|
||||
struct ddr_pmu {
|
||||
struct pmu pmu;
|
||||
struct dmc_info info;
|
||||
struct dmc_counter counters; /* save counters from hw */
|
||||
bool pmu_enabled;
|
||||
struct device *dev;
|
||||
char *name;
|
||||
struct hlist_node node;
|
||||
enum cpuhp_state cpuhp_state;
|
||||
int cpu; /* for cpu hotplug */
|
||||
};
|
||||
|
||||
#define DDR_PERF_DEV_NAME		"meson_ddr_bw"

/* A DMC channel can monitor max 4 axi ports */
#define MAX_AXI_PORTS_OF_CHANNEL	4

/* Map an embedded struct pmu / struct dmc_info back to its struct ddr_pmu. */
#define to_ddr_pmu(p)		container_of(p, struct ddr_pmu, pmu)
#define dmc_info_to_pmu(p)	container_of(p, struct ddr_pmu, info)
|
||||
|
||||
static void dmc_pmu_enable(struct ddr_pmu *pmu)
|
||||
{
|
||||
if (!pmu->pmu_enabled)
|
||||
pmu->info.hw_info->enable(&pmu->info);
|
||||
|
||||
pmu->pmu_enabled = true;
|
||||
}
|
||||
|
||||
static void dmc_pmu_disable(struct ddr_pmu *pmu)
|
||||
{
|
||||
if (pmu->pmu_enabled)
|
||||
pmu->info.hw_info->disable(&pmu->info);
|
||||
|
||||
pmu->pmu_enabled = false;
|
||||
}
|
||||
|
||||
/*
 * Program one AXI filter for the channel selected by the event.
 *
 * @event:  perf event; attr.config selects the counter
 * @axi_id: AXI id handed to the hardware set_axi_filter hook
 *
 * Only counter ids strictly between ALL_CHAN_COUNTER_ID and COUNTER_MAX_ID
 * are acted upon; any other id is ignored.
 */
static void meson_ddr_set_axi_filter(struct perf_event *event, u8 axi_id)
{
	struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
	int chann;

	if (event->attr.config <= ALL_CHAN_COUNTER_ID ||
	    event->attr.config >= COUNTER_MAX_ID)
		return;

	/* Channel index is the counter id's offset from the first channel. */
	chann = event->attr.config - CHAN1_COUNTER_ID;
	pmu->info.hw_info->set_axi_filter(&pmu->info, axi_id, chann);
}
|
||||
|
||||
static void ddr_cnt_addition(struct dmc_counter *sum,
|
||||
struct dmc_counter *add1,
|
||||
struct dmc_counter *add2,
|
||||
int chann_nr)
|
||||
{
|
||||
int i;
|
||||
u64 cnt1, cnt2;
|
||||
|
||||
sum->all_cnt = add1->all_cnt + add2->all_cnt;
|
||||
sum->all_req = add1->all_req + add2->all_req;
|
||||
for (i = 0; i < chann_nr; i++) {
|
||||
cnt1 = add1->channel_cnt[i];
|
||||
cnt2 = add2->channel_cnt[i];
|
||||
|
||||
sum->channel_cnt[i] = cnt1 + cnt2;
|
||||
}
|
||||
}
|
||||
|
||||
static void meson_ddr_perf_event_update(struct perf_event *event)
|
||||
{
|
||||
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
|
||||
u64 new_raw_count = 0;
|
||||
struct dmc_counter dc = {0}, sum_dc = {0};
|
||||
int idx;
|
||||
int chann_nr = pmu->info.hw_info->chann_nr;
|
||||
|
||||
/* get the remain counters in register. */
|
||||
pmu->info.hw_info->get_counters(&pmu->info, &dc);
|
||||
|
||||
ddr_cnt_addition(&sum_dc, &pmu->counters, &dc, chann_nr);
|
||||
|
||||
switch (event->attr.config) {
|
||||
case ALL_CHAN_COUNTER_ID:
|
||||
new_raw_count = sum_dc.all_cnt;
|
||||
break;
|
||||
case CHAN1_COUNTER_ID:
|
||||
case CHAN2_COUNTER_ID:
|
||||
case CHAN3_COUNTER_ID:
|
||||
case CHAN4_COUNTER_ID:
|
||||
case CHAN5_COUNTER_ID:
|
||||
case CHAN6_COUNTER_ID:
|
||||
case CHAN7_COUNTER_ID:
|
||||
case CHAN8_COUNTER_ID:
|
||||
idx = event->attr.config - CHAN1_COUNTER_ID;
|
||||
new_raw_count = sum_dc.channel_cnt[idx];
|
||||
break;
|
||||
}
|
||||
|
||||
local64_set(&event->count, new_raw_count);
|
||||
}
|
||||
|
||||
static int meson_ddr_perf_event_init(struct perf_event *event)
|
||||
{
|
||||
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
|
||||
u64 config1 = event->attr.config1;
|
||||
u64 config2 = event->attr.config2;
|
||||
|
||||
if (event->attr.type != event->pmu->type)
|
||||
return -ENOENT;
|
||||
|
||||
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (event->cpu < 0)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
/* check if the number of parameters is too much */
|
||||
if (event->attr.config != ALL_CHAN_COUNTER_ID &&
|
||||
hweight64(config1) + hweight64(config2) > MAX_AXI_PORTS_OF_CHANNEL)
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
event->cpu = pmu->cpu;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void meson_ddr_perf_event_start(struct perf_event *event, int flags)
|
||||
{
|
||||
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
|
||||
|
||||
memset(&pmu->counters, 0, sizeof(pmu->counters));
|
||||
dmc_pmu_enable(pmu);
|
||||
}
|
||||
|
||||
static int meson_ddr_perf_event_add(struct perf_event *event, int flags)
|
||||
{
|
||||
u64 config1 = event->attr.config1;
|
||||
u64 config2 = event->attr.config2;
|
||||
int i;
|
||||
|
||||
for_each_set_bit(i, (const unsigned long *)&config1, sizeof(config1))
|
||||
meson_ddr_set_axi_filter(event, i);
|
||||
|
||||
for_each_set_bit(i, (const unsigned long *)&config2, sizeof(config2))
|
||||
meson_ddr_set_axi_filter(event, i + 64);
|
||||
|
||||
if (flags & PERF_EF_START)
|
||||
meson_ddr_perf_event_start(event, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void meson_ddr_perf_event_stop(struct perf_event *event, int flags)
|
||||
{
|
||||
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
|
||||
|
||||
if (flags & PERF_EF_UPDATE)
|
||||
meson_ddr_perf_event_update(event);
|
||||
|
||||
dmc_pmu_disable(pmu);
|
||||
}
|
||||
|
||||
static void meson_ddr_perf_event_del(struct perf_event *event, int flags)
|
||||
{
|
||||
meson_ddr_perf_event_stop(event, PERF_EF_UPDATE);
|
||||
}
|
||||
|
||||
static ssize_t meson_ddr_perf_cpumask_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct ddr_pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
|
||||
}
|
||||
|
||||
static struct device_attribute meson_ddr_perf_cpumask_attr =
|
||||
__ATTR(cpumask, 0444, meson_ddr_perf_cpumask_show, NULL);
|
||||
|
||||
static struct attribute *meson_ddr_perf_cpumask_attrs[] = {
|
||||
&meson_ddr_perf_cpumask_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group ddr_perf_cpumask_attr_group = {
|
||||
.attrs = meson_ddr_perf_cpumask_attrs,
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
pmu_event_show(struct device *dev, struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct perf_pmu_events_attr *pmu_attr;
|
||||
|
||||
pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
|
||||
return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
event_show_unit(struct device *dev, struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
return sysfs_emit(page, "MB\n");
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
event_show_scale(struct device *dev, struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
/* one count = 16byte = 1.52587890625e-05 MB */
|
||||
return sysfs_emit(page, "1.52587890625e-05\n");
|
||||
}
|
||||
|
||||
#define AML_DDR_PMU_EVENT_ATTR(_name, _id) \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, pmu_event_show, NULL), \
|
||||
.id = _id, \
|
||||
}
|
||||
|
||||
#define AML_DDR_PMU_EVENT_UNIT_ATTR(_name) \
|
||||
__ATTR(_name.unit, 0444, event_show_unit, NULL)
|
||||
|
||||
#define AML_DDR_PMU_EVENT_SCALE_ATTR(_name) \
|
||||
__ATTR(_name.scale, 0444, event_show_scale, NULL)
|
||||
|
||||
static struct device_attribute event_unit_attrs[] = {
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(total_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_1_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_2_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_3_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_4_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_5_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_6_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_7_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_UNIT_ATTR(chan_8_rw_bytes),
|
||||
};
|
||||
|
||||
static struct device_attribute event_scale_attrs[] = {
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(total_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_1_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_2_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_3_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_4_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_5_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_6_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_7_rw_bytes),
|
||||
AML_DDR_PMU_EVENT_SCALE_ATTR(chan_8_rw_bytes),
|
||||
};
|
||||
|
||||
static struct perf_pmu_events_attr event_attrs[] = {
|
||||
AML_DDR_PMU_EVENT_ATTR(total_rw_bytes, ALL_CHAN_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_1_rw_bytes, CHAN1_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_2_rw_bytes, CHAN2_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_3_rw_bytes, CHAN3_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_4_rw_bytes, CHAN4_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_5_rw_bytes, CHAN5_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_6_rw_bytes, CHAN6_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_7_rw_bytes, CHAN7_COUNTER_ID),
|
||||
AML_DDR_PMU_EVENT_ATTR(chan_8_rw_bytes, CHAN8_COUNTER_ID),
|
||||
};
|
||||
|
||||
/* three attrs are combined an event */
|
||||
static struct attribute *ddr_perf_events_attrs[COUNTER_MAX_ID * 3];
|
||||
|
||||
static struct attribute_group ddr_perf_events_attr_group = {
|
||||
.name = "events",
|
||||
.attrs = ddr_perf_events_attrs,
|
||||
};
|
||||
|
||||
static umode_t meson_ddr_perf_format_attr_visible(struct kobject *kobj,
|
||||
struct attribute *attr,
|
||||
int n)
|
||||
{
|
||||
struct pmu *pmu = dev_get_drvdata(kobj_to_dev(kobj));
|
||||
struct ddr_pmu *ddr_pmu = to_ddr_pmu(pmu);
|
||||
const u64 *capability = ddr_pmu->info.hw_info->capability;
|
||||
struct device_attribute *dev_attr;
|
||||
int id;
|
||||
char value[20]; // config1:xxx, 20 is enough
|
||||
|
||||
dev_attr = container_of(attr, struct device_attribute, attr);
|
||||
dev_attr->show(NULL, NULL, value);
|
||||
|
||||
if (sscanf(value, "config1:%d", &id) == 1)
|
||||
return capability[0] & (1ULL << id) ? attr->mode : 0;
|
||||
|
||||
if (sscanf(value, "config2:%d", &id) == 1)
|
||||
return capability[1] & (1ULL << id) ? attr->mode : 0;
|
||||
|
||||
return attr->mode;
|
||||
}
|
||||
|
||||
static struct attribute_group ddr_perf_format_attr_group = {
|
||||
.name = "format",
|
||||
.is_visible = meson_ddr_perf_format_attr_visible,
|
||||
};
|
||||
|
||||
static ssize_t meson_ddr_perf_identifier_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *page)
|
||||
{
|
||||
struct ddr_pmu *pmu = dev_get_drvdata(dev);
|
||||
|
||||
return sysfs_emit(page, "%s\n", pmu->name);
|
||||
}
|
||||
|
||||
static struct device_attribute meson_ddr_perf_identifier_attr =
|
||||
__ATTR(identifier, 0444, meson_ddr_perf_identifier_show, NULL);
|
||||
|
||||
static struct attribute *meson_ddr_perf_identifier_attrs[] = {
|
||||
&meson_ddr_perf_identifier_attr.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static const struct attribute_group ddr_perf_identifier_attr_group = {
|
||||
.attrs = meson_ddr_perf_identifier_attrs,
|
||||
};
|
||||
|
||||
static const struct attribute_group *attr_groups[] = {
|
||||
&ddr_perf_events_attr_group,
|
||||
&ddr_perf_format_attr_group,
|
||||
&ddr_perf_cpumask_attr_group,
|
||||
&ddr_perf_identifier_attr_group,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static irqreturn_t dmc_irq_handler(int irq, void *dev_id)
|
||||
{
|
||||
struct dmc_info *info = dev_id;
|
||||
struct ddr_pmu *pmu;
|
||||
struct dmc_counter counters, *sum_cnter;
|
||||
int i;
|
||||
|
||||
pmu = dmc_info_to_pmu(info);
|
||||
|
||||
if (info->hw_info->irq_handler(info, &counters) != 0)
|
||||
goto out;
|
||||
|
||||
sum_cnter = &pmu->counters;
|
||||
sum_cnter->all_cnt += counters.all_cnt;
|
||||
sum_cnter->all_req += counters.all_req;
|
||||
|
||||
for (i = 0; i < pmu->info.hw_info->chann_nr; i++)
|
||||
sum_cnter->channel_cnt[i] += counters.channel_cnt[i];
|
||||
|
||||
if (pmu->pmu_enabled)
|
||||
/*
|
||||
* the timer interrupt only supprt
|
||||
* one shot mode, we have to re-enable
|
||||
* it in ISR to support continue mode.
|
||||
*/
|
||||
info->hw_info->enable(info);
|
||||
|
||||
dev_dbg(pmu->dev, "counts: %llu %llu %llu, %llu, %llu, %llu\t\t"
|
||||
"sum: %llu %llu %llu, %llu, %llu, %llu\n",
|
||||
counters.all_req,
|
||||
counters.all_cnt,
|
||||
counters.channel_cnt[0],
|
||||
counters.channel_cnt[1],
|
||||
counters.channel_cnt[2],
|
||||
counters.channel_cnt[3],
|
||||
|
||||
pmu->counters.all_req,
|
||||
pmu->counters.all_cnt,
|
||||
pmu->counters.channel_cnt[0],
|
||||
pmu->counters.channel_cnt[1],
|
||||
pmu->counters.channel_cnt[2],
|
||||
pmu->counters.channel_cnt[3]);
|
||||
out:
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int ddr_perf_offline_cpu(unsigned int cpu, struct hlist_node *node)
|
||||
{
|
||||
struct ddr_pmu *pmu = hlist_entry_safe(node, struct ddr_pmu, node);
|
||||
int target;
|
||||
|
||||
if (cpu != pmu->cpu)
|
||||
return 0;
|
||||
|
||||
target = cpumask_any_but(cpu_online_mask, cpu);
|
||||
if (target >= nr_cpu_ids)
|
||||
return 0;
|
||||
|
||||
perf_pmu_migrate_context(&pmu->pmu, cpu, target);
|
||||
pmu->cpu = target;
|
||||
|
||||
WARN_ON(irq_set_affinity(pmu->info.irq_num, cpumask_of(pmu->cpu)));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void fill_event_attr(struct ddr_pmu *pmu)
|
||||
{
|
||||
int i, j, k;
|
||||
struct attribute **dst = ddr_perf_events_attrs;
|
||||
|
||||
j = 0;
|
||||
k = 0;
|
||||
|
||||
/* fill ALL_CHAN_COUNTER_ID event */
|
||||
dst[j++] = &event_attrs[k].attr.attr;
|
||||
dst[j++] = &event_unit_attrs[k].attr;
|
||||
dst[j++] = &event_scale_attrs[k].attr;
|
||||
|
||||
k++;
|
||||
|
||||
/* fill each channel event */
|
||||
for (i = 0; i < pmu->info.hw_info->chann_nr; i++, k++) {
|
||||
dst[j++] = &event_attrs[k].attr.attr;
|
||||
dst[j++] = &event_unit_attrs[k].attr;
|
||||
dst[j++] = &event_scale_attrs[k].attr;
|
||||
}
|
||||
|
||||
dst[j] = NULL; /* mark end */
|
||||
}
|
||||
|
||||
static void fmt_attr_fill(struct attribute **fmt_attr)
|
||||
{
|
||||
ddr_perf_format_attr_group.attrs = fmt_attr;
|
||||
}
|
||||
|
||||
static int ddr_pmu_parse_dt(struct platform_device *pdev,
|
||||
struct dmc_info *info)
|
||||
{
|
||||
void __iomem *base;
|
||||
int i, ret;
|
||||
|
||||
info->hw_info = of_device_get_match_data(&pdev->dev);
|
||||
|
||||
for (i = 0; i < info->hw_info->dmc_nr; i++) {
|
||||
/* resource 0 for ddr register base */
|
||||
base = devm_platform_ioremap_resource(pdev, i);
|
||||
if (IS_ERR(base))
|
||||
return PTR_ERR(base);
|
||||
|
||||
info->ddr_reg[i] = base;
|
||||
}
|
||||
|
||||
/* resource i for pll register base */
|
||||
base = devm_platform_ioremap_resource(pdev, i);
|
||||
if (IS_ERR(base))
|
||||
return PTR_ERR(base);
|
||||
|
||||
info->pll_reg = base;
|
||||
|
||||
ret = platform_get_irq(pdev, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
info->irq_num = ret;
|
||||
|
||||
ret = devm_request_irq(&pdev->dev, info->irq_num, dmc_irq_handler,
|
||||
IRQF_NOBALANCING, dev_name(&pdev->dev),
|
||||
(void *)info);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int meson_ddr_pmu_create(struct platform_device *pdev)
|
||||
{
|
||||
int ret;
|
||||
char *name;
|
||||
struct ddr_pmu *pmu;
|
||||
|
||||
pmu = devm_kzalloc(&pdev->dev, sizeof(struct ddr_pmu), GFP_KERNEL);
|
||||
if (!pmu)
|
||||
return -ENOMEM;
|
||||
|
||||
*pmu = (struct ddr_pmu) {
|
||||
.pmu = {
|
||||
.module = THIS_MODULE,
|
||||
.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
|
||||
.task_ctx_nr = perf_invalid_context,
|
||||
.attr_groups = attr_groups,
|
||||
.event_init = meson_ddr_perf_event_init,
|
||||
.add = meson_ddr_perf_event_add,
|
||||
.del = meson_ddr_perf_event_del,
|
||||
.start = meson_ddr_perf_event_start,
|
||||
.stop = meson_ddr_perf_event_stop,
|
||||
.read = meson_ddr_perf_event_update,
|
||||
},
|
||||
};
|
||||
|
||||
ret = ddr_pmu_parse_dt(pdev, &pmu->info);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
fmt_attr_fill(pmu->info.hw_info->fmt_attr);
|
||||
|
||||
pmu->cpu = smp_processor_id();
|
||||
|
||||
name = devm_kasprintf(&pdev->dev, GFP_KERNEL, DDR_PERF_DEV_NAME);
|
||||
if (!name)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, name, NULL,
|
||||
ddr_perf_offline_cpu);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
pmu->cpuhp_state = ret;
|
||||
|
||||
/* Register the pmu instance for cpu hotplug */
|
||||
ret = cpuhp_state_add_instance_nocalls(pmu->cpuhp_state, &pmu->node);
|
||||
if (ret)
|
||||
goto cpuhp_instance_err;
|
||||
|
||||
fill_event_attr(pmu);
|
||||
|
||||
ret = perf_pmu_register(&pmu->pmu, name, -1);
|
||||
if (ret)
|
||||
goto pmu_register_err;
|
||||
|
||||
pmu->name = name;
|
||||
pmu->dev = &pdev->dev;
|
||||
pmu->pmu_enabled = false;
|
||||
|
||||
platform_set_drvdata(pdev, pmu);
|
||||
|
||||
return 0;
|
||||
|
||||
pmu_register_err:
|
||||
cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
|
||||
|
||||
cpuhp_instance_err:
|
||||
cpuhp_remove_state(pmu->cpuhp_state);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int meson_ddr_pmu_remove(struct platform_device *pdev)
|
||||
{
|
||||
struct ddr_pmu *pmu = platform_get_drvdata(pdev);
|
||||
|
||||
perf_pmu_unregister(&pmu->pmu);
|
||||
cpuhp_state_remove_instance_nocalls(pmu->cpuhp_state, &pmu->node);
|
||||
cpuhp_remove_state(pmu->cpuhp_state);
|
||||
|
||||
return 0;
|
||||
}
|
394
drivers/perf/amlogic/meson_g12_ddr_pmu.c
Normal file
394
drivers/perf/amlogic/meson_g12_ddr_pmu.c
Normal file
@ -0,0 +1,394 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2022 Amlogic, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <linux/err.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/printk.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include <soc/amlogic/meson_ddr_pmu.h>
|
||||
|
||||
#define PORT_MAJOR 32
|
||||
#define DEFAULT_XTAL_FREQ 24000000UL
|
||||
|
||||
#define DMC_QOS_IRQ BIT(30)
|
||||
|
||||
/* DMC bandwidth monitor register address offset */
|
||||
#define DMC_MON_G12_CTRL0 (0x20 << 2)
|
||||
#define DMC_MON_G12_CTRL1 (0x21 << 2)
|
||||
#define DMC_MON_G12_CTRL2 (0x22 << 2)
|
||||
#define DMC_MON_G12_CTRL3 (0x23 << 2)
|
||||
#define DMC_MON_G12_CTRL4 (0x24 << 2)
|
||||
#define DMC_MON_G12_CTRL5 (0x25 << 2)
|
||||
#define DMC_MON_G12_CTRL6 (0x26 << 2)
|
||||
#define DMC_MON_G12_CTRL7 (0x27 << 2)
|
||||
#define DMC_MON_G12_CTRL8 (0x28 << 2)
|
||||
|
||||
#define DMC_MON_G12_ALL_REQ_CNT (0x29 << 2)
|
||||
#define DMC_MON_G12_ALL_GRANT_CNT (0x2a << 2)
|
||||
#define DMC_MON_G12_ONE_GRANT_CNT (0x2b << 2)
|
||||
#define DMC_MON_G12_SEC_GRANT_CNT (0x2c << 2)
|
||||
#define DMC_MON_G12_THD_GRANT_CNT (0x2d << 2)
|
||||
#define DMC_MON_G12_FOR_GRANT_CNT (0x2e << 2)
|
||||
#define DMC_MON_G12_TIMER (0x2f << 2)
|
||||
|
||||
/* Each bit represents an AXI line */
|
||||
PMU_FORMAT_ATTR(event, "config:0-7");
|
||||
PMU_FORMAT_ATTR(arm, "config1:0");
|
||||
PMU_FORMAT_ATTR(gpu, "config1:1");
|
||||
PMU_FORMAT_ATTR(pcie, "config1:2");
|
||||
PMU_FORMAT_ATTR(hdcp, "config1:3");
|
||||
PMU_FORMAT_ATTR(hevc_front, "config1:4");
|
||||
PMU_FORMAT_ATTR(usb3_0, "config1:6");
|
||||
PMU_FORMAT_ATTR(device, "config1:7");
|
||||
PMU_FORMAT_ATTR(hevc_back, "config1:8");
|
||||
PMU_FORMAT_ATTR(h265enc, "config1:9");
|
||||
PMU_FORMAT_ATTR(vpu_read1, "config1:16");
|
||||
PMU_FORMAT_ATTR(vpu_read2, "config1:17");
|
||||
PMU_FORMAT_ATTR(vpu_read3, "config1:18");
|
||||
PMU_FORMAT_ATTR(vpu_write1, "config1:19");
|
||||
PMU_FORMAT_ATTR(vpu_write2, "config1:20");
|
||||
PMU_FORMAT_ATTR(vdec, "config1:21");
|
||||
PMU_FORMAT_ATTR(hcodec, "config1:22");
|
||||
PMU_FORMAT_ATTR(ge2d, "config1:23");
|
||||
|
||||
PMU_FORMAT_ATTR(spicc1, "config1:32");
|
||||
PMU_FORMAT_ATTR(usb0, "config1:33");
|
||||
PMU_FORMAT_ATTR(dma, "config1:34");
|
||||
PMU_FORMAT_ATTR(arb0, "config1:35");
|
||||
PMU_FORMAT_ATTR(sd_emmc_b, "config1:36");
|
||||
PMU_FORMAT_ATTR(usb1, "config1:37");
|
||||
PMU_FORMAT_ATTR(audio, "config1:38");
|
||||
PMU_FORMAT_ATTR(aififo, "config1:39");
|
||||
PMU_FORMAT_ATTR(parser, "config1:41");
|
||||
PMU_FORMAT_ATTR(ao_cpu, "config1:42");
|
||||
PMU_FORMAT_ATTR(sd_emmc_c, "config1:43");
|
||||
PMU_FORMAT_ATTR(spicc2, "config1:44");
|
||||
PMU_FORMAT_ATTR(ethernet, "config1:45");
|
||||
PMU_FORMAT_ATTR(sana, "config1:46");
|
||||
|
||||
/* for sm1 and g12b */
|
||||
PMU_FORMAT_ATTR(nna, "config1:10");
|
||||
|
||||
/* for g12b only */
|
||||
PMU_FORMAT_ATTR(gdc, "config1:11");
|
||||
PMU_FORMAT_ATTR(mipi_isp, "config1:12");
|
||||
PMU_FORMAT_ATTR(arm1, "config1:13");
|
||||
PMU_FORMAT_ATTR(sd_emmc_a, "config1:40");
|
||||
|
||||
static struct attribute *g12_pmu_format_attrs[] = {
|
||||
&format_attr_event.attr,
|
||||
&format_attr_arm.attr,
|
||||
&format_attr_gpu.attr,
|
||||
&format_attr_nna.attr,
|
||||
&format_attr_gdc.attr,
|
||||
&format_attr_arm1.attr,
|
||||
&format_attr_mipi_isp.attr,
|
||||
&format_attr_sd_emmc_a.attr,
|
||||
&format_attr_pcie.attr,
|
||||
&format_attr_hdcp.attr,
|
||||
&format_attr_hevc_front.attr,
|
||||
&format_attr_usb3_0.attr,
|
||||
&format_attr_device.attr,
|
||||
&format_attr_hevc_back.attr,
|
||||
&format_attr_h265enc.attr,
|
||||
&format_attr_vpu_read1.attr,
|
||||
&format_attr_vpu_read2.attr,
|
||||
&format_attr_vpu_read3.attr,
|
||||
&format_attr_vpu_write1.attr,
|
||||
&format_attr_vpu_write2.attr,
|
||||
&format_attr_vdec.attr,
|
||||
&format_attr_hcodec.attr,
|
||||
&format_attr_ge2d.attr,
|
||||
&format_attr_spicc1.attr,
|
||||
&format_attr_usb0.attr,
|
||||
&format_attr_dma.attr,
|
||||
&format_attr_arb0.attr,
|
||||
&format_attr_sd_emmc_b.attr,
|
||||
&format_attr_usb1.attr,
|
||||
&format_attr_audio.attr,
|
||||
&format_attr_aififo.attr,
|
||||
&format_attr_parser.attr,
|
||||
&format_attr_ao_cpu.attr,
|
||||
&format_attr_sd_emmc_c.attr,
|
||||
&format_attr_spicc2.attr,
|
||||
&format_attr_ethernet.attr,
|
||||
&format_attr_sana.attr,
|
||||
NULL,
|
||||
};
|
||||
|
||||
/* calculate ddr clock */
|
||||
static unsigned long dmc_g12_get_freq_quick(struct dmc_info *info)
|
||||
{
|
||||
unsigned int val;
|
||||
unsigned int n, m, od1;
|
||||
unsigned int od_div = 0xfff;
|
||||
unsigned long freq = 0;
|
||||
|
||||
val = readl(info->pll_reg);
|
||||
val = val & 0xfffff;
|
||||
switch ((val >> 16) & 7) {
|
||||
case 0:
|
||||
od_div = 2;
|
||||
break;
|
||||
|
||||
case 1:
|
||||
od_div = 3;
|
||||
break;
|
||||
|
||||
case 2:
|
||||
od_div = 4;
|
||||
break;
|
||||
|
||||
case 3:
|
||||
od_div = 6;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
od_div = 8;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
m = val & 0x1ff;
|
||||
n = ((val >> 10) & 0x1f);
|
||||
od1 = (((val >> 19) & 0x1)) == 1 ? 2 : 1;
|
||||
freq = DEFAULT_XTAL_FREQ / 1000; /* avoid overflow */
|
||||
if (n)
|
||||
freq = ((((freq * m) / n) >> od1) / od_div) * 1000;
|
||||
|
||||
return freq;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/*
 * Debug helper: print the nine DMC_MON control registers, the request/grant
 * counter registers and the timer register of DMC 0.
 */
static void g12_dump_reg(struct dmc_info *db)
{
	void __iomem *base = db->ddr_reg[0];
	unsigned int r;
	int i;

	for (i = 0; i < 9; i++) {
		r = readl(base + (DMC_MON_G12_CTRL0 + (i << 2)));
		pr_notice("DMC_MON_CTRL%d:        %08x\n", i, r);
	}

	r = readl(base + DMC_MON_G12_ALL_REQ_CNT);
	pr_notice("DMC_MON_ALL_REQ_CNT:  %08x\n", r);

	r = readl(base + DMC_MON_G12_ALL_GRANT_CNT);
	pr_notice("DMC_MON_ALL_GRANT_CNT:%08x\n", r);

	r = readl(base + DMC_MON_G12_ONE_GRANT_CNT);
	pr_notice("DMC_MON_ONE_GRANT_CNT:%08x\n", r);

	r = readl(base + DMC_MON_G12_SEC_GRANT_CNT);
	pr_notice("DMC_MON_SEC_GRANT_CNT:%08x\n", r);

	r = readl(base + DMC_MON_G12_THD_GRANT_CNT);
	pr_notice("DMC_MON_THD_GRANT_CNT:%08x\n", r);

	r = readl(base + DMC_MON_G12_FOR_GRANT_CNT);
	pr_notice("DMC_MON_FOR_GRANT_CNT:%08x\n", r);

	r = readl(base + DMC_MON_G12_TIMER);
	pr_notice("DMC_MON_TIMER:        %08x\n", r);
}
#endif
|
||||
|
||||
static void dmc_g12_counter_enable(struct dmc_info *info)
|
||||
{
|
||||
unsigned int val;
|
||||
unsigned long clock_count = dmc_g12_get_freq_quick(info) / 10; /* 100ms */
|
||||
|
||||
writel(clock_count, info->ddr_reg[0] + DMC_MON_G12_TIMER);
|
||||
|
||||
val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
|
||||
|
||||
/* enable all channel */
|
||||
val = BIT(31) | /* enable bit */
|
||||
BIT(20) | /* use timer */
|
||||
0x0f; /* 4 channels */
|
||||
|
||||
writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
|
||||
|
||||
#ifdef DEBUG
|
||||
g12_dump_reg(info);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void dmc_g12_config_fiter(struct dmc_info *info,
|
||||
int port, int channel)
|
||||
{
|
||||
u32 val;
|
||||
u32 rp[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL1, DMC_MON_G12_CTRL3,
|
||||
DMC_MON_G12_CTRL5, DMC_MON_G12_CTRL7};
|
||||
u32 rs[MAX_CHANNEL_NUM] = {DMC_MON_G12_CTRL2, DMC_MON_G12_CTRL4,
|
||||
DMC_MON_G12_CTRL6, DMC_MON_G12_CTRL8};
|
||||
int subport = -1;
|
||||
|
||||
/* clear all port mask */
|
||||
if (port < 0) {
|
||||
writel(0, info->ddr_reg[0] + rp[channel]);
|
||||
writel(0, info->ddr_reg[0] + rs[channel]);
|
||||
return;
|
||||
}
|
||||
|
||||
if (port >= PORT_MAJOR)
|
||||
subport = port - PORT_MAJOR;
|
||||
|
||||
if (subport < 0) {
|
||||
val = readl(info->ddr_reg[0] + rp[channel]);
|
||||
val |= (1 << port);
|
||||
writel(val, info->ddr_reg[0] + rp[channel]);
|
||||
val = 0xffff;
|
||||
writel(val, info->ddr_reg[0] + rs[channel]);
|
||||
} else {
|
||||
val = BIT(23); /* select device */
|
||||
writel(val, info->ddr_reg[0] + rp[channel]);
|
||||
val = readl(info->ddr_reg[0] + rs[channel]);
|
||||
val |= (1 << subport);
|
||||
writel(val, info->ddr_reg[0] + rs[channel]);
|
||||
}
|
||||
}
|
||||
|
||||
static void dmc_g12_set_axi_filter(struct dmc_info *info, int axi_id, int channel)
|
||||
{
|
||||
if (channel > info->hw_info->chann_nr)
|
||||
return;
|
||||
|
||||
dmc_g12_config_fiter(info, axi_id, channel);
|
||||
}
|
||||
|
||||
static void dmc_g12_counter_disable(struct dmc_info *info)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* clear timer */
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_TIMER);
|
||||
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_ONE_GRANT_CNT);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_SEC_GRANT_CNT);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_THD_GRANT_CNT);
|
||||
writel(0, info->ddr_reg[0] + DMC_MON_G12_FOR_GRANT_CNT);
|
||||
|
||||
/* clear port channel mapping */
|
||||
for (i = 0; i < info->hw_info->chann_nr; i++)
|
||||
dmc_g12_config_fiter(info, -1, i);
|
||||
}
|
||||
|
||||
static void dmc_g12_get_counters(struct dmc_info *info,
|
||||
struct dmc_counter *counter)
|
||||
{
|
||||
int i;
|
||||
unsigned int reg;
|
||||
|
||||
counter->all_cnt = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_GRANT_CNT);
|
||||
counter->all_req = readl(info->ddr_reg[0] + DMC_MON_G12_ALL_REQ_CNT);
|
||||
|
||||
for (i = 0; i < info->hw_info->chann_nr; i++) {
|
||||
reg = DMC_MON_G12_ONE_GRANT_CNT + (i << 2);
|
||||
counter->channel_cnt[i] = readl(info->ddr_reg[0] + reg);
|
||||
}
|
||||
}
|
||||
|
||||
static int dmc_g12_irq_handler(struct dmc_info *info,
|
||||
struct dmc_counter *counter)
|
||||
{
|
||||
unsigned int val;
|
||||
int ret = -EINVAL;
|
||||
|
||||
val = readl(info->ddr_reg[0] + DMC_MON_G12_CTRL0);
|
||||
if (val & DMC_QOS_IRQ) {
|
||||
dmc_g12_get_counters(info, counter);
|
||||
/* clear irq flags */
|
||||
writel(val, info->ddr_reg[0] + DMC_MON_G12_CTRL0);
|
||||
ret = 0;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static const struct dmc_hw_info g12a_dmc_info = {
|
||||
.enable = dmc_g12_counter_enable,
|
||||
.disable = dmc_g12_counter_disable,
|
||||
.irq_handler = dmc_g12_irq_handler,
|
||||
.get_counters = dmc_g12_get_counters,
|
||||
.set_axi_filter = dmc_g12_set_axi_filter,
|
||||
|
||||
.dmc_nr = 1,
|
||||
.chann_nr = 4,
|
||||
.capability = {0X7EFF00FF03DF, 0},
|
||||
.fmt_attr = g12_pmu_format_attrs,
|
||||
};
|
||||
|
||||
static const struct dmc_hw_info g12b_dmc_info = {
|
||||
.enable = dmc_g12_counter_enable,
|
||||
.disable = dmc_g12_counter_disable,
|
||||
.irq_handler = dmc_g12_irq_handler,
|
||||
.get_counters = dmc_g12_get_counters,
|
||||
.set_axi_filter = dmc_g12_set_axi_filter,
|
||||
|
||||
.dmc_nr = 1,
|
||||
.chann_nr = 4,
|
||||
.capability = {0X7FFF00FF3FDF, 0},
|
||||
.fmt_attr = g12_pmu_format_attrs,
|
||||
};
|
||||
|
||||
static const struct dmc_hw_info sm1_dmc_info = {
|
||||
.enable = dmc_g12_counter_enable,
|
||||
.disable = dmc_g12_counter_disable,
|
||||
.irq_handler = dmc_g12_irq_handler,
|
||||
.get_counters = dmc_g12_get_counters,
|
||||
.set_axi_filter = dmc_g12_set_axi_filter,
|
||||
|
||||
.dmc_nr = 1,
|
||||
.chann_nr = 4,
|
||||
.capability = {0X7EFF00FF07DF, 0},
|
||||
.fmt_attr = g12_pmu_format_attrs,
|
||||
};
|
||||
|
||||
/* Platform probe: hand the device over to the common meson DDR PMU code. */
static int g12_ddr_pmu_probe(struct platform_device *pdev)
{
	return meson_ddr_pmu_create(pdev);
}
|
||||
|
||||
/*
 * Platform remove: tear down the common meson DDR PMU state.
 * The helper's return value is ignored and 0 is always returned.
 */
static int g12_ddr_pmu_remove(struct platform_device *pdev)
{
	meson_ddr_pmu_remove(pdev);

	return 0;
}
|
||||
|
||||
static const struct of_device_id meson_ddr_pmu_dt_match[] = {
|
||||
{
|
||||
.compatible = "amlogic,g12a-ddr-pmu",
|
||||
.data = &g12a_dmc_info,
|
||||
},
|
||||
{
|
||||
.compatible = "amlogic,g12b-ddr-pmu",
|
||||
.data = &g12b_dmc_info,
|
||||
},
|
||||
{
|
||||
.compatible = "amlogic,sm1-ddr-pmu",
|
||||
.data = &sm1_dmc_info,
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
static struct platform_driver g12_ddr_pmu_driver = {
|
||||
.probe = g12_ddr_pmu_probe,
|
||||
.remove = g12_ddr_pmu_remove,
|
||||
|
||||
.driver = {
|
||||
.name = "meson-g12-ddr-pmu",
|
||||
.of_match_table = meson_ddr_pmu_dt_match,
|
||||
},
|
||||
};
|
||||
|
||||
module_platform_driver(g12_ddr_pmu_driver);
|
||||
MODULE_AUTHOR("Jiucheng Xu");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("Amlogic G12 series SoC DDR PMU");
|
13
drivers/perf/arm_cspmu/Kconfig
Normal file
13
drivers/perf/arm_cspmu/Kconfig
Normal file
@ -0,0 +1,13 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
|
||||
config ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
|
||||
tristate "ARM Coresight Architecture PMU"
|
||||
depends on ARM64 && ACPI
|
||||
depends on ACPI_APMT || COMPILE_TEST
|
||||
help
|
||||
Provides support for performance monitoring unit (PMU) devices
|
||||
based on ARM CoreSight PMU architecture. Note that this PMU
|
||||
architecture does not have relationship with the ARM CoreSight
|
||||
Self-Hosted Tracing.
|
6
drivers/perf/arm_cspmu/Makefile
Normal file
6
drivers/perf/arm_cspmu/Makefile
Normal file
@ -0,0 +1,6 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
|
||||
arm_cspmu_module-y := arm_cspmu.o nvidia_cspmu.o
|
1303
drivers/perf/arm_cspmu/arm_cspmu.c
Normal file
1303
drivers/perf/arm_cspmu/arm_cspmu.c
Normal file
File diff suppressed because it is too large
Load Diff
151
drivers/perf/arm_cspmu/arm_cspmu.h
Normal file
151
drivers/perf/arm_cspmu/arm_cspmu.h
Normal file
@ -0,0 +1,151 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* ARM CoreSight Architecture PMU driver.
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef __ARM_CSPMU_H__
|
||||
#define __ARM_CSPMU_H__
|
||||
|
||||
#include <linux/acpi.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/cpumask.h>
|
||||
#include <linux/device.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#define to_arm_cspmu(p) (container_of(p, struct arm_cspmu, pmu))
|
||||
|
||||
#define ARM_CSPMU_EXT_ATTR(_name, _func, _config) \
|
||||
(&((struct dev_ext_attribute[]){ \
|
||||
{ \
|
||||
.attr = __ATTR(_name, 0444, _func, NULL), \
|
||||
.var = (void *)_config \
|
||||
} \
|
||||
})[0].attr.attr)
|
||||
|
||||
#define ARM_CSPMU_FORMAT_ATTR(_name, _config) \
|
||||
ARM_CSPMU_EXT_ATTR(_name, arm_cspmu_sysfs_format_show, (char *)_config)
|
||||
|
||||
#define ARM_CSPMU_EVENT_ATTR(_name, _config) \
|
||||
PMU_EVENT_ATTR_ID(_name, arm_cspmu_sysfs_event_show, _config)
|
||||
|
||||
|
||||
/* Default event id mask */
|
||||
#define ARM_CSPMU_EVENT_MASK GENMASK_ULL(63, 0)
|
||||
|
||||
/* Default filter value mask */
|
||||
#define ARM_CSPMU_FILTER_MASK GENMASK_ULL(63, 0)
|
||||
|
||||
/* Default event format */
|
||||
#define ARM_CSPMU_FORMAT_EVENT_ATTR \
|
||||
ARM_CSPMU_FORMAT_ATTR(event, "config:0-32")
|
||||
|
||||
/* Default filter format */
|
||||
#define ARM_CSPMU_FORMAT_FILTER_ATTR \
|
||||
ARM_CSPMU_FORMAT_ATTR(filter, "config1:0-31")
|
||||
|
||||
/*
|
||||
* This is the default event number for cycle count, if supported, since the
|
||||
* ARM Coresight PMU specification does not define a standard event code
|
||||
* for cycle count.
|
||||
*/
|
||||
#define ARM_CSPMU_EVT_CYCLES_DEFAULT (0x1ULL << 32)
|
||||
|
||||
/*
|
||||
* The ARM Coresight PMU supports up to 256 event counters.
|
||||
* If the counters are larger-than 32-bits, then the PMU includes at
|
||||
* most 128 counters.
|
||||
*/
|
||||
#define ARM_CSPMU_MAX_HW_CNTRS 256
|
||||
|
||||
/* The cycle counter, if implemented, is located at counter[31]. */
|
||||
#define ARM_CSPMU_CYCLE_CNTR_IDX 31
|
||||
|
||||
/* PMIIDR register field */
|
||||
#define ARM_CSPMU_PMIIDR_IMPLEMENTER GENMASK(11, 0)
|
||||
#define ARM_CSPMU_PMIIDR_PRODUCTID GENMASK(31, 20)
|
||||
|
||||
struct arm_cspmu;
|
||||
|
||||
/* This tracks the events assigned to each counter in the PMU. */
|
||||
struct arm_cspmu_hw_events {
|
||||
/* The events that are active on the PMU for a given logical index. */
|
||||
struct perf_event **events;
|
||||
|
||||
/*
|
||||
* Each bit indicates a logical counter is being used (or not) for an
|
||||
* event. If cycle counter is supported and there is a gap between
|
||||
* regular and cycle counter, the last logical counter is mapped to
|
||||
* cycle counter. Otherwise, logical and physical have 1-to-1 mapping.
|
||||
*/
|
||||
DECLARE_BITMAP(used_ctrs, ARM_CSPMU_MAX_HW_CNTRS);
|
||||
};
|
||||
|
||||
/* Contains ops to query vendor/implementer specific attribute. */
|
||||
struct arm_cspmu_impl_ops {
|
||||
/* Get event attributes */
|
||||
struct attribute **(*get_event_attrs)(const struct arm_cspmu *cspmu);
|
||||
/* Get format attributes */
|
||||
struct attribute **(*get_format_attrs)(const struct arm_cspmu *cspmu);
|
||||
/* Get string identifier */
|
||||
const char *(*get_identifier)(const struct arm_cspmu *cspmu);
|
||||
/* Get PMU name to register to core perf */
|
||||
const char *(*get_name)(const struct arm_cspmu *cspmu);
|
||||
/* Check if the event corresponds to cycle count event */
|
||||
bool (*is_cycle_counter_event)(const struct perf_event *event);
|
||||
/* Decode event type/id from configs */
|
||||
u32 (*event_type)(const struct perf_event *event);
|
||||
/* Decode filter value from configs */
|
||||
u32 (*event_filter)(const struct perf_event *event);
|
||||
/* Hide/show unsupported events */
|
||||
umode_t (*event_attr_is_visible)(struct kobject *kobj,
|
||||
struct attribute *attr, int unused);
|
||||
};
|
||||
|
||||
/* Vendor/implementer descriptor. */
|
||||
struct arm_cspmu_impl {
|
||||
u32 pmiidr;
|
||||
struct arm_cspmu_impl_ops ops;
|
||||
void *ctx;
|
||||
};
|
||||
|
||||
/* Coresight PMU descriptor. */
|
||||
struct arm_cspmu {
|
||||
struct pmu pmu;
|
||||
struct device *dev;
|
||||
struct acpi_apmt_node *apmt_node;
|
||||
const char *name;
|
||||
const char *identifier;
|
||||
void __iomem *base0;
|
||||
void __iomem *base1;
|
||||
int irq;
|
||||
cpumask_t associated_cpus;
|
||||
cpumask_t active_cpu;
|
||||
struct hlist_node cpuhp_node;
|
||||
|
||||
u32 pmcfgr;
|
||||
u32 num_logical_ctrs;
|
||||
u32 num_set_clr_reg;
|
||||
int cycle_counter_logical_idx;
|
||||
|
||||
struct arm_cspmu_hw_events hw_events;
|
||||
|
||||
struct arm_cspmu_impl impl;
|
||||
};
|
||||
|
||||
/* Default function to show event attribute in sysfs. */
|
||||
ssize_t arm_cspmu_sysfs_event_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf);
|
||||
|
||||
/* Default function to show format attribute in sysfs. */
|
||||
ssize_t arm_cspmu_sysfs_format_show(struct device *dev,
|
||||
struct device_attribute *attr,
|
||||
char *buf);
|
||||
|
||||
#endif /* __ARM_CSPMU_H__ */
|
400
drivers/perf/arm_cspmu/nvidia_cspmu.c
Normal file
400
drivers/perf/arm_cspmu/nvidia_cspmu.c
Normal file
@ -0,0 +1,400 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Support for NVIDIA specific attributes. */
|
||||
|
||||
#include <linux/topology.h>
|
||||
|
||||
#include "nvidia_cspmu.h"
|
||||
|
||||
#define NV_PCIE_PORT_COUNT 10ULL
|
||||
#define NV_PCIE_FILTER_ID_MASK GENMASK_ULL(NV_PCIE_PORT_COUNT - 1, 0)
|
||||
|
||||
#define NV_NVL_C2C_PORT_COUNT 2ULL
|
||||
#define NV_NVL_C2C_FILTER_ID_MASK GENMASK_ULL(NV_NVL_C2C_PORT_COUNT - 1, 0)
|
||||
|
||||
#define NV_CNVL_PORT_COUNT 4ULL
|
||||
#define NV_CNVL_FILTER_ID_MASK GENMASK_ULL(NV_CNVL_PORT_COUNT - 1, 0)
|
||||
|
||||
#define NV_GENERIC_FILTER_ID_MASK GENMASK_ULL(31, 0)
|
||||
|
||||
#define NV_PRODID_MASK GENMASK(31, 0)
|
||||
|
||||
#define NV_FORMAT_NAME_GENERIC 0
|
||||
|
||||
#define to_nv_cspmu_ctx(cspmu) ((struct nv_cspmu_ctx *)(cspmu->impl.ctx))
|
||||
|
||||
#define NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _num, _suff, _config) \
|
||||
ARM_CSPMU_EVENT_ATTR(_pref##_num##_suff, _config)
|
||||
|
||||
#define NV_CSPMU_EVENT_ATTR_4(_pref, _suff, _config) \
|
||||
NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _0_, _suff, _config), \
|
||||
NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _1_, _suff, _config + 1), \
|
||||
NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _2_, _suff, _config + 2), \
|
||||
NV_CSPMU_EVENT_ATTR_4_INNER(_pref, _3_, _suff, _config + 3)
|
||||
|
||||
struct nv_cspmu_ctx {
|
||||
const char *name;
|
||||
u32 filter_mask;
|
||||
u32 filter_default_val;
|
||||
struct attribute **event_attr;
|
||||
struct attribute **format_attr;
|
||||
};
|
||||
|
||||
static struct attribute *scf_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(bus_cycles, 0x1d),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(scf_cache_allocate, 0xF0),
|
||||
ARM_CSPMU_EVENT_ATTR(scf_cache_refill, 0xF1),
|
||||
ARM_CSPMU_EVENT_ATTR(scf_cache, 0xF2),
|
||||
ARM_CSPMU_EVENT_ATTR(scf_cache_wb, 0xF3),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, rd_data, 0x101),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, dl_rsp, 0x105),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wb_data, 0x109),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, ev_rsp, 0x10d),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, prb_data, 0x111),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, rd_outstanding, 0x115),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, dl_outstanding, 0x119),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wb_outstanding, 0x11d),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wr_outstanding, 0x121),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, ev_outstanding, 0x125),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, prb_outstanding, 0x129),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, rd_access, 0x12d),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, dl_access, 0x131),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wb_access, 0x135),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wr_access, 0x139),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, ev_access, 0x13d),
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, prb_access, 0x141),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_data, 0x145),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_access, 0x149),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_access, 0x14d),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_rd_outstanding, 0x151),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_outstanding, 0x155),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_data, 0x159),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_access, 0x15d),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_access, 0x161),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_rd_outstanding, 0x165),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_outstanding, 0x169),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_rd_data, 0x16d),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_rd_access, 0x16e),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_rd_outstanding, 0x16f),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_dl_rsp, 0x170),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_dl_access, 0x171),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_dl_outstanding, 0x172),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wb_data, 0x173),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wb_access, 0x174),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wb_outstanding, 0x175),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_ev_rsp, 0x176),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_ev_access, 0x177),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_ev_outstanding, 0x178),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wr_data, 0x179),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wr_outstanding, 0x17a),
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wr_access, 0x17b),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(socket, wr_data, 0x17c),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_data, 0x180),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_data, 0x184),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wr_access, 0x188),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, gmem_wb_outstanding, 0x18c),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_data, 0x190),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_data, 0x194),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wr_access, 0x198),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, rem_wb_outstanding, 0x19c),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(gmem_wr_total_bytes, 0x1a0),
|
||||
ARM_CSPMU_EVENT_ATTR(remote_socket_wr_total_bytes, 0x1a1),
|
||||
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_data, 0x1a2),
|
||||
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_outstanding, 0x1a3),
|
||||
ARM_CSPMU_EVENT_ATTR(remote_socket_rd_access, 0x1a4),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_rd_data, 0x1a5),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_rd_access, 0x1a6),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_rd_outstanding, 0x1a7),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_dl_rsp, 0x1a8),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_dl_access, 0x1a9),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_dl_outstanding, 0x1aa),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wb_data, 0x1ab),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wb_access, 0x1ac),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wb_outstanding, 0x1ad),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_ev_rsp, 0x1ae),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_ev_access, 0x1af),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_ev_outstanding, 0x1b0),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wr_data, 0x1b1),
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wr_outstanding, 0x1b2),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_data, 0x1b3),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_access, 0x1b7),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_access, 0x1bb),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_rd_outstanding, 0x1bf),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_outstanding, 0x1c3),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(ocu_prb_access, 0x1c7),
|
||||
ARM_CSPMU_EVENT_ATTR(ocu_prb_data, 0x1c8),
|
||||
ARM_CSPMU_EVENT_ATTR(ocu_prb_outstanding, 0x1c9),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wr_access, 0x1ca),
|
||||
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_access, 0x1cb),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_data, 0x1cf),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wr_data, 0x1d3),
|
||||
NV_CSPMU_EVENT_ATTR_4(ocu, cmem_wb_outstanding, 0x1d7),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cmem_wr_total_bytes, 0x1db),
|
||||
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *mcf_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes_loc, 0x0),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_bytes_rem, 0x1),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes_loc, 0x2),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_bytes_rem, 0x3),
|
||||
ARM_CSPMU_EVENT_ATTR(total_bytes_loc, 0x4),
|
||||
ARM_CSPMU_EVENT_ATTR(total_bytes_rem, 0x5),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req_loc, 0x6),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_req_rem, 0x7),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req_loc, 0x8),
|
||||
ARM_CSPMU_EVENT_ATTR(wr_req_rem, 0x9),
|
||||
ARM_CSPMU_EVENT_ATTR(total_req_loc, 0xa),
|
||||
ARM_CSPMU_EVENT_ATTR(total_req_rem, 0xb),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_loc, 0xc),
|
||||
ARM_CSPMU_EVENT_ATTR(rd_cum_outs_rem, 0xd),
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_event_attrs[] = {
|
||||
ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *scf_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *pcie_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(root_port, "config1:0-9"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *nvlink_c2c_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *cnvlink_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_ATTR(rem_socket, "config1:0-3"),
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute *generic_pmu_format_attrs[] = {
|
||||
ARM_CSPMU_FORMAT_EVENT_ATTR,
|
||||
ARM_CSPMU_FORMAT_FILTER_ATTR,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static struct attribute **
|
||||
nv_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->event_attr;
|
||||
}
|
||||
|
||||
static struct attribute **
|
||||
nv_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->format_attr;
|
||||
}
|
||||
|
||||
static const char *
|
||||
nv_cspmu_get_name(const struct arm_cspmu *cspmu)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx = to_nv_cspmu_ctx(cspmu);
|
||||
|
||||
return ctx->name;
|
||||
}
|
||||
|
||||
static u32 nv_cspmu_event_filter(const struct perf_event *event)
|
||||
{
|
||||
const struct nv_cspmu_ctx *ctx =
|
||||
to_nv_cspmu_ctx(to_arm_cspmu(event->pmu));
|
||||
|
||||
if (ctx->filter_mask == 0)
|
||||
return ctx->filter_default_val;
|
||||
|
||||
return event->attr.config1 & ctx->filter_mask;
|
||||
}
|
||||
|
||||
/* Selects what nv_cspmu_format_name() substitutes into the name pattern. */
enum nv_cspmu_name_fmt {
	/* A running index shared by all generic PMUs. */
	NAME_FMT_GENERIC,
	/* The node of the first CPU associated with the PMU. */
	NAME_FMT_SOCKET,
};
|
||||
|
||||
struct nv_cspmu_match {
|
||||
u32 prodid;
|
||||
u32 prodid_mask;
|
||||
u64 filter_mask;
|
||||
u32 filter_default_val;
|
||||
const char *name_pattern;
|
||||
enum nv_cspmu_name_fmt name_fmt;
|
||||
struct attribute **event_attr;
|
||||
struct attribute **format_attr;
|
||||
};
|
||||
|
||||
static const struct nv_cspmu_match nv_cspmu_match[] = {
|
||||
{
|
||||
.prodid = 0x103,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = NV_PCIE_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_PCIE_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_pcie_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = pcie_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x104,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = 0x0,
|
||||
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_nvlink_c2c1_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = nvlink_c2c_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x105,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = 0x0,
|
||||
.filter_default_val = NV_NVL_C2C_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_nvlink_c2c0_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = nvlink_c2c_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x106,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = NV_CNVL_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_CNVL_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_cnvlink_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = mcf_pmu_event_attrs,
|
||||
.format_attr = cnvlink_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0x2CF,
|
||||
.prodid_mask = NV_PRODID_MASK,
|
||||
.filter_mask = 0x0,
|
||||
.filter_default_val = 0x0,
|
||||
.name_pattern = "nvidia_scf_pmu_%u",
|
||||
.name_fmt = NAME_FMT_SOCKET,
|
||||
.event_attr = scf_pmu_event_attrs,
|
||||
.format_attr = scf_pmu_format_attrs
|
||||
},
|
||||
{
|
||||
.prodid = 0,
|
||||
.prodid_mask = 0,
|
||||
.filter_mask = NV_GENERIC_FILTER_ID_MASK,
|
||||
.filter_default_val = NV_GENERIC_FILTER_ID_MASK,
|
||||
.name_pattern = "nvidia_uncore_pmu_%u",
|
||||
.name_fmt = NAME_FMT_GENERIC,
|
||||
.event_attr = generic_pmu_event_attrs,
|
||||
.format_attr = generic_pmu_format_attrs
|
||||
},
|
||||
};
|
||||
|
||||
static char *nv_cspmu_format_name(const struct arm_cspmu *cspmu,
|
||||
const struct nv_cspmu_match *match)
|
||||
{
|
||||
char *name;
|
||||
struct device *dev = cspmu->dev;
|
||||
|
||||
static atomic_t pmu_generic_idx = {0};
|
||||
|
||||
switch (match->name_fmt) {
|
||||
case NAME_FMT_SOCKET: {
|
||||
const int cpu = cpumask_first(&cspmu->associated_cpus);
|
||||
const int socket = cpu_to_node(cpu);
|
||||
|
||||
name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
|
||||
socket);
|
||||
break;
|
||||
}
|
||||
case NAME_FMT_GENERIC:
|
||||
name = devm_kasprintf(dev, GFP_KERNEL, match->name_pattern,
|
||||
atomic_fetch_inc(&pmu_generic_idx));
|
||||
break;
|
||||
default:
|
||||
name = NULL;
|
||||
break;
|
||||
}
|
||||
|
||||
return name;
|
||||
}
|
||||
|
||||
int nv_cspmu_init_ops(struct arm_cspmu *cspmu)
|
||||
{
|
||||
u32 prodid;
|
||||
struct nv_cspmu_ctx *ctx;
|
||||
struct device *dev = cspmu->dev;
|
||||
struct arm_cspmu_impl_ops *impl_ops = &cspmu->impl.ops;
|
||||
const struct nv_cspmu_match *match = nv_cspmu_match;
|
||||
|
||||
ctx = devm_kzalloc(dev, sizeof(struct nv_cspmu_ctx), GFP_KERNEL);
|
||||
if (!ctx)
|
||||
return -ENOMEM;
|
||||
|
||||
prodid = FIELD_GET(ARM_CSPMU_PMIIDR_PRODUCTID, cspmu->impl.pmiidr);
|
||||
|
||||
/* Find matching PMU. */
|
||||
for (; match->prodid; match++) {
|
||||
const u32 prodid_mask = match->prodid_mask;
|
||||
|
||||
if ((match->prodid & prodid_mask) == (prodid & prodid_mask))
|
||||
break;
|
||||
}
|
||||
|
||||
ctx->name = nv_cspmu_format_name(cspmu, match);
|
||||
ctx->filter_mask = match->filter_mask;
|
||||
ctx->filter_default_val = match->filter_default_val;
|
||||
ctx->event_attr = match->event_attr;
|
||||
ctx->format_attr = match->format_attr;
|
||||
|
||||
cspmu->impl.ctx = ctx;
|
||||
|
||||
/* NVIDIA specific callbacks. */
|
||||
impl_ops->event_filter = nv_cspmu_event_filter;
|
||||
impl_ops->get_event_attrs = nv_cspmu_get_event_attrs;
|
||||
impl_ops->get_format_attrs = nv_cspmu_get_format_attrs;
|
||||
impl_ops->get_name = nv_cspmu_get_name;
|
||||
|
||||
/* Set others to NULL to use default callback. */
|
||||
impl_ops->event_type = NULL;
|
||||
impl_ops->event_attr_is_visible = NULL;
|
||||
impl_ops->get_identifier = NULL;
|
||||
impl_ops->is_cycle_counter_event = NULL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(nv_cspmu_init_ops);
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
17
drivers/perf/arm_cspmu/nvidia_cspmu.h
Normal file
17
drivers/perf/arm_cspmu/nvidia_cspmu.h
Normal file
@ -0,0 +1,17 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0
|
||||
*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
/* Support for NVIDIA specific attributes. */
|
||||
|
||||
#ifndef __NVIDIA_CSPMU_H__
|
||||
#define __NVIDIA_CSPMU_H__
|
||||
|
||||
#include "arm_cspmu.h"
|
||||
|
||||
/* Allocate NVIDIA descriptor. */
|
||||
int nv_cspmu_init_ops(struct arm_cspmu *cspmu);
|
||||
|
||||
#endif /* __NVIDIA_CSPMU_H__ */
|
@ -725,6 +725,8 @@ static struct platform_driver dmc620_pmu_driver = {
|
||||
|
||||
static int __init dmc620_pmu_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
|
||||
DMC620_DRVNAME,
|
||||
NULL,
|
||||
@ -732,7 +734,11 @@ static int __init dmc620_pmu_init(void)
|
||||
if (cpuhp_state_num < 0)
|
||||
return cpuhp_state_num;
|
||||
|
||||
return platform_driver_register(&dmc620_pmu_driver);
|
||||
ret = platform_driver_register(&dmc620_pmu_driver);
|
||||
if (ret)
|
||||
cpuhp_remove_multi_state(cpuhp_state_num);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit dmc620_pmu_exit(void)
|
||||
|
@ -858,7 +858,11 @@ static int __init dsu_pmu_init(void)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
dsu_pmu_cpuhp_state = ret;
|
||||
return platform_driver_register(&dsu_pmu_driver);
|
||||
ret = platform_driver_register(&dsu_pmu_driver);
|
||||
if (ret)
|
||||
cpuhp_remove_multi_state(dsu_pmu_cpuhp_state);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit dsu_pmu_exit(void)
|
||||
|
@ -514,9 +514,6 @@ static int armpmu_event_init(struct perf_event *event)
|
||||
if (has_branch_stack(event))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (armpmu->map_event(event) == -ENOENT)
|
||||
return -ENOENT;
|
||||
|
||||
return __hw_perf_event_init(event);
|
||||
}
|
||||
|
||||
|
@ -959,6 +959,8 @@ static struct platform_driver smmu_pmu_driver = {
|
||||
|
||||
static int __init arm_smmu_pmu_init(void)
|
||||
{
|
||||
int ret;
|
||||
|
||||
cpuhp_state_num = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
|
||||
"perf/arm/pmcg:online",
|
||||
NULL,
|
||||
@ -966,7 +968,11 @@ static int __init arm_smmu_pmu_init(void)
|
||||
if (cpuhp_state_num < 0)
|
||||
return cpuhp_state_num;
|
||||
|
||||
return platform_driver_register(&smmu_pmu_driver);
|
||||
ret = platform_driver_register(&smmu_pmu_driver);
|
||||
if (ret)
|
||||
cpuhp_remove_multi_state(cpuhp_state_num);
|
||||
|
||||
return ret;
|
||||
}
|
||||
module_init(arm_smmu_pmu_init);
|
||||
|
||||
|
@ -47,10 +47,14 @@
|
||||
#define HISI_PCIE_EVENT_M GENMASK_ULL(15, 0)
|
||||
#define HISI_PCIE_THR_MODE_M GENMASK_ULL(27, 27)
|
||||
#define HISI_PCIE_THR_M GENMASK_ULL(31, 28)
|
||||
#define HISI_PCIE_LEN_M GENMASK_ULL(35, 34)
|
||||
#define HISI_PCIE_TARGET_M GENMASK_ULL(52, 36)
|
||||
#define HISI_PCIE_TRIG_MODE_M GENMASK_ULL(53, 53)
|
||||
#define HISI_PCIE_TRIG_M GENMASK_ULL(59, 56)
|
||||
|
||||
/* Default config of TLP length mode, will count both TLP headers and payloads */
|
||||
#define HISI_PCIE_LEN_M_DEFAULT 3ULL
|
||||
|
||||
#define HISI_PCIE_MAX_COUNTERS 8
|
||||
#define HISI_PCIE_REG_STEP 8
|
||||
#define HISI_PCIE_THR_MAX_VAL 10
|
||||
@ -91,6 +95,7 @@ HISI_PCIE_PMU_FILTER_ATTR(thr_len, config1, 3, 0);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(thr_mode, config1, 4, 4);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(trig_len, config1, 8, 5);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(trig_mode, config1, 9, 9);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(len_mode, config1, 11, 10);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(port, config2, 15, 0);
|
||||
HISI_PCIE_PMU_FILTER_ATTR(bdf, config2, 31, 16);
|
||||
|
||||
@ -215,8 +220,8 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
|
||||
{
|
||||
struct hisi_pcie_pmu *pcie_pmu = to_pcie_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 port, trig_len, thr_len, len_mode;
|
||||
u64 reg = HISI_PCIE_INIT_SET;
|
||||
u64 port, trig_len, thr_len;
|
||||
|
||||
/* Config HISI_PCIE_EVENT_CTRL according to event. */
|
||||
reg |= FIELD_PREP(HISI_PCIE_EVENT_M, hisi_pcie_get_real_event(event));
|
||||
@ -245,6 +250,12 @@ static void hisi_pcie_pmu_config_filter(struct perf_event *event)
|
||||
reg |= HISI_PCIE_THR_EN;
|
||||
}
|
||||
|
||||
len_mode = hisi_pcie_get_len_mode(event);
|
||||
if (len_mode)
|
||||
reg |= FIELD_PREP(HISI_PCIE_LEN_M, len_mode);
|
||||
else
|
||||
reg |= FIELD_PREP(HISI_PCIE_LEN_M, HISI_PCIE_LEN_M_DEFAULT);
|
||||
|
||||
hisi_pcie_pmu_writeq(pcie_pmu, HISI_PCIE_EVENT_CTRL, hwc->idx, reg);
|
||||
}
|
||||
|
||||
@ -693,10 +704,10 @@ static struct attribute *hisi_pcie_pmu_events_attr[] = {
|
||||
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_cnt, 0x10210),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_latency, 0x0011),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_cnt, 0x10011),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x1005),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x11005),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x2004),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x12004),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_flux, 0x0804),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(rx_mrd_time, 0x10804),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_flux, 0x0405),
|
||||
HISI_PCIE_PMU_EVENT_ATTR(tx_mrd_time, 0x10405),
|
||||
NULL
|
||||
};
|
||||
|
||||
@ -711,6 +722,7 @@ static struct attribute *hisi_pcie_pmu_format_attr[] = {
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(thr_mode, "config1:4"),
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(trig_len, "config1:5-8"),
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(trig_mode, "config1:9"),
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(len_mode, "config1:10-11"),
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(port, "config2:0-15"),
|
||||
HISI_PCIE_PMU_FORMAT_ATTR(bdf, "config2:16-31"),
|
||||
NULL
|
||||
|
@ -408,7 +408,11 @@ static int __init tad_pmu_init(void)
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
tad_pmu_cpuhp_state = ret;
|
||||
return platform_driver_register(&tad_pmu_driver);
|
||||
ret = platform_driver_register(&tad_pmu_driver);
|
||||
if (ret)
|
||||
cpuhp_remove_multi_state(tad_pmu_cpuhp_state);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit tad_pmu_exit(void)
|
||||
|
66
include/soc/amlogic/meson_ddr_pmu.h
Normal file
66
include/soc/amlogic/meson_ddr_pmu.h
Normal file
@ -0,0 +1,66 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (c) 2022 Amlogic, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef __MESON_DDR_PMU_H__
|
||||
#define __MESON_DDR_PMU_H__
|
||||
|
||||
#define MAX_CHANNEL_NUM 8
|
||||
|
||||
enum {
|
||||
ALL_CHAN_COUNTER_ID,
|
||||
CHAN1_COUNTER_ID,
|
||||
CHAN2_COUNTER_ID,
|
||||
CHAN3_COUNTER_ID,
|
||||
CHAN4_COUNTER_ID,
|
||||
CHAN5_COUNTER_ID,
|
||||
CHAN6_COUNTER_ID,
|
||||
CHAN7_COUNTER_ID,
|
||||
CHAN8_COUNTER_ID,
|
||||
COUNTER_MAX_ID,
|
||||
};
|
||||
|
||||
struct dmc_info;
|
||||
|
||||
struct dmc_counter {
|
||||
u64 all_cnt; /* The count of all requests come in/out ddr controller */
|
||||
union {
|
||||
u64 all_req;
|
||||
struct {
|
||||
u64 all_idle_cnt;
|
||||
u64 all_16bit_cnt;
|
||||
};
|
||||
};
|
||||
u64 channel_cnt[MAX_CHANNEL_NUM]; /* To save a DMC bandwidth-monitor channel counter */
|
||||
};
|
||||
|
||||
struct dmc_hw_info {
|
||||
void (*enable)(struct dmc_info *info);
|
||||
void (*disable)(struct dmc_info *info);
|
||||
/* Bind an axi line to a bandwidth-monitor channel */
|
||||
void (*set_axi_filter)(struct dmc_info *info, int axi_id, int chann);
|
||||
int (*irq_handler)(struct dmc_info *info,
|
||||
struct dmc_counter *counter);
|
||||
void (*get_counters)(struct dmc_info *info,
|
||||
struct dmc_counter *counter);
|
||||
|
||||
int dmc_nr; /* The number of dmc controller */
|
||||
int chann_nr; /* The number of dmc bandwidth monitor channels */
|
||||
struct attribute **fmt_attr;
|
||||
const u64 capability[2];
|
||||
};
|
||||
|
||||
struct dmc_info {
|
||||
const struct dmc_hw_info *hw_info;
|
||||
|
||||
void __iomem *ddr_reg[4];
|
||||
unsigned long timer_value; /* Timer value in TIMER register */
|
||||
void __iomem *pll_reg;
|
||||
int irq_num; /* irq vector number */
|
||||
};
|
||||
|
||||
int meson_ddr_pmu_create(struct platform_device *pdev);
|
||||
int meson_ddr_pmu_remove(struct platform_device *pdev);
|
||||
|
||||
#endif /* __MESON_DDR_PMU_H__ */
|
Loading…
Reference in New Issue
Block a user