Merge branch 'for-next/perf-m1' into for-next/perf

Support for the CPU PMUs on the Apple M1.

* for-next/perf-m1:
  drivers/perf: Add Apple icestorm/firestorm CPU PMU driver
  drivers/perf: arm_pmu: Handle 47 bit counters
  irqchip/apple-aic: Move PMU-specific registers to their own include file
  arm64: dts: apple: Add t8103 PMU nodes
  arm64: dts: apple: Add t8103 PMU interrupt affinities
  irqchip/apple-aic: Wire PMU interrupts
  irqchip/apple-aic: Parse FIQ affinities from device-tree
  dt-bindings: apple,aic: Add affinity description for per-cpu pseudo-interrupts
  dt-bindings: apple,aic: Add CPU PMU per-cpu pseudo-interrupts
  dt-bindings: arm-pmu: Document Apple PMU compatible strings
This commit is contained in:
Will Deacon 2022-03-08 13:33:34 +00:00
commit 0162052214
11 changed files with 791 additions and 22 deletions

View File

@ -20,6 +20,8 @@ properties:
items:
- enum:
- apm,potenza-pmu
- apple,firestorm-pmu
- apple,icestorm-pmu
- arm,armv8-pmuv3 # Only for s/w models
- arm,arm1136-pmu
- arm,arm1176-pmu

View File

@ -56,6 +56,8 @@ properties:
- 1: virtual HV timer
- 2: physical guest timer
- 3: virtual guest timer
- 4: 'efficient' CPU PMU
- 5: 'performance' CPU PMU
The 3rd cell contains the interrupt flags. This is normally
IRQ_TYPE_LEVEL_HIGH (4).
@ -68,6 +70,35 @@ properties:
power-domains:
maxItems: 1
affinities:
type: object
additionalProperties: false
description:
FIQ affinity can be expressed as a single "affinities" node,
containing a set of sub-nodes, one per FIQ with a non-default
affinity.
patternProperties:
"^.+-affinity$":
type: object
additionalProperties: false
properties:
apple,fiq-index:
description:
The interrupt number specified as a FIQ, and for which
the affinity is not the default.
$ref: /schemas/types.yaml#/definitions/uint32
maximum: 5
cpus:
$ref: /schemas/types.yaml#/definitions/phandle-array
description:
Should be a list of phandles to CPU nodes (as described in
Documentation/devicetree/bindings/arm/cpus.yaml).
required:
- apple,fiq-index
- cpus
required:
- compatible
- '#interrupt-cells'

View File

@ -97,6 +97,18 @@
<AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>;
};
pmu-e {
compatible = "apple,icestorm-pmu";
interrupt-parent = <&aic>;
interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>;
};
pmu-p {
compatible = "apple,firestorm-pmu";
interrupt-parent = <&aic>;
interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>;
};
clkref: clock-ref {
compatible = "fixed-clock";
#clock-cells = <0>;
@ -213,6 +225,18 @@
interrupt-controller;
reg = <0x2 0x3b100000 0x0 0x8000>;
power-domains = <&ps_aic>;
affinities {
e-core-pmu-affinity {
apple,fiq-index = <AIC_CPU_PMU_E>;
cpus = <&cpu0 &cpu1 &cpu2 &cpu3>;
};
p-core-pmu-affinity {
apple,fiq-index = <AIC_CPU_PMU_P>;
cpus = <&cpu4 &cpu5 &cpu6 &cpu7>;
};
};
};
pmgr: power-management@23b700000 {

View File

@ -0,0 +1,64 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * System register encodings and bit-field definitions for the Apple M1
 * CPU PMU. All registers here are implementation-defined (SYS_IMP_APL_*).
 */
#ifndef __ASM_APPLE_M1_PMU_h
#define __ASM_APPLE_M1_PMU_h
#include <linux/bits.h>
#include <asm/sysreg.h>
/*
 * Counters: one system register per hardware counter, PMC0 through PMC9.
 * Note that the encoding is not contiguous: in the fourth sys_reg()
 * argument, PMC8 and PMC9 use 9 and 10 rather than 8 and 9.
 */
#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0)
#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0)
#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0)
#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0)
#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0)
#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0)
#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0)
#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0)
#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0)
#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0)
/*
 * Core PMC control register (PMCR0): per-counter enable bits, interrupt
 * delivery mode (IMODE), interrupt-active flag (IACT) and per-counter
 * interrupt enable bits. Counters 8 and 9 have their enable bits in the
 * upper half of the register (bits 33:32 and 45:44).
 */
#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0)
#define PMCR0_IMODE GENMASK(10, 8)
/* Values for the PMCR0_IMODE field */
#define PMCR0_IMODE_OFF 0
#define PMCR0_IMODE_PMI 1
#define PMCR0_IMODE_AIC 2
#define PMCR0_IMODE_HALT 3
#define PMCR0_IMODE_FIQ 4
#define PMCR0_IACT BIT(11)
#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12)
#define PMCR0_STOP_CNT_ON_PMI BIT(20)
#define PMCR0_CNT_GLOB_L2C_EVT BIT(21)
#define PMCR0_DEFER_PMI_TO_ERET BIT(22)
#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30)
#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32)
#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44)
/* PMCR1: per-counter "count at EL0" / "count at EL1" (AArch64) enables */
#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0)
#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48)
/* PMCR2-4: encodings only; no fields are defined here */
#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0)
#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0)
#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0)
/* PMESR0: 8-bit event selectors for counters 2-5 */
#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0)
#define PMESR0_EVT_CNT_2 GENMASK(7, 0)
#define PMESR0_EVT_CNT_3 GENMASK(15, 8)
#define PMESR0_EVT_CNT_4 GENMASK(23, 16)
#define PMESR0_EVT_CNT_5 GENMASK(31, 24)
/* PMESR1: 8-bit event selectors for counters 6-9 */
#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0)
#define PMESR1_EVT_CNT_6 GENMASK(7, 0)
#define PMESR1_EVT_CNT_7 GENMASK(15, 8)
#define PMESR1_EVT_CNT_8 GENMASK(23, 16)
#define PMESR1_EVT_CNT_9 GENMASK(31, 24)
/* PMSR: status register; the overflow field has one bit per counter (0-9) */
#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0)
#define PMSR_OVERFLOW GENMASK(9, 0)
#endif /* __ASM_APPLE_M1_PMU_h */

View File

@ -55,6 +55,7 @@
#include <linux/limits.h>
#include <linux/of_address.h>
#include <linux/slab.h>
#include <asm/apple_m1_pmu.h>
#include <asm/exception.h>
#include <asm/sysreg.h>
#include <asm/virt.h>
@ -109,16 +110,6 @@
* Note: sysreg-based IPIs are not supported yet.
*/
/* Core PMC control register */
#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0)
#define PMCR0_IMODE GENMASK(10, 8)
#define PMCR0_IMODE_OFF 0
#define PMCR0_IMODE_PMI 1
#define PMCR0_IMODE_AIC 2
#define PMCR0_IMODE_HALT 3
#define PMCR0_IMODE_FIQ 4
#define PMCR0_IACT BIT(11)
/* IPI request registers */
#define SYS_IMP_APL_IPI_RR_LOCAL_EL1 sys_reg(3, 5, 15, 0, 0)
#define SYS_IMP_APL_IPI_RR_GLOBAL_EL1 sys_reg(3, 5, 15, 0, 1)
@ -155,7 +146,7 @@
#define SYS_IMP_APL_UPMSR_EL1 sys_reg(3, 7, 15, 6, 4)
#define UPMSR_IACT BIT(0)
#define AIC_NR_FIQ 4
#define AIC_NR_FIQ 6
#define AIC_NR_SWIPI 32
/*
@ -177,6 +168,9 @@ struct aic_irq_chip {
void __iomem *base;
struct irq_domain *hw_domain;
struct irq_domain *ipi_domain;
struct {
cpumask_t aff;
} *fiq_aff[AIC_NR_FIQ];
int nr_hw;
};
@ -412,16 +406,15 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs)
aic_irqc->nr_hw + AIC_TMR_EL02_VIRT);
}
if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) ==
(FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) {
/*
* Not supported yet, let's figure out how to handle this when
* we implement these proprietary performance counters. For now,
* just mask it and move on.
*/
pr_err_ratelimited("PMC FIQ fired. Masking.\n");
sysreg_clear_set_s(SYS_IMP_APL_PMCR0_EL1, PMCR0_IMODE | PMCR0_IACT,
FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_OFF));
if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) {
int irq;
if (cpumask_test_cpu(smp_processor_id(),
&aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff))
irq = AIC_CPU_PMU_P;
else
irq = AIC_CPU_PMU_E;
generic_handle_domain_irq(aic_irqc->hw_domain,
aic_irqc->nr_hw + irq);
}
if (FIELD_GET(UPMCR0_IMODE, read_sysreg_s(SYS_IMP_APL_UPMCR0_EL1)) == UPMCR0_IMODE_FIQ &&
@ -461,7 +454,18 @@ static int aic_irq_domain_map(struct irq_domain *id, unsigned int irq,
handle_fasteoi_irq, NULL, NULL);
irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(irq)));
} else {
irq_set_percpu_devid(irq);
int fiq = hw - ic->nr_hw;
switch (fiq) {
case AIC_CPU_PMU_P:
case AIC_CPU_PMU_E:
irq_set_percpu_devid_partition(irq, &ic->fiq_aff[fiq]->aff);
break;
default:
irq_set_percpu_devid(irq);
break;
}
irq_domain_set_info(id, irq, hw, &fiq_chip, id->host_data,
handle_percpu_devid_irq, NULL, NULL);
}
@ -793,12 +797,50 @@ static struct gic_kvm_info vgic_info __initdata = {
.no_hw_deactivation = true,
};
/*
 * Record the CPU affinity of one FIQ, as described by a child of the
 * "affinities" device-tree node.
 *
 * @ic:  AIC instance whose fiq_aff[] table is being populated.
 * @aff: affinity sub-node carrying "apple,fiq-index" and "cpus".
 *
 * The node is silently ignored if "apple,fiq-index" is missing, and ignored
 * with a warning if the index is out of range or "cpus" cannot be counted.
 * A second node for an already-described FIQ is ignored. CPU phandles that
 * cannot be read or resolved are skipped; the remaining ones still make it
 * into the mask.
 */
static void build_fiq_affinity(struct aic_irq_chip *ic, struct device_node *aff)
{
	int i, n;
	u32 fiq;

	if (of_property_read_u32(aff, "apple,fiq-index", &fiq) ||
	    WARN_ON(fiq >= AIC_NR_FIQ) || ic->fiq_aff[fiq])
		return;

	n = of_property_count_elems_of_size(aff, "cpus", sizeof(u32));
	if (WARN_ON(n < 0))
		return;

	/*
	 * Size the allocation from the pointed-to structure (which embeds a
	 * cpumask_t), not from the pointer itself: allocating only
	 * sizeof(pointer) bytes would let cpumask_set_cpu() below write past
	 * the end of the buffer.
	 */
	ic->fiq_aff[fiq] = kzalloc(sizeof(*ic->fiq_aff[fiq]), GFP_KERNEL);
	if (!ic->fiq_aff[fiq])
		return;

	for (i = 0; i < n; i++) {
		struct device_node *cpu_node;
		u32 cpu_phandle;
		int cpu;

		if (of_property_read_u32_index(aff, "cpus", i, &cpu_phandle))
			continue;

		/*
		 * NOTE(review): of_find_node_by_phandle() is documented as
		 * returning the node with its refcount raised, and nothing
		 * here drops it again - confirm whether an of_node_put() is
		 * wanted once the CPU id has been looked up.
		 */
		cpu_node = of_find_node_by_phandle(cpu_phandle);
		if (WARN_ON(!cpu_node))
			continue;

		cpu = of_cpu_node_to_id(cpu_node);
		if (WARN_ON(cpu < 0))
			continue;

		cpumask_set_cpu(cpu, &ic->fiq_aff[fiq]->aff);
	}
}
static int __init aic_of_ic_init(struct device_node *node, struct device_node *parent)
{
int i;
void __iomem *regs;
u32 info;
struct aic_irq_chip *irqc;
struct device_node *affs;
regs = of_iomap(node, 0);
if (WARN_ON(!regs))
@ -832,6 +874,14 @@ static int __init aic_of_ic_init(struct device_node *node, struct device_node *p
return -ENODEV;
}
affs = of_get_child_by_name(node, "affinities");
if (affs) {
struct device_node *chld;
for_each_child_of_node(affs, chld)
build_fiq_affinity(irqc, chld);
}
set_handle_irq(aic_handle_irq);
set_handle_fiq(aic_handle_fiq);

View File

@ -146,6 +146,13 @@ config MARVELL_CN10K_TAD_PMU
Provides support for Last-Level cache Tag-and-data Units (LLC-TAD)
performance monitors on CN10K family silicons.
config APPLE_M1_CPU_PMU
bool "Apple M1 CPU PMU support"
depends on ARM_PMU && ARCH_APPLE
help
Provides support for the non-architectural CPU PMUs present on
the Apple M1 SoCs and derivatives.
source "drivers/perf/hisilicon/Kconfig"
config MARVELL_CN10K_DDR_PMU

View File

@ -16,3 +16,4 @@ obj-$(CONFIG_ARM_SPE_PMU) += arm_spe_pmu.o
obj-$(CONFIG_ARM_DMC620_PMU) += arm_dmc620_pmu.o
obj-$(CONFIG_MARVELL_CN10K_TAD_PMU) += marvell_cn10k_tad_pmu.o
obj-$(CONFIG_MARVELL_CN10K_DDR_PMU) += marvell_cn10k_ddr_pmu.o
obj-$(CONFIG_APPLE_M1_CPU_PMU) += apple_m1_cpu_pmu.o

View File

@ -0,0 +1,584 @@
// SPDX-License-Identifier: GPL-2.0
/*
* CPU PMU driver for the Apple M1 and derivatives
*
* Copyright (C) 2021 Google LLC
*
* Author: Marc Zyngier <maz@kernel.org>
*
* Most of the information used in this driver was provided by the
* Asahi Linux project. The rest was experimentally discovered.
*/
#include <linux/of.h>
#include <linux/perf/arm_pmu.h>
#include <linux/platform_device.h>
#include <asm/apple_m1_pmu.h>
#include <asm/irq_regs.h>
#include <asm/perf_event.h>
/* Number of hardware counters handled by this driver (PMC0-PMC9) */
#define M1_PMU_NR_COUNTERS 10
/* Bits of hw.config_base that hold the 8-bit hardware event number */
#define M1_PMU_CFG_EVENT GENMASK(7, 0)
/*
 * Counter-affinity masks used by m1_pmu_event_affinity[]:
 * bit n set means "this event may be placed on counter n".
 */
#define ANY_BUT_0_1 GENMASK(9, 2)
#define ONLY_2_TO_7 GENMASK(7, 2)
#define ONLY_2_4_6 (BIT(2) | BIT(4) | BIT(6))
#define ONLY_5_6_7 (BIT(5) | BIT(6) | BIT(7))
/*
* Description of the events we actually know about, as well as those with
* a specific counter affinity. Yes, this is a grand total of two known
* counters, and the rest is anybody's guess.
*
* Not all counters can count all events. Counters #0 and #1 are wired to
* count cycles and instructions respectively, and some events have
* bizarre mappings (every other counter, or even *one* counter). These
* restrictions equally apply to both P and E cores.
*
* It is worth noting that the PMUs attached to P and E cores are likely
* to be different because the underlying uarches are different. At the
* moment, we don't really need to distinguish between the two because we
* know next to nothing about the events themselves, and we already have
* per cpu-type PMU abstractions.
*
* If we eventually find out that the events are different across
* implementations, we'll have to introduce per cpu-type tables.
*/
/*
 * Hardware event numbers (8 bits, see M1_PMU_CFG_EVENT), followed by
 * software-only flag bits that share hw->config_base with the event number.
 * The UNKNOWN_xx entries are named after their raw event number; they are
 * listed because they have an entry in the counter-affinity table.
 */
enum m1_pmu_events {
M1_PMU_PERFCTR_UNKNOWN_01 = 0x01,
M1_PMU_PERFCTR_CPU_CYCLES = 0x02,
M1_PMU_PERFCTR_INSTRUCTIONS = 0x8c,
M1_PMU_PERFCTR_UNKNOWN_8d = 0x8d,
M1_PMU_PERFCTR_UNKNOWN_8e = 0x8e,
M1_PMU_PERFCTR_UNKNOWN_8f = 0x8f,
M1_PMU_PERFCTR_UNKNOWN_90 = 0x90,
M1_PMU_PERFCTR_UNKNOWN_93 = 0x93,
M1_PMU_PERFCTR_UNKNOWN_94 = 0x94,
M1_PMU_PERFCTR_UNKNOWN_95 = 0x95,
M1_PMU_PERFCTR_UNKNOWN_96 = 0x96,
M1_PMU_PERFCTR_UNKNOWN_97 = 0x97,
M1_PMU_PERFCTR_UNKNOWN_98 = 0x98,
M1_PMU_PERFCTR_UNKNOWN_99 = 0x99,
M1_PMU_PERFCTR_UNKNOWN_9a = 0x9a,
M1_PMU_PERFCTR_UNKNOWN_9b = 0x9b,
M1_PMU_PERFCTR_UNKNOWN_9c = 0x9c,
M1_PMU_PERFCTR_UNKNOWN_9f = 0x9f,
M1_PMU_PERFCTR_UNKNOWN_bf = 0xbf,
M1_PMU_PERFCTR_UNKNOWN_c0 = 0xc0,
M1_PMU_PERFCTR_UNKNOWN_c1 = 0xc1,
M1_PMU_PERFCTR_UNKNOWN_c4 = 0xc4,
M1_PMU_PERFCTR_UNKNOWN_c5 = 0xc5,
M1_PMU_PERFCTR_UNKNOWN_c6 = 0xc6,
M1_PMU_PERFCTR_UNKNOWN_c8 = 0xc8,
M1_PMU_PERFCTR_UNKNOWN_ca = 0xca,
M1_PMU_PERFCTR_UNKNOWN_cb = 0xcb,
M1_PMU_PERFCTR_UNKNOWN_f5 = 0xf5,
M1_PMU_PERFCTR_UNKNOWN_f6 = 0xf6,
M1_PMU_PERFCTR_UNKNOWN_f7 = 0xf7,
M1_PMU_PERFCTR_UNKNOWN_f8 = 0xf8,
M1_PMU_PERFCTR_UNKNOWN_fd = 0xfd,
/* Highest representable event number (0xff); sizes the affinity table */
M1_PMU_PERFCTR_LAST = M1_PMU_CFG_EVENT,
/*
* From this point onwards, these are not actual HW events,
* but attributes that get stored in hw->config_base.
*/
M1_PMU_CFG_COUNT_USER = BIT(8),
M1_PMU_CFG_COUNT_KERNEL = BIT(9),
};
/*
* Per-event affinity table. Most events can be installed on counter
* 2-9, but there are a number of exceptions. Note that this table
* has been created experimentally, and I wouldn't be surprised if more
* counters had strange affinities.
*/
/*
 * Indexed by hardware event number; each entry is a bitmask of the counters
 * that may count that event (bit n == counter n). Every event defaults to
 * counters 2-9 via the range initializer, and the specific entries below
 * override that default.
 */
static const u16 m1_pmu_event_affinity[M1_PMU_PERFCTR_LAST + 1] = {
[0 ... M1_PMU_PERFCTR_LAST] = ANY_BUT_0_1,
[M1_PMU_PERFCTR_UNKNOWN_01] = BIT(7),
/* Cycles can additionally use the fixed-function counter 0 */
[M1_PMU_PERFCTR_CPU_CYCLES] = ANY_BUT_0_1 | BIT(0),
/* Instructions: only counter 7 or the fixed-function counter 1 */
[M1_PMU_PERFCTR_INSTRUCTIONS] = BIT(7) | BIT(1),
[M1_PMU_PERFCTR_UNKNOWN_8d] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_8e] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_8f] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_90] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_93] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_94] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_95] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_96] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_97] = BIT(7),
[M1_PMU_PERFCTR_UNKNOWN_98] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_99] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_9a] = BIT(7),
[M1_PMU_PERFCTR_UNKNOWN_9b] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_9c] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_9f] = BIT(7),
[M1_PMU_PERFCTR_UNKNOWN_bf] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c0] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c1] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c4] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c5] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c6] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_c8] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_ca] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_cb] = ONLY_5_6_7,
[M1_PMU_PERFCTR_UNKNOWN_f5] = ONLY_2_4_6,
[M1_PMU_PERFCTR_UNKNOWN_f6] = ONLY_2_4_6,
[M1_PMU_PERFCTR_UNKNOWN_f7] = ONLY_2_4_6,
[M1_PMU_PERFCTR_UNKNOWN_f8] = ONLY_2_TO_7,
[M1_PMU_PERFCTR_UNKNOWN_fd] = ONLY_2_4_6,
};
/*
 * Translation from generic perf hardware event ids to M1 event numbers.
 * Everything starts out as unsupported; only cycles and instructions are
 * mapped.
 */
static const unsigned m1_pmu_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = M1_PMU_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = M1_PMU_PERFCTR_INSTRUCTIONS,
/* No idea about the rest yet */
};
/* sysfs definitions */
/*
 * sysfs show callback for the entries of the "events" group: prints the
 * event id stored in the enclosing perf_pmu_events_attr as "event=0x....".
 */
static ssize_t m1_pmu_events_sysfs_show(struct device *dev,
					struct device_attribute *attr,
					char *page)
{
	/* The device_attribute is embedded in a perf_pmu_events_attr. */
	struct perf_pmu_events_attr *evt_attr =
		container_of(attr, struct perf_pmu_events_attr, attr);

	return sprintf(page, "event=0x%04llx\n", evt_attr->id);
}
/* Declare one named event attribute, shown by m1_pmu_events_sysfs_show() */
#define M1_PMU_EVENT_ATTR(name, config) \
PMU_EVENT_ATTR_ID(name, m1_pmu_events_sysfs_show, config)
/* Named events exposed in the "events" sysfs group */
static struct attribute *m1_pmu_event_attrs[] = {
M1_PMU_EVENT_ATTR(cycles, M1_PMU_PERFCTR_CPU_CYCLES),
M1_PMU_EVENT_ATTR(instructions, M1_PMU_PERFCTR_INSTRUCTIONS),
NULL,
};
static const struct attribute_group m1_pmu_events_attr_group = {
.name = "events",
.attrs = m1_pmu_event_attrs,
};
/* "event" format field: bits 0-7 of config, matching M1_PMU_CFG_EVENT */
PMU_FORMAT_ATTR(event, "config:0-7");
static struct attribute *m1_pmu_format_attrs[] = {
&format_attr_event.attr,
NULL,
};
static const struct attribute_group m1_pmu_format_attr_group = {
.name = "format",
.attrs = m1_pmu_format_attrs,
};
/* Low level accessors. No synchronisation. */
/*
 * Helpers expanding to one "case" of a switch on the counter index. The
 * index is token-pasted into the register name, so it must be a literal.
 * Both variants return from the enclosing function.
 */
#define PMU_READ_COUNTER(_idx) \
case _idx: return read_sysreg_s(SYS_IMP_APL_PMC## _idx ##_EL1)
#define PMU_WRITE_COUNTER(_val, _idx) \
case _idx: \
write_sysreg_s(_val, SYS_IMP_APL_PMC## _idx ##_EL1); \
return
/*
 * Read the raw value of hardware counter @index (0-9).
 * Any other index falls out of the switch and hits BUG().
 */
static u64 m1_pmu_read_hw_counter(unsigned int index)
{
switch (index) {
PMU_READ_COUNTER(0);
PMU_READ_COUNTER(1);
PMU_READ_COUNTER(2);
PMU_READ_COUNTER(3);
PMU_READ_COUNTER(4);
PMU_READ_COUNTER(5);
PMU_READ_COUNTER(6);
PMU_READ_COUNTER(7);
PMU_READ_COUNTER(8);
PMU_READ_COUNTER(9);
}
/* Only reached for an index outside 0-9 */
BUG();
}
/*
 * Write @val to hardware counter @index (0-9). No barrier is issued here.
 * Any other index falls out of the switch and hits BUG().
 */
static void m1_pmu_write_hw_counter(u64 val, unsigned int index)
{
switch (index) {
PMU_WRITE_COUNTER(val, 0);
PMU_WRITE_COUNTER(val, 1);
PMU_WRITE_COUNTER(val, 2);
PMU_WRITE_COUNTER(val, 3);
PMU_WRITE_COUNTER(val, 4);
PMU_WRITE_COUNTER(val, 5);
PMU_WRITE_COUNTER(val, 6);
PMU_WRITE_COUNTER(val, 7);
PMU_WRITE_COUNTER(val, 8);
PMU_WRITE_COUNTER(val, 9);
}
/* Only reached for an index outside 0-9 */
BUG();
}
/* Bit position of entry @index within the register field described by @mask */
#define get_bit_offset(index, mask) (__ffs(mask) + (index))
/*
 * Set (@en true) or clear (@en false) the enable bit of counter @index in
 * PMCR0. Counters 0-7 and 8-9 live in two separate fields of the register.
 * No barrier is issued; an index above 9 is a BUG().
 */
static void __m1_pmu_enable_counter(unsigned int index, bool en)
{
	u64 pmcr0, mask;

	switch (index) {
	case 0 ... 7:
		mask = BIT(get_bit_offset(index, PMCR0_CNT_ENABLE_0_7));
		break;
	case 8 ... 9:
		mask = BIT(get_bit_offset(index - 8, PMCR0_CNT_ENABLE_8_9));
		break;
	default:
		BUG();
	}

	pmcr0 = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
	pmcr0 = en ? (pmcr0 | mask) : (pmcr0 & ~mask);
	write_sysreg_s(pmcr0, SYS_IMP_APL_PMCR0_EL1);
}
/* Set the PMCR0 enable bit of counter @index */
static void m1_pmu_enable_counter(unsigned int index)
{
__m1_pmu_enable_counter(index, true);
}
/* Clear the PMCR0 enable bit of counter @index */
static void m1_pmu_disable_counter(unsigned int index)
{
__m1_pmu_enable_counter(index, false);
}
/*
 * Set (@en true) or clear (@en false) the interrupt-enable bit of counter
 * @index in PMCR0. Counters 0-7 and 8-9 live in two separate fields of the
 * register. No barrier is issued; an index above 9 is a BUG().
 */
static void __m1_pmu_enable_counter_interrupt(unsigned int index, bool en)
{
	u64 pmcr0, mask;

	switch (index) {
	case 0 ... 7:
		mask = BIT(get_bit_offset(index, PMCR0_PMI_ENABLE_0_7));
		break;
	case 8 ... 9:
		mask = BIT(get_bit_offset(index - 8, PMCR0_PMI_ENABLE_8_9));
		break;
	default:
		BUG();
	}

	pmcr0 = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
	pmcr0 = en ? (pmcr0 | mask) : (pmcr0 & ~mask);
	write_sysreg_s(pmcr0, SYS_IMP_APL_PMCR0_EL1);
}
/* Set the PMCR0 interrupt-enable bit of counter @index */
static void m1_pmu_enable_counter_interrupt(unsigned int index)
{
__m1_pmu_enable_counter_interrupt(index, true);
}
/* Clear the PMCR0 interrupt-enable bit of counter @index */
static void m1_pmu_disable_counter_interrupt(unsigned int index)
{
__m1_pmu_enable_counter_interrupt(index, false);
}
/*
 * Program counter @index to count @event.
 *
 * @index:  hardware counter number (0-9); anything else is a BUG().
 * @event:  8-bit hardware event number (ignored for counters 0 and 1).
 * @user:   count while running at EL0.
 * @kernel: count while running at EL1.
 *
 * PMCR1 is updated first (exception-level filtering), then the event
 * selector in PMESR0/PMESR1. No barrier is issued here.
 */
static void m1_pmu_configure_counter(unsigned int index, u8 event,
				     bool user, bool kernel)
{
	u64 reg, el0_bit, el1_bit;
	int lsb;

	switch (index) {
	case 0 ... 7:
		el0_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL0_0_7));
		el1_bit = BIT(get_bit_offset(index, PMCR1_COUNT_A64_EL1_0_7));
		break;
	case 8 ... 9:
		el0_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL0_8_9));
		el1_bit = BIT(get_bit_offset(index - 8, PMCR1_COUNT_A64_EL1_8_9));
		break;
	default:
		BUG();
	}

	reg = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
	reg = user ? (reg | el0_bit) : (reg & ~el0_bit);
	reg = kernel ? (reg | el1_bit) : (reg & ~el1_bit);
	write_sysreg_s(reg, SYS_IMP_APL_PMCR1_EL1);

	/*
	 * Counters 0 and 1 have fixed events, so there is nothing to select
	 * for them. The other counters each own one byte of an event
	 * selection register: PMESR0 covers counters 2-5 and PMESR1 covers
	 * counters 6-9.
	 */
	switch (index) {
	case 0 ... 1:
		break;
	case 2 ... 5:
		lsb = (index - 2) * 8;
		reg = read_sysreg_s(SYS_IMP_APL_PMESR0_EL1);
		reg = (reg & ~((u64)0xff << lsb)) | ((u64)event << lsb);
		write_sysreg_s(reg, SYS_IMP_APL_PMESR0_EL1);
		break;
	case 6 ... 9:
		lsb = (index - 6) * 8;
		reg = read_sysreg_s(SYS_IMP_APL_PMESR1_EL1);
		reg = (reg & ~((u64)0xff << lsb)) | ((u64)event << lsb);
		write_sysreg_s(reg, SYS_IMP_APL_PMESR1_EL1);
		break;
	}
}
/* arm_pmu backend */
/*
 * arm_pmu ->enable callback: (re)program and start the counter assigned to
 * @event. The event number and the user/kernel filter flags are taken from
 * hw.config_base.
 */
static void m1_pmu_enable_event(struct perf_event *event)
{
bool user, kernel;
u8 evt;
evt = event->hw.config_base & M1_PMU_CFG_EVENT;
user = event->hw.config_base & M1_PMU_CFG_COUNT_USER;
kernel = event->hw.config_base & M1_PMU_CFG_COUNT_KERNEL;
/* Quiesce the counter and its interrupt before reconfiguring it */
m1_pmu_disable_counter_interrupt(event->hw.idx);
m1_pmu_disable_counter(event->hw.idx);
isb();
/* Program the new configuration, then re-enable counter and interrupt */
m1_pmu_configure_counter(event->hw.idx, evt, user, kernel);
m1_pmu_enable_counter(event->hw.idx);
m1_pmu_enable_counter_interrupt(event->hw.idx);
isb();
}
/*
 * arm_pmu ->disable callback: turn off the interrupt and the counter
 * assigned to @event, followed by an isb().
 */
static void m1_pmu_disable_event(struct perf_event *event)
{
m1_pmu_disable_counter_interrupt(event->hw.idx);
m1_pmu_disable_counter(event->hw.idx);
isb();
}
/*
 * arm_pmu ->handle_irq callback.
 *
 * Returns IRQ_NONE when the status register (PMSR) reads as zero, after
 * clearing PMCR0.IACT; otherwise stops the PMU, updates every event
 * installed on this CPU, restarts the PMU and returns IRQ_HANDLED.
 *
 * Note that the PMSR value is only tested for being non-zero: the loop below
 * walks all installed events rather than only the counters flagged in it.
 */
static irqreturn_t m1_pmu_handle_irq(struct arm_pmu *cpu_pmu)
{
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
struct pt_regs *regs;
u64 overflow, state;
int idx;
overflow = read_sysreg_s(SYS_IMP_APL_PMSR_EL1);
if (!overflow) {
/* Spurious interrupt? */
state = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
state &= ~PMCR0_IACT;
write_sysreg_s(state, SYS_IMP_APL_PMCR0_EL1);
isb();
return IRQ_NONE;
}
/* Stop the PMU while the events are being updated */
cpu_pmu->stop(cpu_pmu);
regs = get_irq_regs();
for (idx = 0; idx < cpu_pmu->num_events; idx++) {
struct perf_event *event = cpuc->events[idx];
struct perf_sample_data data;
/* No event installed on this counter */
if (!event)
continue;
armpmu_event_update(event);
perf_sample_data_init(&data, 0, event->hw.last_period);
/* Skip overflow reporting when set_period() returns 0 */
if (!armpmu_event_set_period(event))
continue;
/* A non-zero return from perf_event_overflow() disables the event */
if (perf_event_overflow(event, &data, regs))
m1_pmu_disable_event(event);
}
cpu_pmu->start(cpu_pmu);
return IRQ_HANDLED;
}
/* arm_pmu ->read_counter callback: raw value of the counter backing @event */
static u64 m1_pmu_read_counter(struct perf_event *event)
{
return m1_pmu_read_hw_counter(event->hw.idx);
}
/*
 * arm_pmu ->write_counter callback: store @value into the counter backing
 * @event, followed by an isb().
 */
static void m1_pmu_write_counter(struct perf_event *event, u64 value)
{
m1_pmu_write_hw_counter(value, event->hw.idx);
isb();
}
/*
 * arm_pmu ->get_event_idx callback: pick a hardware counter for @event.
 *
 * Walks the counters allowed for this event (lowest index first) and claims
 * the first one not already marked in @cpuc->used_mask.
 *
 * Returns the claimed counter index, or -EAGAIN if every allowed counter is
 * already in use.
 */
static int m1_pmu_get_event_idx(struct pmu_hw_events *cpuc,
				struct perf_event *event)
{
	unsigned long evt = event->hw.config_base & M1_PMU_CFG_EVENT;
	unsigned long candidates = m1_pmu_event_affinity[evt];
	int counter;

	/*
	 * First fit only. A smarter allocator would look at all the events
	 * scheduled on the PMU and place the most constrained ones first.
	 */
	for_each_set_bit(counter, &candidates, M1_PMU_NR_COUNTERS) {
		if (test_and_set_bit(counter, cpuc->used_mask))
			continue;

		return counter;
	}

	return -EAGAIN;
}
/*
 * arm_pmu ->clear_event_idx callback: release the counter held by @event by
 * clearing its bit in @cpuc->used_mask.
 */
static void m1_pmu_clear_event_idx(struct pmu_hw_events *cpuc,
struct perf_event *event)
{
clear_bit(event->hw.idx, cpuc->used_mask);
}
/*
 * Select the PMU interrupt delivery mode (one of the PMCR0_IMODE_* values).
 * The interrupt-active flag (IACT) is cleared in the same write, and the
 * update is followed by an isb().
 */
static void __m1_pmu_set_mode(u8 mode)
{
	u64 pmcr0 = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);

	pmcr0 &= ~(PMCR0_IMODE | PMCR0_IACT);
	pmcr0 |= FIELD_PREP(PMCR0_IMODE, mode);
	write_sysreg_s(pmcr0, SYS_IMP_APL_PMCR0_EL1);
	isb();
}
/* arm_pmu ->start callback: switch interrupt delivery to FIQ mode */
static void m1_pmu_start(struct arm_pmu *cpu_pmu)
{
__m1_pmu_set_mode(PMCR0_IMODE_FIQ);
}
/* arm_pmu ->stop callback: switch interrupt delivery off */
static void m1_pmu_stop(struct arm_pmu *cpu_pmu)
{
__m1_pmu_set_mode(PMCR0_IMODE_OFF);
}
/*
 * arm_pmu ->map_event callback: flag @event as using a 47-bit counter and
 * translate it through m1_pmu_perf_map, returning whatever
 * armpmu_map_event() returns.
 */
static int m1_pmu_map_event(struct perf_event *event)
{
/*
* Although the counters are 48bit wide, bit 47 is what
* triggers the overflow interrupt. Advertise the counters
* being 47bit wide to mimic the behaviour of the ARM PMU.
*/
event->hw.flags |= ARMPMU_EVT_47BIT;
return armpmu_map_event(event, &m1_pmu_perf_map, NULL, M1_PMU_CFG_EVENT);
}
/*
 * arm_pmu ->reset callback: turn interrupt delivery off, then disable every
 * counter and its interrupt and zero its value. @info is unused.
 */
static void m1_pmu_reset(void *info)
{
int i;
__m1_pmu_set_mode(PMCR0_IMODE_OFF);
for (i = 0; i < M1_PMU_NR_COUNTERS; i++) {
m1_pmu_disable_counter(i);
m1_pmu_disable_counter_interrupt(i);
m1_pmu_write_hw_counter(0, i);
}
/* Single barrier for all of the register writes in the loop above */
isb();
}
/*
 * arm_pmu ->set_event_filter callback: turn the exclude_* attributes into
 * the M1_PMU_CFG_COUNT_* flags stored in @event->config_base.
 *
 * Returns 0 on success, or -EINVAL when the caller did not set
 * exclude_guest.
 */
static int m1_pmu_set_event_filter(struct hw_perf_event *event,
				   struct perf_event_attr *attr)
{
	unsigned long filter;

	if (!attr->exclude_guest)
		return -EINVAL;

	filter = attr->exclude_kernel ? 0 : M1_PMU_CFG_COUNT_KERNEL;
	filter |= attr->exclude_user ? 0 : M1_PMU_CFG_COUNT_USER;

	event->config_base = filter;

	return 0;
}
/*
 * Common arm_pmu setup shared by both core types: install the M1 callbacks,
 * advertise M1_PMU_NR_COUNTERS counters and hook up the sysfs "events" and
 * "format" groups. Always returns 0.
 */
static int m1_pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->handle_irq = m1_pmu_handle_irq;
cpu_pmu->enable = m1_pmu_enable_event;
cpu_pmu->disable = m1_pmu_disable_event;
cpu_pmu->read_counter = m1_pmu_read_counter;
cpu_pmu->write_counter = m1_pmu_write_counter;
cpu_pmu->get_event_idx = m1_pmu_get_event_idx;
cpu_pmu->clear_event_idx = m1_pmu_clear_event_idx;
cpu_pmu->start = m1_pmu_start;
cpu_pmu->stop = m1_pmu_stop;
cpu_pmu->map_event = m1_pmu_map_event;
cpu_pmu->reset = m1_pmu_reset;
cpu_pmu->set_event_filter = m1_pmu_set_event_filter;
cpu_pmu->num_events = M1_PMU_NR_COUNTERS;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_EVENTS] = &m1_pmu_events_attr_group;
cpu_pmu->attr_groups[ARMPMU_ATTR_GROUP_FORMATS] = &m1_pmu_format_attr_group;
return 0;
}
/* Device driver gunk */
/* Init hook for "apple,icestorm-pmu": name the PMU, then do the common setup */
static int m1_pmu_ice_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "apple_icestorm_pmu";
return m1_pmu_init(cpu_pmu);
}
/* Init hook for "apple,firestorm-pmu": name the PMU, then do the common setup */
static int m1_pmu_fire_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "apple_firestorm_pmu";
return m1_pmu_init(cpu_pmu);
}
/* Device-tree match table: .data carries the per-core-type init hook */
static const struct of_device_id m1_pmu_of_device_ids[] = {
{ .compatible = "apple,icestorm-pmu", .data = m1_pmu_ice_init, },
{ .compatible = "apple,firestorm-pmu", .data = m1_pmu_fire_init, },
{ },
};
MODULE_DEVICE_TABLE(of, m1_pmu_of_device_ids);
/*
 * Platform probe: hand over to arm_pmu_device_probe() with the OF match
 * table above and no second (NULL) table.
 */
static int m1_pmu_device_probe(struct platform_device *pdev)
{
return arm_pmu_device_probe(pdev, m1_pmu_of_device_ids, NULL);
}
/*
 * Platform driver glue. Only ->probe is provided (there is no ->remove),
 * and suppress_bind_attrs is set.
 */
static struct platform_driver m1_pmu_driver = {
.driver = {
.name = "apple-m1-cpu-pmu",
.of_match_table = m1_pmu_of_device_ids,
.suppress_bind_attrs = true,
},
.probe = m1_pmu_device_probe,
};
module_platform_driver(m1_pmu_driver);
MODULE_LICENSE("GPL v2");

View File

@ -109,6 +109,8 @@ static inline u64 arm_pmu_event_max_period(struct perf_event *event)
{
if (event->hw.flags & ARMPMU_EVT_64BIT)
return GENMASK_ULL(63, 0);
else if (event->hw.flags & ARMPMU_EVT_47BIT)
return GENMASK_ULL(46, 0);
else
return GENMASK_ULL(31, 0);
}

View File

@ -11,5 +11,7 @@
#define AIC_TMR_HV_VIRT 1
#define AIC_TMR_GUEST_PHYS 2
#define AIC_TMR_GUEST_VIRT 3
#define AIC_CPU_PMU_E 4
#define AIC_CPU_PMU_P 5
#endif

View File

@ -26,6 +26,8 @@
*/
/* Event uses a 64bit counter */
#define ARMPMU_EVT_64BIT 1
/* Event uses a 47bit counter */
#define ARMPMU_EVT_47BIT 2
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x