Merge tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into next/drivers

Pull "ARM: perf: updates for 3.19" from Will Deacon:

This patch series takes us slightly further on the road to big.LITTLE
support in perf. The main change enabling this is moving the CCI PMU
driver away from the arm-pmu abstraction, allowing the arch code to
focus specifically on support for CPU PMUs.

* tag 'arm-perf-3.19' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux:
  arm: perf: fold hotplug notifier into arm_pmu
  arm: perf: dynamically allocate cpu hardware data
  arm: perf: fold percpu_pmu into pmu_hw_events
  arm: perf: kill get_hw_events()
  arm: perf: limit size of accounting data
  arm: perf: use IDR types for CPU PMUs
  arm: perf: make PMU probing data-driven
  arm: perf: add missing pr_info newlines
  arm: perf: factor out callchain code
  ARM: perf: use pr_* instead of printk
  ARM: perf: remove useless return and check of idx in counter handling
  bus: cci: move away from arm_pmu framework

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
commit b9e0e5a9e0
@@ -12,7 +12,7 @@
 #ifndef __ARM_PERF_EVENT_H__
 #define __ARM_PERF_EVENT_H__
 
-#ifdef CONFIG_HW_PERF_EVENTS
+#ifdef CONFIG_PERF_EVENTS
 struct pt_regs;
 extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
 extern unsigned long perf_misc_flags(struct pt_regs *regs);
@@ -15,6 +15,8 @@
 #include <linux/interrupt.h>
 #include <linux/perf_event.h>
 
+#include <asm/cputype.h>
+
 /*
  * struct arm_pmu_platdata - ARM PMU platform data
  *
@@ -66,19 +68,25 @@ struct pmu_hw_events {
        /*
         * The events that are active on the PMU for the given index.
         */
-       struct perf_event       **events;
+       struct perf_event       *events[ARMPMU_MAX_HWEVENTS];
 
        /*
         * A 1 bit for an index indicates that the counter is being used for
         * an event. A 0 means that the counter can be used.
         */
-       unsigned long           *used_mask;
+       DECLARE_BITMAP(used_mask, ARMPMU_MAX_HWEVENTS);
 
        /*
         * Hardware lock to serialize accesses to PMU registers. Needed for the
         * read/modify/write sequences.
         */
        raw_spinlock_t          pmu_lock;
+
+       /*
+        * When using percpu IRQs, we need a percpu dev_id. Place it here as we
+        * already have to allocate this struct per cpu.
+        */
+       struct arm_pmu          *percpu_pmu;
 };
 
 struct arm_pmu {
@@ -107,7 +115,8 @@ struct arm_pmu {
        struct mutex    reserve_mutex;
        u64             max_period;
        struct platform_device  *plat_device;
-       struct pmu_hw_events    *(*get_hw_events)(void);
+       struct pmu_hw_events    __percpu *hw_events;
+       struct notifier_block   hotplug_nb;
 };
 
 #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu))
@@ -127,6 +136,27 @@ int armpmu_map_event(struct perf_event *event,
                                        [PERF_COUNT_HW_CACHE_RESULT_MAX],
                     u32 raw_event_mask);
 
+struct pmu_probe_info {
+       unsigned int cpuid;
+       unsigned int mask;
+       int (*init)(struct arm_pmu *);
+};
+
+#define PMU_PROBE(_cpuid, _mask, _fn)  \
+{                                      \
+       .cpuid = (_cpuid),              \
+       .mask = (_mask),                \
+       .init = (_fn),                  \
+}
+
+#define ARM_PMU_PROBE(_cpuid, _fn) \
+       PMU_PROBE(_cpuid, ARM_CPU_PART_MASK, _fn)
+
+#define ARM_PMU_XSCALE_MASK    ((0xff << 24) | ARM_CPU_XSCALE_ARCH_MASK)
+
+#define XSCALE_PMU_PROBE(_version, _fn) \
+       PMU_PROBE(ARM_CPU_IMP_INTEL << 24 | _version, ARM_PMU_XSCALE_MASK, _fn)
+
 #endif /* CONFIG_HW_PERF_EVENTS */
 
 #endif /* __ARM_PMU_H__ */
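Taken together, ARM_PMU_PROBE() and XSCALE_PMU_PROBE() just build a { cpuid, mask, init } triple, and a probe loop compares the masked CPUID against each entry. A minimal sketch of what one entry expands to and how it is matched (illustrative only; the real table and loop appear in the CPU PMU hunks further down):

```c
/* ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init) expands to: */
static const struct pmu_probe_info a9_probe = {
	.cpuid = ARM_CPU_PART_CORTEX_A9,
	.mask  = ARM_CPU_PART_MASK,
	.init  = armv7_a9_pmu_init,
};

/* ...and matching an entry boils down to a masked compare against CPUID: */
static int probe_one(struct arm_pmu *pmu, const struct pmu_probe_info *info)
{
	if ((read_cpuid_id() & info->mask) != info->cpuid)
		return -ENODEV;

	return info->init(pmu);
}
```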
@@ -82,7 +82,7 @@ obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o
 obj-$(CONFIG_CPU_PJ4)          += pj4-cp0.o
 obj-$(CONFIG_CPU_PJ4B)         += pj4-cp0.o
 obj-$(CONFIG_IWMMXT)           += iwmmxt.o
-obj-$(CONFIG_PERF_EVENTS)      += perf_regs.o
+obj-$(CONFIG_PERF_EVENTS)      += perf_regs.o perf_callchain.o
 obj-$(CONFIG_HW_PERF_EVENTS)   += perf_event.o perf_event_cpu.o
 AFLAGS_iwmmxt.o                        := -Wa,-mcpu=iwmmxt
 obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o

arch/arm/kernel/perf_callchain.c (new file, 136 lines)
@@ -0,0 +1,136 @@
+/*
+ * ARM callchain support
+ *
+ * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
+ *
+ * This code is based on the ARM OProfile backtrace code.
+ */
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+#include <asm/stacktrace.h>
+
+/*
+ * The registers we're interested in are at the end of the variable
+ * length saved register structure. The fp points at the end of this
+ * structure so the address of this struct is:
+ * (struct frame_tail *)(xxx->fp)-1
+ *
+ * This code has been adapted from the ARM OProfile support.
+ */
+struct frame_tail {
+       struct frame_tail __user *fp;
+       unsigned long sp;
+       unsigned long lr;
+} __attribute__((packed));
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static struct frame_tail __user *
+user_backtrace(struct frame_tail __user *tail,
+              struct perf_callchain_entry *entry)
+{
+       struct frame_tail buftail;
+       unsigned long err;
+
+       if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
+               return NULL;
+
+       pagefault_disable();
+       err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
+       pagefault_enable();
+
+       if (err)
+               return NULL;
+
+       perf_callchain_store(entry, buftail.lr);
+
+       /*
+        * Frame pointers should strictly progress back up the stack
+        * (towards higher addresses).
+        */
+       if (tail + 1 >= buftail.fp)
+               return NULL;
+
+       return buftail.fp - 1;
+}
+
+void
+perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct frame_tail __user *tail;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* We don't support guest os callchain now */
+               return;
+       }
+
+       perf_callchain_store(entry, regs->ARM_pc);
+
+       if (!current->mm)
+               return;
+
+       tail = (struct frame_tail __user *)regs->ARM_fp - 1;
+
+       while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
+              tail && !((unsigned long)tail & 0x3))
+               tail = user_backtrace(tail, entry);
+}
+
+/*
+ * Gets called by walk_stackframe() for every stackframe. This will be called
+ * whilst unwinding the stackframe and is like a subroutine return so we use
+ * the PC.
+ */
+static int
+callchain_trace(struct stackframe *fr,
+               void *data)
+{
+       struct perf_callchain_entry *entry = data;
+       perf_callchain_store(entry, fr->pc);
+       return 0;
+}
+
+void
+perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
+{
+       struct stackframe fr;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               /* We don't support guest os callchain now */
+               return;
+       }
+
+       arm_get_current_stackframe(regs, &fr);
+       walk_stackframe(&fr, callchain_trace, entry);
+}
+
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+               return perf_guest_cbs->get_guest_ip();
+
+       return instruction_pointer(regs);
+}
+
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+       int misc = 0;
+
+       if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+               if (perf_guest_cbs->is_user_mode())
+                       misc |= PERF_RECORD_MISC_GUEST_USER;
+               else
+                       misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+       } else {
+               if (user_mode(regs))
+                       misc |= PERF_RECORD_MISC_USER;
+               else
+                       misc |= PERF_RECORD_MISC_KERNEL;
+       }
+
+       return misc;
+}
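The `(struct frame_tail *)(xxx->fp) - 1` arithmetic above is the crux of the user-space walk: fp points just past the saved {fp, sp, lr} record, so stepping one `struct frame_tail` back lands on the record itself, and following the saved fp repeats the process up the stack. A self-contained sketch of the same walk over an ordinary (non-__user) chain, using hypothetical names:

```c
/*
 * Illustration only (not part of the patch): walking a synthetic chain of
 * frame records laid out the same way as struct frame_tail above.
 */
struct demo_tail {
	struct demo_tail *fp;	/* caller's frame pointer: points past its record */
	unsigned long sp;	/* saved stack pointer */
	unsigned long lr;	/* return address recorded for this frame */
};

static unsigned long demo_walk(struct demo_tail *tail, unsigned long *ips,
			       unsigned long max)
{
	unsigned long n = 0;

	while (tail && n < max) {
		ips[n++] = tail->lr;		/* record the return address */
		if (tail + 1 >= tail->fp)	/* frames must move to higher addresses */
			break;
		tail = tail->fp - 1;		/* fp points just past the next record */
	}
	return n;
}
```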
@@ -7,21 +7,18 @@
  * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
  *
  * This code is based on the sparc64 perf event code, which is in turn based
- * on the x86 code. Callchain code is based on the ARM OProfile backtrace
- * code.
+ * on the x86 code.
  */
 #define pr_fmt(fmt) "hw perfevents: " fmt
 
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-#include <linux/uaccess.h>
 #include <linux/irq.h>
 #include <linux/irqdesc.h>
 
 #include <asm/irq_regs.h>
 #include <asm/pmu.h>
-#include <asm/stacktrace.h>
 
 static int
 armpmu_map_cache_event(const unsigned (*cache_map)
@@ -80,8 +77,12 @@ armpmu_map_event(struct perf_event *event,
                 u32 raw_event_mask)
 {
        u64 config = event->attr.config;
+       int type = event->attr.type;
 
-       switch (event->attr.type) {
+       if (type == event->pmu->type)
+               return armpmu_map_raw_event(raw_event_mask, config);
+
+       switch (type) {
        case PERF_TYPE_HARDWARE:
                return armpmu_map_hw_event(event_map, config);
        case PERF_TYPE_HW_CACHE:
@@ -200,7 +201,7 @@ static void
 armpmu_del(struct perf_event *event, int flags)
 {
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx = hwc->idx;
 
@@ -217,7 +218,7 @@ static int
 armpmu_add(struct perf_event *event, int flags)
 {
        struct arm_pmu *armpmu = to_arm_pmu(event->pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        struct hw_perf_event *hwc = &event->hw;
        int idx;
        int err = 0;
@@ -274,14 +275,12 @@ validate_group(struct perf_event *event)
 {
        struct perf_event *sibling, *leader = event->group_leader;
        struct pmu_hw_events fake_pmu;
-       DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS);
 
        /*
         * Initialise the fake PMU. We only need to populate the
         * used_mask for the purposes of validation.
         */
-       memset(fake_used_mask, 0, sizeof(fake_used_mask));
-       fake_pmu.used_mask = fake_used_mask;
+       memset(&fake_pmu.used_mask, 0, sizeof(fake_pmu.used_mask));
 
        if (!validate_event(&fake_pmu, leader))
                return -EINVAL;
@@ -305,17 +304,21 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev)
        int ret;
        u64 start_clock, finish_clock;
 
-       if (irq_is_percpu(irq))
-               dev = *(void **)dev;
-       armpmu = dev;
+       /*
+        * we request the IRQ with a (possibly percpu) struct arm_pmu**, but
+        * the handlers expect a struct arm_pmu*. The percpu_irq framework will
+        * do any necessary shifting, we just need to perform the first
+        * dereference.
+        */
+       armpmu = *(void **)dev;
        plat_device = armpmu->plat_device;
        plat = dev_get_platdata(&plat_device->dev);
 
        start_clock = sched_clock();
        if (plat && plat->handle_irq)
-               ret = plat->handle_irq(irq, dev, armpmu->handle_irq);
+               ret = plat->handle_irq(irq, armpmu, armpmu->handle_irq);
        else
-               ret = armpmu->handle_irq(irq, dev);
+               ret = armpmu->handle_irq(irq, armpmu);
        finish_clock = sched_clock();
 
        perf_sample_event_took(finish_clock - start_clock);
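The single dereference is always safe because both IRQ registration paths (seen in the CPU PMU hunks below) now pass `&hw_events->percpu_pmu` as the dev_id, i.e. a pointer to a `struct arm_pmu *` slot, whether that slot is per-CPU or not. A reduced sketch under that assumption (helper names here are illustrative):

```c
/* Illustration only: how the dev_id ends up as a "pointer to a pointer". */
static irqreturn_t demo_dispatch_irq(int irq, void *dev)
{
	/* dev points at a percpu_pmu slot inside struct pmu_hw_events */
	struct arm_pmu *pmu = *(void **)dev;

	return pmu->handle_irq(irq, pmu);
}

static int demo_request(struct arm_pmu *pmu, int irq, int cpu, bool percpu)
{
	struct pmu_hw_events __percpu *hw_events = pmu->hw_events;

	if (percpu)	/* one slot per CPU; the core hands the right one over */
		return request_percpu_irq(irq, demo_dispatch_irq, "arm-pmu",
					  &hw_events->percpu_pmu);

	/* SPI case: pass the slot belonging to the target CPU */
	return request_irq(irq, demo_dispatch_irq,
			   IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
			   per_cpu_ptr(&hw_events->percpu_pmu, cpu));
}
```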
@@ -468,7 +471,7 @@ static int armpmu_event_init(struct perf_event *event)
 static void armpmu_enable(struct pmu *pmu)
 {
        struct arm_pmu *armpmu = to_arm_pmu(pmu);
-       struct pmu_hw_events *hw_events = armpmu->get_hw_events();
+       struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);
        int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events);
 
        if (enabled)
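This one-line change is the pattern repeated across the v6/v7/XScale and Krait hunks that follow: rather than bouncing through a `get_hw_events()` callback, callers that already run on the owning CPU resolve the per-CPU accounting data directly. A minimal before/after sketch (names as used in this diff; the two functions refer to the old and new struct arm_pmu layouts respectively):

```c
/* Before: each arm_pmu supplied a callback returning its accounting data. */
static int count_active_old(struct arm_pmu *armpmu)
{
	struct pmu_hw_events *hw_events = armpmu->get_hw_events();

	return bitmap_weight(hw_events->used_mask, armpmu->num_events);
}

/* After: hw_events is a __percpu pointer, resolved on the local CPU. */
static int count_active_new(struct arm_pmu *armpmu)
{
	struct pmu_hw_events *hw_events = this_cpu_ptr(armpmu->hw_events);

	return bitmap_weight(hw_events->used_mask, armpmu->num_events);
}
```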
@@ -533,130 +536,3 @@ int armpmu_register(struct arm_pmu *armpmu, int type)
        return perf_pmu_register(&armpmu->pmu, armpmu->name, type);
 }
-
-/*
- * Callchain handling code.
- */
-[... the rest of the deleted block (frame_tail, user_backtrace(),
-     perf_callchain_user(), callchain_trace(), perf_callchain_kernel(),
-     perf_instruction_pointer() and perf_misc_flags()) is identical to the
-     code added in arch/arm/kernel/perf_callchain.c above ...]
@@ -35,11 +35,6 @@
 /* Set at runtime when we know what CPU type we are. */
 static struct arm_pmu *cpu_pmu;
 
-static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu);
-static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events);
-static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask);
-static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events);
-
 /*
  * Despite the names, these two functions are CPU-specific and are used
  * by the OProfile/perf code.
@@ -69,11 +64,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
 #include "perf_event_v6.c"
 #include "perf_event_v7.c"
 
-static struct pmu_hw_events *cpu_pmu_get_cpu_events(void)
-{
-       return this_cpu_ptr(&cpu_hw_events);
-}
-
 static void cpu_pmu_enable_percpu_irq(void *data)
 {
        int irq = *(int *)data;
@ -92,20 +82,21 @@ static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
int i, irq, irqs;
|
||||
struct platform_device *pmu_device = cpu_pmu->plat_device;
|
||||
struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
|
||||
|
||||
irqs = min(pmu_device->num_resources, num_possible_cpus());
|
||||
|
||||
irq = platform_get_irq(pmu_device, 0);
|
||||
if (irq >= 0 && irq_is_percpu(irq)) {
|
||||
on_each_cpu(cpu_pmu_disable_percpu_irq, &irq, 1);
|
||||
free_percpu_irq(irq, &percpu_pmu);
|
||||
free_percpu_irq(irq, &hw_events->percpu_pmu);
|
||||
} else {
|
||||
for (i = 0; i < irqs; ++i) {
|
||||
if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs))
|
||||
continue;
|
||||
irq = platform_get_irq(pmu_device, i);
|
||||
if (irq >= 0)
|
||||
free_irq(irq, cpu_pmu);
|
||||
free_irq(irq, per_cpu_ptr(&hw_events->percpu_pmu, i));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -114,19 +105,21 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
|
||||
{
|
||||
int i, err, irq, irqs;
|
||||
struct platform_device *pmu_device = cpu_pmu->plat_device;
|
||||
struct pmu_hw_events __percpu *hw_events = cpu_pmu->hw_events;
|
||||
|
||||
if (!pmu_device)
|
||||
return -ENODEV;
|
||||
|
||||
irqs = min(pmu_device->num_resources, num_possible_cpus());
|
||||
if (irqs < 1) {
|
||||
printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
|
||||
pr_warn_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
irq = platform_get_irq(pmu_device, 0);
|
||||
if (irq >= 0 && irq_is_percpu(irq)) {
|
||||
err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu);
|
||||
err = request_percpu_irq(irq, handler, "arm-pmu",
|
||||
&hw_events->percpu_pmu);
|
||||
if (err) {
|
||||
pr_err("unable to request IRQ%d for ARM PMU counters\n",
|
||||
irq);
|
||||
@ -153,7 +146,7 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
|
||||
|
||||
err = request_irq(irq, handler,
|
||||
IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu",
|
||||
cpu_pmu);
|
||||
per_cpu_ptr(&hw_events->percpu_pmu, i));
|
||||
if (err) {
|
||||
pr_err("unable to request IRQ%d for ARM PMU counters\n",
|
||||
irq);
|
||||
@@ -167,18 +160,50 @@ static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler)
        return 0;
 }
 
-static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
+/*
+ * PMU hardware loses all context when a CPU goes offline.
+ * When a CPU is hotplugged back in, since some hardware registers are
+ * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
+ * junk values out of them.
+ */
+static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
+                         void *hcpu)
+{
+       struct arm_pmu *pmu = container_of(b, struct arm_pmu, hotplug_nb);
+
+       if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
+               return NOTIFY_DONE;
+
+       if (pmu->reset)
+               pmu->reset(pmu);
+       else
+               return NOTIFY_DONE;
+
+       return NOTIFY_OK;
+}
+
+static int cpu_pmu_init(struct arm_pmu *cpu_pmu)
 {
+       int err;
        int cpu;
+       struct pmu_hw_events __percpu *cpu_hw_events;
+
+       cpu_hw_events = alloc_percpu(struct pmu_hw_events);
+       if (!cpu_hw_events)
+               return -ENOMEM;
+
+       cpu_pmu->hotplug_nb.notifier_call = cpu_pmu_notify;
+       err = register_cpu_notifier(&cpu_pmu->hotplug_nb);
+       if (err)
+               goto out_hw_events;
+
        for_each_possible_cpu(cpu) {
-               struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu);
-               events->events = per_cpu(hw_events, cpu);
-               events->used_mask = per_cpu(used_mask, cpu);
+               struct pmu_hw_events *events = per_cpu_ptr(cpu_hw_events, cpu);
                raw_spin_lock_init(&events->pmu_lock);
-               per_cpu(percpu_pmu, cpu) = cpu_pmu;
+               events->percpu_pmu = cpu_pmu;
        }
 
-       cpu_pmu->get_hw_events  = cpu_pmu_get_cpu_events;
+       cpu_pmu->hw_events      = cpu_hw_events;
        cpu_pmu->request_irq    = cpu_pmu_request_irq;
        cpu_pmu->free_irq       = cpu_pmu_free_irq;
 
@ -189,32 +214,20 @@ static void cpu_pmu_init(struct arm_pmu *cpu_pmu)
|
||||
/* If no interrupts available, set the corresponding capability flag */
|
||||
if (!platform_get_irq(cpu_pmu->plat_device, 0))
|
||||
cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
|
||||
|
||||
return 0;
|
||||
|
||||
out_hw_events:
|
||||
free_percpu(cpu_hw_events);
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* PMU hardware loses all context when a CPU goes offline.
|
||||
* When a CPU is hotplugged back in, since some hardware registers are
|
||||
* UNKNOWN at reset, the PMU must be explicitly reset to avoid reading
|
||||
* junk values out of them.
|
||||
*/
|
||||
static int cpu_pmu_notify(struct notifier_block *b, unsigned long action,
|
||||
void *hcpu)
|
||||
static void cpu_pmu_destroy(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
if (cpu_pmu && cpu_pmu->reset)
|
||||
cpu_pmu->reset(cpu_pmu);
|
||||
else
|
||||
return NOTIFY_DONE;
|
||||
|
||||
return NOTIFY_OK;
|
||||
unregister_cpu_notifier(&cpu_pmu->hotplug_nb);
|
||||
free_percpu(cpu_pmu->hw_events);
|
||||
}
|
||||
|
||||
static struct notifier_block cpu_pmu_hotplug_notifier = {
|
||||
.notifier_call = cpu_pmu_notify,
|
||||
};
|
||||
|
||||
/*
|
||||
* PMU platform driver and devicetree bindings.
|
||||
*/
|
||||
@ -241,48 +254,34 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
|
||||
{},
|
||||
};
|
||||
|
||||
static const struct pmu_probe_info pmu_probe_table[] = {
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_ARM1136, armv6_1136_pmu_init),
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_ARM1156, armv6_1156_pmu_init),
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_ARM1176, armv6_1176_pmu_init),
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_ARM11MPCORE, armv6mpcore_pmu_init),
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A8, armv7_a8_pmu_init),
|
||||
ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A9, armv7_a9_pmu_init),
|
||||
XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V1, xscale1pmu_init),
|
||||
XSCALE_PMU_PROBE(ARM_CPU_XSCALE_ARCH_V2, xscale2pmu_init),
|
||||
{ /* sentinel value */ }
|
||||
};
|
||||
|
||||
/*
|
||||
* CPU PMU identification and probing.
|
||||
*/
|
||||
static int probe_current_pmu(struct arm_pmu *pmu)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
unsigned int cpuid = read_cpuid_id();
|
||||
int ret = -ENODEV;
|
||||
const struct pmu_probe_info *info;
|
||||
|
||||
pr_info("probing PMU on CPU %d\n", cpu);
|
||||
|
||||
switch (read_cpuid_part()) {
|
||||
/* ARM Ltd CPUs. */
|
||||
case ARM_CPU_PART_ARM1136:
|
||||
ret = armv6_1136_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM1156:
|
||||
ret = armv6_1156_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM1176:
|
||||
ret = armv6_1176_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM11MPCORE:
|
||||
ret = armv6mpcore_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A8:
|
||||
ret = armv7_a8_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A9:
|
||||
ret = armv7_a9_pmu_init(pmu);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
|
||||
switch (xscale_cpu_arch_version()) {
|
||||
case ARM_CPU_XSCALE_ARCH_V1:
|
||||
ret = xscale1pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_XSCALE_ARCH_V2:
|
||||
ret = xscale2pmu_init(pmu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (info = pmu_probe_table; info->init != NULL; info++) {
|
||||
if ((cpuid & info->mask) != info->cpuid)
|
||||
continue;
|
||||
ret = info->init(pmu);
|
||||
break;
|
||||
}
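With the probing now table-driven, supporting another core is a table entry rather than a new switch case. The entry below is purely illustrative: Cortex-A15 is not touched by this series, and the ARM_CPU_PART_CORTEX_A15 constant and armv7_a15_pmu_init() initialiser are assumed to be provided elsewhere (cputype.h and perf_event_v7.c respectively).

```c
/* Hypothetical extension of the probe table (not part of this diff): */
static const struct pmu_probe_info more_pmu_probes[] = {
	ARM_PMU_PROBE(ARM_CPU_PART_CORTEX_A15, armv7_a15_pmu_init),
	{ /* sentinel value */ }
};
```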
|
||||
|
||||
@@ -299,13 +298,13 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
        int ret = -ENODEV;
 
        if (cpu_pmu) {
-               pr_info("attempt to register multiple PMU devices!");
+               pr_info("attempt to register multiple PMU devices!\n");
                return -ENOSPC;
        }
 
        pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL);
        if (!pmu) {
-               pr_info("failed to allocate PMU device!");
+               pr_info("failed to allocate PMU device!\n");
                return -ENOMEM;
        }
 
@@ -320,18 +319,24 @@ static int cpu_pmu_device_probe(struct platform_device *pdev)
        }
 
        if (ret) {
-               pr_info("failed to probe PMU!");
+               pr_info("failed to probe PMU!\n");
                goto out_free;
        }
 
-       cpu_pmu_init(cpu_pmu);
-       ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW);
+       ret = cpu_pmu_init(cpu_pmu);
+       if (ret)
+               goto out_free;
 
-       if (!ret)
-               return 0;
+       ret = armpmu_register(cpu_pmu, -1);
+       if (ret)
+               goto out_destroy;
+
+       return 0;
 
+out_destroy:
+       cpu_pmu_destroy(cpu_pmu);
 out_free:
-       pr_info("failed to register PMU devices!");
+       pr_info("failed to register PMU devices!\n");
        kfree(pmu);
        return ret;
 }
@@ -348,16 +353,6 @@ static struct platform_driver cpu_pmu_driver = {
 
 static int __init register_pmu_driver(void)
 {
-       int err;
-
-       err = register_cpu_notifier(&cpu_pmu_hotplug_notifier);
-       if (err)
-               return err;
-
-       err = platform_driver_register(&cpu_pmu_driver);
-       if (err)
-               unregister_cpu_notifier(&cpu_pmu_hotplug_notifier);
-
-       return err;
+       return platform_driver_register(&cpu_pmu_driver);
 }
 device_initcall(register_pmu_driver);
@ -262,7 +262,7 @@ static void armv6pmu_enable_event(struct perf_event *event)
|
||||
unsigned long val, mask, evt, flags;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
@ -300,7 +300,7 @@ armv6pmu_handle_irq(int irq_num,
|
||||
unsigned long pmcr = armv6_pmcr_read();
|
||||
struct perf_sample_data data;
|
||||
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
|
||||
struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
@ -356,7 +356,7 @@ armv6pmu_handle_irq(int irq_num,
|
||||
static void armv6pmu_start(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
@ -368,7 +368,7 @@ static void armv6pmu_start(struct arm_pmu *cpu_pmu)
|
||||
static void armv6pmu_stop(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
@ -409,7 +409,7 @@ static void armv6pmu_disable_event(struct perf_event *event)
|
||||
unsigned long val, mask, evt, flags;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
@ -444,7 +444,7 @@ static void armv6mpcore_pmu_disable_event(struct perf_event *event)
|
||||
unsigned long val, mask, flags, evt = 0;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
|
@@ -564,13 +564,11 @@ static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx)
        return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx));
 }
 
-static inline int armv7_pmnc_select_counter(int idx)
+static inline void armv7_pmnc_select_counter(int idx)
 {
        u32 counter = ARMV7_IDX_TO_COUNTER(idx);
        asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter));
        isb();
-
-       return idx;
 }
 
 static inline u32 armv7pmu_read_counter(struct perf_event *event)
@@ -580,13 +578,15 @@ static inline u32 armv7pmu_read_counter(struct perf_event *event)
        int idx = hwc->idx;
        u32 value = 0;
 
-       if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
+       if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
                pr_err("CPU%u reading wrong counter %d\n",
                        smp_processor_id(), idx);
-       else if (idx == ARMV7_IDX_CYCLE_COUNTER)
+       } else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
                asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
-       else if (armv7_pmnc_select_counter(idx) == idx)
+       } else {
+               armv7_pmnc_select_counter(idx);
                asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));
+       }
 
        return value;
 }
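For readers unfamiliar with the v7 PMU interface: the event counters sit behind a selector register, so every access is a two-step "select, then read/write" sequence, which is why armv7_pmnc_select_counter() no longer needs a return value to validate. A compact illustration combining the two steps used above (register names per the ARMv7 ARM; this helper itself is not part of the patch):

```c
/*
 * Illustration only: select an event counter via PMSELR (c9, c12, 5),
 * then read the selected counter via PMXEVCNTR (c9, c13, 2). The isb()
 * orders the select against the following access.
 */
static inline u32 demo_read_event_counter(int idx)
{
	u32 counter = ARMV7_IDX_TO_COUNTER(idx);
	u32 value;

	asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); /* PMSELR */
	isb();
	asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value));    /* PMXEVCNTR */
	return value;
}
```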
@ -597,45 +597,43 @@ static inline void armv7pmu_write_counter(struct perf_event *event, u32 value)
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (!armv7_pmnc_counter_valid(cpu_pmu, idx))
|
||||
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
|
||||
pr_err("CPU%u writing wrong counter %d\n",
|
||||
smp_processor_id(), idx);
|
||||
else if (idx == ARMV7_IDX_CYCLE_COUNTER)
|
||||
} else if (idx == ARMV7_IDX_CYCLE_COUNTER) {
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
|
||||
else if (armv7_pmnc_select_counter(idx) == idx)
|
||||
} else {
|
||||
armv7_pmnc_select_counter(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
|
||||
}
|
||||
}
|
||||
|
||||
static inline void armv7_pmnc_write_evtsel(int idx, u32 val)
|
||||
{
|
||||
if (armv7_pmnc_select_counter(idx) == idx) {
|
||||
val &= ARMV7_EVTYPE_MASK;
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
|
||||
}
|
||||
armv7_pmnc_select_counter(idx);
|
||||
val &= ARMV7_EVTYPE_MASK;
|
||||
asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val));
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_enable_counter(int idx)
|
||||
static inline void armv7_pmnc_enable_counter(int idx)
|
||||
{
|
||||
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter)));
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_disable_counter(int idx)
|
||||
static inline void armv7_pmnc_disable_counter(int idx)
|
||||
{
|
||||
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter)));
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_enable_intens(int idx)
|
||||
static inline void armv7_pmnc_enable_intens(int idx)
|
||||
{
|
||||
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter)));
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_disable_intens(int idx)
|
||||
static inline void armv7_pmnc_disable_intens(int idx)
|
||||
{
|
||||
u32 counter = ARMV7_IDX_TO_COUNTER(idx);
|
||||
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter)));
|
||||
@ -643,8 +641,6 @@ static inline int armv7_pmnc_disable_intens(int idx)
|
||||
/* Clear the overflow flag in case an interrupt is pending. */
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter)));
|
||||
isb();
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7_pmnc_getreset_flags(void)
|
||||
@ -667,34 +663,34 @@ static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu)
|
||||
u32 val;
|
||||
unsigned int cnt;
|
||||
|
||||
printk(KERN_INFO "PMNC registers dump:\n");
|
||||
pr_info("PMNC registers dump:\n");
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val));
|
||||
printk(KERN_INFO "PMNC =0x%08x\n", val);
|
||||
pr_info("PMNC =0x%08x\n", val);
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val));
|
||||
printk(KERN_INFO "CNTENS=0x%08x\n", val);
|
||||
pr_info("CNTENS=0x%08x\n", val);
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val));
|
||||
printk(KERN_INFO "INTENS=0x%08x\n", val);
|
||||
pr_info("INTENS=0x%08x\n", val);
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
|
||||
printk(KERN_INFO "FLAGS =0x%08x\n", val);
|
||||
pr_info("FLAGS =0x%08x\n", val);
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val));
|
||||
printk(KERN_INFO "SELECT=0x%08x\n", val);
|
||||
pr_info("SELECT=0x%08x\n", val);
|
||||
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val));
|
||||
printk(KERN_INFO "CCNT =0x%08x\n", val);
|
||||
pr_info("CCNT =0x%08x\n", val);
|
||||
|
||||
for (cnt = ARMV7_IDX_COUNTER0;
|
||||
cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) {
|
||||
armv7_pmnc_select_counter(cnt);
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val));
|
||||
printk(KERN_INFO "CNT[%d] count =0x%08x\n",
|
||||
pr_info("CNT[%d] count =0x%08x\n",
|
||||
ARMV7_IDX_TO_COUNTER(cnt), val);
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val));
|
||||
printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
|
||||
pr_info("CNT[%d] evtsel=0x%08x\n",
|
||||
ARMV7_IDX_TO_COUNTER(cnt), val);
|
||||
}
|
||||
}
|
||||
@ -705,7 +701,7 @@ static void armv7pmu_enable_event(struct perf_event *event)
|
||||
unsigned long flags;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
|
||||
@ -751,7 +747,7 @@ static void armv7pmu_disable_event(struct perf_event *event)
|
||||
unsigned long flags;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) {
|
||||
@ -783,7 +779,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
|
||||
u32 pmnc;
|
||||
struct perf_sample_data data;
|
||||
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
|
||||
struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
@ -843,7 +839,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
|
||||
static void armv7pmu_start(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
/* Enable all counters */
|
||||
@ -854,7 +850,7 @@ static void armv7pmu_start(struct arm_pmu *cpu_pmu)
|
||||
static void armv7pmu_stop(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
/* Disable all counters */
|
||||
@ -1287,7 +1283,7 @@ static void krait_pmu_disable_event(struct perf_event *event)
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
/* Disable counter and interrupt */
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
@ -1313,7 +1309,7 @@ static void krait_pmu_enable_event(struct perf_event *event)
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
/*
|
||||
* Enable counter and interrupt, and set the counter to count
|
||||
|
@ -138,7 +138,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev)
|
||||
unsigned long pmnc;
|
||||
struct perf_sample_data data;
|
||||
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
|
||||
struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
@ -198,7 +198,7 @@ static void xscale1pmu_enable_event(struct perf_event *event)
|
||||
unsigned long val, mask, evt, flags;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
switch (idx) {
|
||||
@ -234,7 +234,7 @@ static void xscale1pmu_disable_event(struct perf_event *event)
|
||||
unsigned long val, mask, evt, flags;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
switch (idx) {
|
||||
@ -287,7 +287,7 @@ xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc,
|
||||
static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
@ -299,7 +299,7 @@ static void xscale1pmu_start(struct arm_pmu *cpu_pmu)
|
||||
static void xscale1pmu_stop(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
@ -485,7 +485,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev)
|
||||
unsigned long pmnc, of_flags;
|
||||
struct perf_sample_data data;
|
||||
struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev;
|
||||
struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
@ -539,7 +539,7 @@ static void xscale2pmu_enable_event(struct perf_event *event)
|
||||
unsigned long flags, ien, evtsel;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
ien = xscale2pmu_read_int_enable();
|
||||
@ -585,7 +585,7 @@ static void xscale2pmu_disable_event(struct perf_event *event)
|
||||
unsigned long flags, ien, evtsel, of_flags;
|
||||
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
int idx = hwc->idx;
|
||||
|
||||
ien = xscale2pmu_read_int_enable();
|
||||
@ -651,7 +651,7 @@ out:
|
||||
static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
|
||||
@ -663,7 +663,7 @@ static void xscale2pmu_start(struct arm_pmu *cpu_pmu)
|
||||
static void xscale2pmu_stop(struct arm_pmu *cpu_pmu)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
struct pmu_hw_events *events = cpu_pmu->get_hw_events();
|
||||
struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
val = xscale2pmu_read_pmnc();
|
||||
|
@ -16,17 +16,17 @@
|
||||
|
||||
#include <linux/arm-cci.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/of_address.h>
|
||||
#include <linux/of_irq.h>
|
||||
#include <linux/of_platform.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/irq_regs.h>
|
||||
#include <asm/pmu.h>
|
||||
#include <asm/smp_plat.h>
|
||||
|
||||
#define DRIVER_NAME "CCI-400"
|
||||
@ -98,6 +98,8 @@ static unsigned long cci_ctrl_phys;
|
||||
|
||||
#define CCI_PMU_CNTR_BASE(idx) ((idx) * SZ_4K)
|
||||
|
||||
#define CCI_PMU_CNTR_MASK ((1ULL << 32) -1)
|
||||
|
||||
/*
|
||||
* Instead of an event id to monitor CCI cycles, a dedicated counter is
|
||||
* provided. Use 0xff to represent CCI cycles and hope that no future revisions
|
||||
@ -170,18 +172,29 @@ static char *const pmu_names[] = {
|
||||
[CCI_REV_R1] = "CCI_400_r1",
|
||||
};
|
||||
|
||||
struct cci_pmu_drv_data {
|
||||
struct cci_pmu_hw_events {
|
||||
struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
|
||||
unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
|
||||
raw_spinlock_t pmu_lock;
|
||||
};
|
||||
|
||||
struct cci_pmu {
|
||||
void __iomem *base;
|
||||
struct arm_pmu *cci_pmu;
|
||||
struct pmu pmu;
|
||||
int nr_irqs;
|
||||
int irqs[CCI_PMU_MAX_HW_EVENTS];
|
||||
unsigned long active_irqs;
|
||||
struct perf_event *events[CCI_PMU_MAX_HW_EVENTS];
|
||||
unsigned long used_mask[BITS_TO_LONGS(CCI_PMU_MAX_HW_EVENTS)];
|
||||
struct pmu_port_event_ranges *port_ranges;
|
||||
struct pmu_hw_events hw_events;
|
||||
struct cci_pmu_hw_events hw_events;
|
||||
struct platform_device *plat_device;
|
||||
int num_events;
|
||||
atomic_t active_events;
|
||||
struct mutex reserve_mutex;
|
||||
cpumask_t cpus;
|
||||
};
|
||||
static struct cci_pmu_drv_data *pmu;
|
||||
static struct cci_pmu *pmu;
|
||||
|
||||
#define to_cci_pmu(c) (container_of(c, struct cci_pmu, pmu))
|
||||
|
||||
static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs)
|
||||
{
|
||||
@ -252,7 +265,7 @@ static int pmu_validate_hw_event(u8 hw_event)
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static int pmu_is_valid_counter(struct arm_pmu *cci_pmu, int idx)
|
||||
static int pmu_is_valid_counter(struct cci_pmu *cci_pmu, int idx)
|
||||
{
|
||||
return CCI_PMU_CYCLE_CNTR_IDX <= idx &&
|
||||
idx <= CCI_PMU_CNTR_LAST(cci_pmu);
|
||||
@ -293,14 +306,9 @@ static u32 pmu_get_max_counters(void)
|
||||
return n_cnts + 1;
|
||||
}
|
||||
|
||||
static struct pmu_hw_events *pmu_get_hw_events(void)
|
||||
static int pmu_get_event_idx(struct cci_pmu_hw_events *hw, struct perf_event *event)
|
||||
{
|
||||
return &pmu->hw_events;
|
||||
}
|
||||
|
||||
static int pmu_get_event_idx(struct pmu_hw_events *hw, struct perf_event *event)
|
||||
{
|
||||
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_event = &event->hw;
|
||||
unsigned long cci_event = hw_event->config_base & CCI_PMU_EVENT_MASK;
|
||||
int idx;
|
||||
@ -336,7 +344,7 @@ static int pmu_map_event(struct perf_event *event)
|
||||
return mapping;
|
||||
}
|
||||
|
||||
static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
|
||||
static int pmu_request_irq(struct cci_pmu *cci_pmu, irq_handler_t handler)
|
||||
{
|
||||
int i;
|
||||
struct platform_device *pmu_device = cci_pmu->plat_device;
|
||||
@ -371,17 +379,91 @@ static int pmu_request_irq(struct arm_pmu *cci_pmu, irq_handler_t handler)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void pmu_free_irq(struct cci_pmu *cci_pmu)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pmu->nr_irqs; i++) {
|
||||
if (!test_and_clear_bit(i, &pmu->active_irqs))
|
||||
continue;
|
||||
|
||||
free_irq(pmu->irqs[i], cci_pmu);
|
||||
}
|
||||
}
|
||||
|
||||
static u32 pmu_read_counter(struct perf_event *event)
|
||||
{
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
u32 value;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
return 0;
|
||||
}
|
||||
value = pmu_read_register(idx, CCI_PMU_CNTR);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static void pmu_write_counter(struct perf_event *event, u32 value)
|
||||
{
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
else
|
||||
pmu_write_register(value, idx, CCI_PMU_CNTR);
|
||||
}
|
||||
|
||||
static u64 pmu_event_update(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
u64 delta, prev_raw_count, new_raw_count;
|
||||
|
||||
do {
|
||||
prev_raw_count = local64_read(&hwc->prev_count);
|
||||
new_raw_count = pmu_read_counter(event);
|
||||
} while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
|
||||
new_raw_count) != prev_raw_count);
|
||||
|
||||
delta = (new_raw_count - prev_raw_count) & CCI_PMU_CNTR_MASK;
|
||||
|
||||
local64_add(delta, &event->count);
|
||||
|
||||
return new_raw_count;
|
||||
}
|
||||
|
||||
static void pmu_read(struct perf_event *event)
|
||||
{
|
||||
pmu_event_update(event);
|
||||
}
|
||||
|
||||
void pmu_event_set_period(struct perf_event *event)
|
||||
{
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
/*
|
||||
* The CCI PMU counters have a period of 2^32. To account for the
|
||||
* possiblity of extreme interrupt latency we program for a period of
|
||||
* half that. Hopefully we can handle the interrupt before another 2^31
|
||||
* events occur and the counter overtakes its previous value.
|
||||
*/
|
||||
u64 val = 1ULL << 31;
|
||||
local64_set(&hwc->prev_count, val);
|
||||
pmu_write_counter(event, val);
|
||||
}
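The arithmetic behind this is worth spelling out: the counter is started at 2^31, and pmu_event_update() masks the delta with CCI_PMU_CNTR_MASK, so even if the 32-bit counter wraps once before the interrupt is serviced the computed delta is still correct. A standalone sketch of that wraparound calculation:

```c
/*
 * Illustration only: why programming the counter to 2^31 and masking the
 * delta to 32 bits tolerates a wrap between overflow and update.
 */
#include <stdint.h>
#include <stdio.h>

#define CCI_PMU_CNTR_MASK	((1ULL << 32) - 1)

int main(void)
{
	uint64_t prev = 1ULL << 31;	/* value programmed by pmu_event_set_period() */
	uint64_t now  = 0x00000123;	/* counter overflowed past 2^32 and wrapped */
	uint64_t delta = (now - prev) & CCI_PMU_CNTR_MASK;

	/* delta == 0x80000123: the 2^31 headroom means no events were lost */
	printf("delta = %#llx\n", (unsigned long long)delta);
	return 0;
}
```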
|
||||
|
||||
static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct arm_pmu *cci_pmu = (struct arm_pmu *)dev;
|
||||
struct pmu_hw_events *events = cci_pmu->get_hw_events();
|
||||
struct perf_sample_data data;
|
||||
struct pt_regs *regs;
|
||||
struct cci_pmu *cci_pmu = dev;
|
||||
struct cci_pmu_hw_events *events = &pmu->hw_events;
|
||||
int idx, handled = IRQ_NONE;
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
regs = get_irq_regs();
|
||||
/*
|
||||
* Iterate over counters and update the corresponding perf events.
|
||||
* This should work regardless of whether we have per-counter overflow
|
||||
@ -403,154 +485,407 @@ static irqreturn_t pmu_handle_irq(int irq_num, void *dev)
|
||||
|
||||
pmu_write_register(CCI_PMU_OVRFLW_FLAG, idx, CCI_PMU_OVRFLW);
|
||||
|
||||
pmu_event_update(event);
|
||||
pmu_event_set_period(event);
|
||||
handled = IRQ_HANDLED;
|
||||
|
||||
armpmu_event_update(event);
|
||||
perf_sample_data_init(&data, 0, hw_counter->last_period);
|
||||
if (!armpmu_event_set_period(event))
|
||||
continue;
|
||||
|
||||
if (perf_event_overflow(event, &data, regs))
|
||||
cci_pmu->disable(event);
|
||||
}
|
||||
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
|
||||
|
||||
return IRQ_RETVAL(handled);
|
||||
}
|
||||
|
||||
static void pmu_free_irq(struct arm_pmu *cci_pmu)
|
||||
static int cci_pmu_get_hw(struct cci_pmu *cci_pmu)
|
||||
{
|
||||
int i;
|
||||
int ret = pmu_request_irq(cci_pmu, pmu_handle_irq);
|
||||
if (ret) {
|
||||
pmu_free_irq(cci_pmu);
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < pmu->nr_irqs; i++) {
|
||||
if (!test_and_clear_bit(i, &pmu->active_irqs))
|
||||
continue;
|
||||
static void cci_pmu_put_hw(struct cci_pmu *cci_pmu)
|
||||
{
|
||||
pmu_free_irq(cci_pmu);
|
||||
}
|
||||
|
||||
free_irq(pmu->irqs[i], cci_pmu);
|
||||
static void hw_perf_event_destroy(struct perf_event *event)
|
||||
{
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
atomic_t *active_events = &cci_pmu->active_events;
|
||||
struct mutex *reserve_mutex = &cci_pmu->reserve_mutex;
|
||||
|
||||
if (atomic_dec_and_mutex_lock(active_events, reserve_mutex)) {
|
||||
cci_pmu_put_hw(cci_pmu);
|
||||
mutex_unlock(reserve_mutex);
|
||||
}
|
||||
}
|
||||
|
||||
static void pmu_enable_event(struct perf_event *event)
|
||||
static void cci_pmu_enable(struct pmu *pmu)
|
||||
{
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
|
||||
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
|
||||
int enabled = bitmap_weight(hw_events->used_mask, cci_pmu->num_events);
|
||||
unsigned long flags;
|
||||
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
|
||||
struct pmu_hw_events *events = cci_pmu->get_hw_events();
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
|
||||
/* Configure the event to count, unless you are counting cycles */
|
||||
if (idx != CCI_PMU_CYCLE_CNTR_IDX)
|
||||
pmu_set_event(idx, hw_counter->config_base);
|
||||
|
||||
pmu_enable_counter(idx);
|
||||
|
||||
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void pmu_disable_event(struct perf_event *event)
|
||||
{
|
||||
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
pmu_disable_counter(idx);
|
||||
}
|
||||
|
||||
static void pmu_start(struct arm_pmu *cci_pmu)
|
||||
{
|
||||
u32 val;
|
||||
unsigned long flags;
|
||||
struct pmu_hw_events *events = cci_pmu->get_hw_events();
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
|
||||
|
||||
/* Enable all the PMU counters. */
|
||||
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) | CCI_PMCR_CEN;
|
||||
writel(val, cci_ctrl_base + CCI_PMCR);
|
||||
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
|
||||
|
||||
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void pmu_stop(struct arm_pmu *cci_pmu)
|
||||
static void cci_pmu_disable(struct pmu *pmu)
|
||||
{
|
||||
u32 val;
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(pmu);
|
||||
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
|
||||
unsigned long flags;
|
||||
struct pmu_hw_events *events = cci_pmu->get_hw_events();
|
||||
u32 val;
|
||||
|
||||
raw_spin_lock_irqsave(&events->pmu_lock, flags);
|
||||
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
|
||||
|
||||
/* Disable all the PMU counters. */
|
||||
val = readl_relaxed(cci_ctrl_base + CCI_PMCR) & ~CCI_PMCR_CEN;
|
||||
writel(val, cci_ctrl_base + CCI_PMCR);
|
||||
|
||||
raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
|
||||
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
|
||||
}
|
||||
|
||||
static u32 pmu_read_counter(struct perf_event *event)
|
||||
static void cci_pmu_start(struct perf_event *event, int pmu_flags)
|
||||
{
|
||||
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
u32 value;
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* To handle interrupt latency, we always reprogram the period
|
||||
* regardlesss of PERF_EF_RELOAD.
|
||||
*/
|
||||
if (pmu_flags & PERF_EF_RELOAD)
|
||||
WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
|
||||
|
||||
hwc->state = 0;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
return 0;
|
||||
return;
|
||||
}
|
||||
value = pmu_read_register(idx, CCI_PMU_CNTR);
|
||||
|
||||
return value;
|
||||
raw_spin_lock_irqsave(&hw_events->pmu_lock, flags);
|
||||
|
||||
/* Configure the event to count, unless you are counting cycles */
|
||||
if (idx != CCI_PMU_CYCLE_CNTR_IDX)
|
||||
pmu_set_event(idx, hwc->config_base);
|
||||
|
||||
pmu_event_set_period(event);
|
||||
pmu_enable_counter(idx);
|
||||
|
||||
raw_spin_unlock_irqrestore(&hw_events->pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void pmu_write_counter(struct perf_event *event, u32 value)
|
||||
static void cci_pmu_stop(struct perf_event *event, int pmu_flags)
|
||||
{
|
||||
struct arm_pmu *cci_pmu = to_arm_pmu(event->pmu);
|
||||
struct hw_perf_event *hw_counter = &event->hw;
|
||||
int idx = hw_counter->idx;
|
||||
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
|
||||
struct hw_perf_event *hwc = &event->hw;
|
||||
int idx = hwc->idx;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx)))
|
||||
if (hwc->state & PERF_HES_STOPPED)
|
||||
return;
|
||||
|
||||
if (unlikely(!pmu_is_valid_counter(cci_pmu, idx))) {
|
||||
dev_err(&cci_pmu->plat_device->dev, "Invalid CCI PMU counter %d\n", idx);
|
||||
else
|
||||
pmu_write_register(value, idx, CCI_PMU_CNTR);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We always reprogram the counter, so ignore PERF_EF_UPDATE. See
|
||||
* cci_pmu_start()
|
||||
*/
|
||||
pmu_disable_counter(idx);
|
||||
pmu_event_update(event);
|
||||
hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE;
|
||||
}
|
||||
|
||||
static int cci_pmu_init(struct arm_pmu *cci_pmu, struct platform_device *pdev)
static int cci_pmu_add(struct perf_event *event, int flags)
{
*cci_pmu = (struct arm_pmu){
.name = pmu_names[probe_cci_revision()],
.max_period = (1LLU << 32) - 1,
.get_hw_events = pmu_get_hw_events,
.get_event_idx = pmu_get_event_idx,
.map_event = pmu_map_event,
.request_irq = pmu_request_irq,
.handle_irq = pmu_handle_irq,
.free_irq = pmu_free_irq,
.enable = pmu_enable_event,
.disable = pmu_disable_event,
.start = pmu_start,
.stop = pmu_stop,
.read_counter = pmu_read_counter,
.write_counter = pmu_write_counter,
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
struct hw_perf_event *hwc = &event->hw;
int idx;
int err = 0;

perf_pmu_disable(event->pmu);

/* If we don't have a space for the counter then finish early. */
idx = pmu_get_event_idx(hw_events, event);
if (idx < 0) {
err = idx;
goto out;
}

event->hw.idx = idx;
hw_events->events[idx] = event;

hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
if (flags & PERF_EF_START)
cci_pmu_start(event, PERF_EF_RELOAD);

/* Propagate our changes to the userspace mapping. */
perf_event_update_userpage(event);

out:
perf_pmu_enable(event->pmu);
return err;
}

static void cci_pmu_del(struct perf_event *event, int flags)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events;
struct hw_perf_event *hwc = &event->hw;
int idx = hwc->idx;

cci_pmu_stop(event, PERF_EF_UPDATE);
hw_events->events[idx] = NULL;
clear_bit(idx, hw_events->used_mask);

perf_event_update_userpage(event);
}

static int
validate_event(struct cci_pmu_hw_events *hw_events,
struct perf_event *event)
{
if (is_software_event(event))
return 1;

if (event->state < PERF_EVENT_STATE_OFF)
return 1;

if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec)
return 1;

return pmu_get_event_idx(hw_events, event) >= 0;
}

static int
validate_group(struct perf_event *event)
{
struct perf_event *sibling, *leader = event->group_leader;
struct cci_pmu_hw_events fake_pmu = {
/*
 * Initialise the fake PMU. We only need to populate the
 * used_mask for the purposes of validation.
 */
.used_mask = CPU_BITS_NONE,
};

if (!validate_event(&fake_pmu, leader))
return -EINVAL;

list_for_each_entry(sibling, &leader->sibling_list, group_entry) {
if (!validate_event(&fake_pmu, sibling))
return -EINVAL;
}

if (!validate_event(&fake_pmu, event))
return -EINVAL;

return 0;
}

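The fake_pmu trick above is worth spelling out: validate_group() dry-runs counter allocation for the whole event group against a scratch used_mask, so a group that could never be scheduled together is refused at creation time instead of failing silently later. A minimal userspace sketch of that idea follows; the counter count and helper names are illustrative, not taken from the driver.

    /* Standalone sketch: dry-run counter allocation for an event group
     * against a scratch bitmap, the way validate_group() does above.
     * NUM_COUNTERS and the helper names are hypothetical. */
    #include <stdio.h>

    #define NUM_COUNTERS 5

    struct fake_hw {
        unsigned long used_mask;        /* stands in for hw_events->used_mask */
    };

    /* Grab the first free counter, mirroring a get_event_idx() callback. */
    static int get_counter(struct fake_hw *hw)
    {
        for (int i = 0; i < NUM_COUNTERS; i++) {
            if (!(hw->used_mask & (1UL << i))) {
                hw->used_mask |= 1UL << i;
                return i;
            }
        }
        return -1;                      /* no room: reject the whole group */
    }

    int main(void)
    {
        struct fake_hw fake = { 0 };    /* like fake_pmu with used_mask = CPU_BITS_NONE */
        int group_size = 7;             /* leader plus six siblings, say */

        for (int i = 0; i < group_size; i++) {
            if (get_counter(&fake) < 0) {
                printf("event %d does not fit: group rejected (-EINVAL)\n", i);
                return 1;
            }
        }
        printf("group of %d events fits on %d counters\n", group_size, NUM_COUNTERS);
        return 0;
    }
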
static int
__hw_perf_event_init(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
int mapping;

mapping = pmu_map_event(event);

if (mapping < 0) {
pr_debug("event %x:%llx not supported\n", event->attr.type,
event->attr.config);
return mapping;
}

/*
 * We don't assign an index until we actually place the event onto
 * hardware. Use -1 to signify that we haven't decided where to put it
 * yet.
 */
hwc->idx = -1;
hwc->config_base = 0;
hwc->config = 0;
hwc->event_base = 0;

/*
 * Store the event encoding into the config_base field.
 */
hwc->config_base |= (unsigned long)mapping;

/*
 * Limit the sample_period to half of the counter width. That way, the
 * new counter value is far less likely to overtake the previous one
 * unless you have some serious IRQ latency issues.
 */
hwc->sample_period = CCI_PMU_CNTR_MASK >> 1;
hwc->last_period = hwc->sample_period;
local64_set(&hwc->period_left, hwc->sample_period);

if (event->group_leader != event) {
if (validate_group(event) != 0)
return -EINVAL;
}

return 0;
}

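The sample_period cap in __hw_perf_event_init() relies on the usual wrap-safe delta arithmetic: as long as a counter cannot advance by more than half its width between two reads, (new - prev) & mask always recovers the true increment. A small standalone sketch with a 32-bit mask, which matches the (1LLU << 32) - 1 max_period in the removed arm_pmu initialiser; the values are illustrative only.

    /* Wrap-safe counter delta, assuming 32-bit counters as above. */
    #include <stdint.h>
    #include <stdio.h>

    #define CNTR_MASK 0xffffffffULL     /* illustrative 32-bit counter mask */

    static uint64_t counter_delta(uint64_t prev, uint64_t now)
    {
        return (now - prev) & CNTR_MASK;
    }

    int main(void)
    {
        uint64_t prev = 0xfffffff0;     /* read just before the counter wraps */
        uint64_t now  = 0x00000010;     /* read again after it has wrapped */

        /* Prints 32: the wrap is absorbed by the masked subtraction. */
        printf("delta = %llu\n", (unsigned long long)counter_delta(prev, now));

        /* With sample_period capped at CNTR_MASK >> 1, 'now' would need
         * roughly 2^31 more events to lap 'prev', which leaves ample time
         * to service the overflow interrupt. */
        return 0;
    }
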
static int cci_pmu_event_init(struct perf_event *event)
{
struct cci_pmu *cci_pmu = to_cci_pmu(event->pmu);
atomic_t *active_events = &cci_pmu->active_events;
int err = 0;
int cpu;

if (event->attr.type != event->pmu->type)
return -ENOENT;

/* Shared by all CPUs, no meaningful state to sample */
if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
return -EOPNOTSUPP;

/* We have no filtering of any kind */
if (event->attr.exclude_user ||
event->attr.exclude_kernel ||
event->attr.exclude_hv ||
event->attr.exclude_idle ||
event->attr.exclude_host ||
event->attr.exclude_guest)
return -EINVAL;

/*
 * Following the example set by other "uncore" PMUs, we accept any CPU
 * and rewrite its affinity dynamically rather than having perf core
 * handle cpu == -1 and pid == -1 for this case.
 *
 * The perf core will pin online CPUs for the duration of this call and
 * the event being installed into its context, so the PMU's CPU can't
 * change under our feet.
 */
cpu = cpumask_first(&cci_pmu->cpus);
if (event->cpu < 0 || cpu < 0)
return -EINVAL;
event->cpu = cpu;

event->destroy = hw_perf_event_destroy;
if (!atomic_inc_not_zero(active_events)) {
mutex_lock(&cci_pmu->reserve_mutex);
if (atomic_read(active_events) == 0)
err = cci_pmu_get_hw(cci_pmu);
if (!err)
atomic_inc(active_events);
mutex_unlock(&cci_pmu->reserve_mutex);
}
if (err)
return err;

err = __hw_perf_event_init(event);
if (err)
hw_perf_event_destroy(event);

return err;
}

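cci_pmu_event_init() also documents how these events are meant to be opened from userspace: sampling and per-task events are refused, so a caller passes pid == -1 with any cpu >= 0 and lets the driver rewrite the CPU to its nominated one. Below is a sketch using the perf_event_open() syscall; the type number and config encoding are placeholders (the real type is published in /sys/bus/event_source/devices/<pmu>/type).

    /* Open a counting (non-sampling) uncore event from userspace.
     * attr.type and attr.config are placeholders, not real CCI encodings. */
    #include <linux/perf_event.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        struct perf_event_attr attr;
        uint64_t count;
        int fd;

        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = 42;         /* placeholder: read the PMU's sysfs "type" file */
        attr.config = 0x0;      /* placeholder event encoding */
        /* no sample_period: event_init above returns -EOPNOTSUPP for sampling */

        /* pid == -1, cpu >= 0: a CPU-bound, system-wide event. The driver
         * rewrites event->cpu to its own nominated CPU in any case. */
        fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
        if (fd < 0) {
            perror("perf_event_open");
            return 1;
        }

        /* ... run the workload of interest ... */

        if (read(fd, &count, sizeof(count)) == sizeof(count))
            printf("count = %llu\n", (unsigned long long)count);

        close(fd);
        return 0;
    }
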
static ssize_t pmu_attr_cpumask_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &pmu->cpus);

buf[n++] = '\n';
buf[n] = '\0';
return n;
}

static DEVICE_ATTR(cpumask, S_IRUGO, pmu_attr_cpumask_show, NULL);

static struct attribute *pmu_attrs[] = {
&dev_attr_cpumask.attr,
NULL,
};

static struct attribute_group pmu_attr_group = {
.attrs = pmu_attrs,
};

static const struct attribute_group *pmu_attr_groups[] = {
&pmu_attr_group,
NULL
};

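The cpumask attribute above is what tells tooling which CPU to open events on. A short consumer sketch follows; the "CCI_400" device name in the path is an assumption, substitute whatever name the PMU actually registers with.

    /* Read the advertised CPU list for an uncore PMU from sysfs.
     * The "CCI_400" name is assumed for illustration. */
    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/bus/event_source/devices/CCI_400/cpumask";
        char buf[64];
        FILE *f = fopen(path, "r");

        if (!f) {
            perror(path);
            return 1;
        }
        if (fgets(buf, sizeof(buf), f))
            printf("open CCI events on CPU(s): %s", buf);
        fclose(f);
        return 0;
    }
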
static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev)
{
char *name = pmu_names[probe_cci_revision()];
cci_pmu->pmu = (struct pmu) {
.name = pmu_names[probe_cci_revision()],
.task_ctx_nr = perf_invalid_context,
.pmu_enable = cci_pmu_enable,
.pmu_disable = cci_pmu_disable,
.event_init = cci_pmu_event_init,
.add = cci_pmu_add,
.del = cci_pmu_del,
.start = cci_pmu_start,
.stop = cci_pmu_stop,
.read = pmu_read,
.attr_groups = pmu_attr_groups,
};

cci_pmu->plat_device = pdev;
cci_pmu->num_events = pmu_get_max_counters();

return armpmu_register(cci_pmu, -1);
return perf_pmu_register(&cci_pmu->pmu, name, -1);
}

static int cci_pmu_cpu_notifier(struct notifier_block *self,
unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
unsigned int target;

switch (action & ~CPU_TASKS_FROZEN) {
case CPU_DOWN_PREPARE:
if (!cpumask_test_and_clear_cpu(cpu, &pmu->cpus))
break;
target = cpumask_any_but(cpu_online_mask, cpu);
if (target < 0) // UP, last CPU
break;
/*
 * TODO: migrate context once core races on event->ctx have
 * been fixed.
 */
cpumask_set_cpu(target, &pmu->cpus);
default:
break;
}

return NOTIFY_OK;
}

static struct notifier_block cci_pmu_cpu_nb = {
.notifier_call = cci_pmu_cpu_notifier,
/*
 * to migrate uncore events, our notifier should be executed
 * before perf core's notifier.
 */
.priority = CPU_PRI_PERF + 1,
};

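The notifier's only job at this stage is to keep the PMU nominally owned by an online CPU: on CPU_DOWN_PREPARE it drops the dying CPU from pmu->cpus and picks any other online one (actual context migration is still a TODO above). A tiny standalone analogue of that selection, using a plain bit mask where the kernel uses cpu_online_mask:

    /* Pick any online CPU other than the one going down, in the spirit of
     * cpumask_any_but(). NR_CPUS and the mask value are illustrative. */
    #include <stdio.h>

    #define NR_CPUS 8

    static int any_but(unsigned int online_mask, int dying)
    {
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
            if ((online_mask & (1u << cpu)) && cpu != dying)
                return cpu;
        return -1;                      /* uniprocessor: nowhere to migrate */
    }

    int main(void)
    {
        unsigned int online = 0x0f;     /* CPUs 0-3 online */
        int dying = 2;

        printf("PMU ownership moves from CPU %d to CPU %d\n",
               dying, any_but(online, dying));
        return 0;
    }
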
static const struct of_device_id arm_cci_pmu_matches[] = {
{
.compatible = "arm,cci-400-pmu",
@ -604,15 +939,16 @@ static int cci_pmu_probe(struct platform_device *pdev)
return -EINVAL;
}

pmu->cci_pmu = devm_kzalloc(&pdev->dev, sizeof(*(pmu->cci_pmu)), GFP_KERNEL);
if (!pmu->cci_pmu)
return -ENOMEM;

pmu->hw_events.events = pmu->events;
pmu->hw_events.used_mask = pmu->used_mask;
raw_spin_lock_init(&pmu->hw_events.pmu_lock);
mutex_init(&pmu->reserve_mutex);
atomic_set(&pmu->active_events, 0);
cpumask_set_cpu(smp_processor_id(), &pmu->cpus);

ret = cci_pmu_init(pmu->cci_pmu, pdev);
ret = register_cpu_notifier(&cci_pmu_cpu_nb);
if (ret)
return ret;

ret = cci_pmu_init(pmu, pdev);
if (ret)
return ret;