forked from Minki/linux
Merge branch 'clksrc' into devel
Conflicts: arch/arm/mach-vexpress/v2m.c arch/arm/plat-omap/counter_32k.c arch/arm/plat-versatile/Makefile
This commit is contained in:
commit
58daf18cdc
@ -14,6 +14,7 @@ config ARM
|
||||
select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
|
||||
select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL)
|
||||
select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL)
|
||||
select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
|
||||
select HAVE_GENERIC_DMA_COHERENT
|
||||
select HAVE_KERNEL_GZIP
|
||||
select HAVE_KERNEL_LZO
|
||||
@ -38,6 +39,9 @@ config HAVE_PWM
|
||||
config SYS_SUPPORTS_APM_EMULATION
|
||||
bool
|
||||
|
||||
config HAVE_SCHED_CLOCK
|
||||
bool
|
||||
|
||||
config GENERIC_GPIO
|
||||
bool
|
||||
|
||||
@ -233,6 +237,7 @@ config ARCH_REALVIEW
|
||||
bool "ARM Ltd. RealView family"
|
||||
select ARM_AMBA
|
||||
select COMMON_CLKDEV
|
||||
select HAVE_SCHED_CLOCK
|
||||
select ICST
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
@ -247,6 +252,7 @@ config ARCH_VERSATILE
|
||||
select ARM_AMBA
|
||||
select ARM_VIC
|
||||
select COMMON_CLKDEV
|
||||
select HAVE_SCHED_CLOCK
|
||||
select ICST
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select ARCH_WANT_OPTIONAL_GPIOLIB
|
||||
@ -263,6 +269,7 @@ config ARCH_VEXPRESS
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_CLK
|
||||
select HAVE_SCHED_CLOCK
|
||||
select ICST
|
||||
select PLAT_VERSATILE
|
||||
help
|
||||
@ -434,6 +441,7 @@ config ARCH_IXP4XX
|
||||
select CPU_XSCALE
|
||||
select GENERIC_GPIO
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_SCHED_CLOCK
|
||||
select DMABOUNCE if PCI
|
||||
help
|
||||
Support for Intel's IXP4XX (XScale) family of processors.
|
||||
@ -509,6 +517,7 @@ config ARCH_MMP
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select COMMON_CLKDEV
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_SCHED_CLOCK
|
||||
select TICK_ONESHOT
|
||||
select PLAT_PXA
|
||||
select SPARSE_IRQ
|
||||
@ -565,6 +574,7 @@ config ARCH_TEGRA
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select GENERIC_GPIO
|
||||
select HAVE_CLK
|
||||
select HAVE_SCHED_CLOCK
|
||||
select COMMON_CLKDEV
|
||||
select ARCH_HAS_BARRIERS if CACHE_L2X0
|
||||
select ARCH_HAS_CPUFREQ
|
||||
@ -588,6 +598,7 @@ config ARCH_PXA
|
||||
select COMMON_CLKDEV
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_SCHED_CLOCK
|
||||
select TICK_ONESHOT
|
||||
select PLAT_PXA
|
||||
select SPARSE_IRQ
|
||||
@ -636,6 +647,7 @@ config ARCH_SA1100
|
||||
select CPU_FREQ
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_CLK
|
||||
select HAVE_SCHED_CLOCK
|
||||
select TICK_ONESHOT
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
help
|
||||
@ -782,6 +794,7 @@ config ARCH_U300
|
||||
bool "ST-Ericsson U300 Series"
|
||||
depends on MMU
|
||||
select CPU_ARM926T
|
||||
select HAVE_SCHED_CLOCK
|
||||
select HAVE_TCM
|
||||
select ARM_AMBA
|
||||
select ARM_VIC
|
||||
@ -830,6 +843,7 @@ config ARCH_OMAP
|
||||
select ARCH_REQUIRE_GPIOLIB
|
||||
select ARCH_HAS_CPUFREQ
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_SCHED_CLOCK
|
||||
select ARCH_HAS_HOLES_MEMORYMODEL
|
||||
help
|
||||
Support for TI's OMAP platform (OMAP1/2/3/4).
|
||||
@ -983,9 +997,11 @@ config ARCH_ACORN
|
||||
config PLAT_IOP
|
||||
bool
|
||||
select GENERIC_CLOCKEVENTS
|
||||
select HAVE_SCHED_CLOCK
|
||||
|
||||
config PLAT_ORION
|
||||
bool
|
||||
select HAVE_SCHED_CLOCK
|
||||
|
||||
config PLAT_PXA
|
||||
bool
|
||||
@ -1212,10 +1228,11 @@ config SMP
|
||||
depends on EXPERIMENTAL
|
||||
depends on GENERIC_CLOCKEVENTS
|
||||
depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \
|
||||
MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\
|
||||
ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4
|
||||
MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \
|
||||
ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \
|
||||
ARCH_MSM_SCORPIONMP
|
||||
select USE_GENERIC_SMP_HELPERS
|
||||
select HAVE_ARM_SCU
|
||||
select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP
|
||||
help
|
||||
This enables support for systems with more than one CPU. If you have
|
||||
a system with only one CPU, like most personal computers, say N. If
|
||||
@ -1290,6 +1307,7 @@ config NR_CPUS
|
||||
config HOTPLUG_CPU
|
||||
bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
|
||||
depends on SMP && HOTPLUG && EXPERIMENTAL
|
||||
depends on !ARCH_MSM
|
||||
help
|
||||
Say Y here to experiment with turning CPUs off and on. CPUs
|
||||
can be controlled through /sys/devices/system/cpu.
|
||||
@ -1298,7 +1316,7 @@ config LOCAL_TIMERS
|
||||
bool "Use local timer interrupts"
|
||||
depends on SMP
|
||||
default y
|
||||
select HAVE_ARM_TWD
|
||||
select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP
|
||||
help
|
||||
Enable support for local timers on SMP platforms, rather than the
|
||||
legacy IPI broadcast method. Local timers allow the system
|
||||
|
@ -23,7 +23,7 @@ config STRICT_DEVMEM
|
||||
config FRAME_POINTER
|
||||
bool
|
||||
depends on !THUMB2_KERNEL
|
||||
default y if !ARM_UNWIND
|
||||
default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER
|
||||
help
|
||||
If you say N here, the resulting kernel will be slightly smaller and
|
||||
faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled,
|
||||
|
@ -44,7 +44,6 @@ static struct clocksource clocksource_sp804 = {
|
||||
.rating = 200,
|
||||
.read = sp804_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -61,8 +60,7 @@ void __init sp804_clocksource_init(void __iomem *base)
|
||||
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
|
||||
clksrc_base + TIMER_CTRL);
|
||||
|
||||
cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift);
|
||||
clocksource_register(cs);
|
||||
clocksource_register_khz(cs, TIMER_FREQ_KHZ);
|
||||
}
|
||||
|
||||
|
||||
|
118
arch/arm/include/asm/sched_clock.h
Normal file
118
arch/arm/include/asm/sched_clock.h
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* sched_clock.h: support for extending counters to full 64-bit ns counter
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#ifndef ASM_SCHED_CLOCK
|
||||
#define ASM_SCHED_CLOCK
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
struct clock_data {
|
||||
u64 epoch_ns;
|
||||
u32 epoch_cyc;
|
||||
u32 epoch_cyc_copy;
|
||||
u32 mult;
|
||||
u32 shift;
|
||||
};
|
||||
|
||||
/* Declare a struct clock_data object called 'name'. */
#define DEFINE_CLOCK_DATA(name) struct clock_data name
|
||||
|
||||
/*
 * Convert a cycle count to nanoseconds using a multiply-and-shift:
 * returns (cyc * mult) >> shift, evaluated in 64-bit arithmetic.
 */
static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift)
{
	return (cyc * mult) >> shift;
}
|
||||
|
||||
/*
|
||||
* Atomically update the sched_clock epoch. Your update callback will
|
||||
* be called from a timer before the counter wraps - read the current
|
||||
* counter value, and call this function to safely move the epochs
|
||||
* forward. Only use this from the update callback.
|
||||
*/
|
||||
static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask)
|
||||
{
|
||||
unsigned long flags;
|
||||
u64 ns = cd->epoch_ns +
|
||||
cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift);
|
||||
|
||||
/*
|
||||
* Write epoch_cyc and epoch_ns in a way that the update is
|
||||
* detectable in cyc_to_fixed_sched_clock().
|
||||
*/
|
||||
raw_local_irq_save(flags);
|
||||
cd->epoch_cyc = cyc;
|
||||
smp_wmb();
|
||||
cd->epoch_ns = ns;
|
||||
smp_wmb();
|
||||
cd->epoch_cyc_copy = cyc;
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* If your clock rate is known at compile time, using this will allow
|
||||
* you to optimize the mult/shift loads away. This is paired with
|
||||
* init_fixed_sched_clock() to ensure that your mult/shift are correct.
|
||||
*/
|
||||
static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd,
|
||||
u32 cyc, u32 mask, u32 mult, u32 shift)
|
||||
{
|
||||
u64 epoch_ns;
|
||||
u32 epoch_cyc;
|
||||
|
||||
/*
|
||||
* Load the epoch_cyc and epoch_ns atomically. We do this by
|
||||
* ensuring that we always write epoch_cyc, epoch_ns and
|
||||
* epoch_cyc_copy in strict order, and read them in strict order.
|
||||
* If epoch_cyc and epoch_cyc_copy are not equal, then we're in
|
||||
* the middle of an update, and we should repeat the load.
|
||||
*/
|
||||
do {
|
||||
epoch_cyc = cd->epoch_cyc;
|
||||
smp_rmb();
|
||||
epoch_ns = cd->epoch_ns;
|
||||
smp_rmb();
|
||||
} while (epoch_cyc != cd->epoch_cyc_copy);
|
||||
|
||||
return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift);
|
||||
}
|
||||
|
||||
/*
|
||||
* Otherwise, you need to use this, which will obtain the mult/shift
|
||||
* from the clock_data structure. Use init_sched_clock() with this.
|
||||
*/
|
||||
static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd,
|
||||
u32 cyc, u32 mask)
|
||||
{
|
||||
return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift);
|
||||
}
|
||||
|
||||
/*
 * Initialize the clock data - calculate the appropriate multiplier
 * and shift.  Also setup a timer to ensure that the epoch is refreshed
 * at the appropriate time interval, which will call your update
 * handler.
 */
void init_sched_clock(struct clock_data *cd, void (*update)(void),
		      unsigned int bits, unsigned long rate);
|
||||
|
||||
/*
|
||||
* Use this initialization function rather than init_sched_clock() if
|
||||
* you're using cyc_to_fixed_sched_clock, which will warn if your
|
||||
* constants are incorrect.
|
||||
*/
|
||||
static inline void init_fixed_sched_clock(struct clock_data *cd,
|
||||
void (*update)(void), unsigned int bits, unsigned long rate,
|
||||
u32 mult, u32 shift)
|
||||
{
|
||||
init_sched_clock(cd, update, bits, rate);
|
||||
if (cd->mult != mult || cd->shift != shift) {
|
||||
pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n"
|
||||
"sched_clock: fix multiply/shift to avoid scheduler hiccups\n",
|
||||
mult, shift, cd->mult, cd->shift);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -63,6 +63,11 @@
|
||||
#include <asm/outercache.h>
|
||||
|
||||
#define __exception __attribute__((section(".exception.text")))
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
#define __exception_irq_entry __irq_entry
|
||||
#else
|
||||
#define __exception_irq_entry __exception
|
||||
#endif
|
||||
|
||||
struct thread_info;
|
||||
struct task_struct;
|
||||
|
@ -15,13 +15,32 @@ struct undef_hook {
|
||||
void register_undef_hook(struct undef_hook *hook);
|
||||
void unregister_undef_hook(struct undef_hook *hook);
|
||||
|
||||
/*
 * Report whether 'ptr' lies in [__irqentry_text_start, __irqentry_text_end).
 * Without CONFIG_FUNCTION_GRAPH_TRACER this always returns 0.
 */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
static inline int __in_irqentry_text(unsigned long ptr)
{
	extern char __irqentry_text_start[];
	extern char __irqentry_text_end[];

	return ptr >= (unsigned long)&__irqentry_text_start &&
	       ptr < (unsigned long)&__irqentry_text_end;
}
#else
static inline int __in_irqentry_text(unsigned long ptr)
{
	return 0;
}
#endif
|
||||
|
||||
/*
 * Report whether 'ptr' lies in [__exception_text_start, __exception_text_end)
 * or, failing that, whatever __in_irqentry_text() reports for it.
 */
static inline int in_exception_text(unsigned long ptr)
{
	extern char __exception_text_start[];
	extern char __exception_text_end[];
	int in;

	in = ptr >= (unsigned long)&__exception_text_start &&
	     ptr < (unsigned long)&__exception_text_end;

	/* GNU "?:" - yield 'in' when non-zero, else fall back to the irqentry check. */
	return in ? : __in_irqentry_text(ptr);
}
|
||||
|
||||
extern void __init early_trap_init(void);
|
||||
|
@ -5,7 +5,7 @@
|
||||
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
|
||||
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
|
||||
|
||||
ifdef CONFIG_DYNAMIC_FTRACE
|
||||
ifdef CONFIG_FUNCTION_TRACER
|
||||
CFLAGS_REMOVE_ftrace.o = -pg
|
||||
endif
|
||||
|
||||
@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
|
||||
obj-$(CONFIG_ARTHUR) += arthur.o
|
||||
obj-$(CONFIG_ISA_DMA) += dma-isa.o
|
||||
obj-$(CONFIG_PCI) += bios32.o isa.o
|
||||
obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o
|
||||
obj-$(CONFIG_SMP) += smp.o
|
||||
obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
|
||||
obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
|
||||
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
|
||||
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
|
||||
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
|
||||
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
|
||||
obj-$(CONFIG_ATAGS_PROC) += atags.o
|
||||
|
@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
ENTRY(__gnu_mcount_nc)
|
||||
mov ip, lr
|
||||
ldmia sp!, {lr}
|
||||
mov pc, ip
|
||||
ENDPROC(__gnu_mcount_nc)
|
||||
|
||||
ENTRY(ftrace_caller)
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
mov r0, lr
|
||||
sub r0, r0, #MCOUNT_INSN_SIZE
|
||||
ldr r1, [sp, #20]
|
||||
|
||||
.global ftrace_call
|
||||
ftrace_call:
|
||||
bl ftrace_stub
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
mov pc, ip
|
||||
ENDPROC(ftrace_caller)
|
||||
|
||||
#ifdef CONFIG_OLD_MCOUNT
|
||||
ENTRY(mcount)
|
||||
stmdb sp!, {lr}
|
||||
ldr lr, [fp, #-4]
|
||||
ldmia sp!, {pc}
|
||||
ENDPROC(mcount)
|
||||
|
||||
ENTRY(ftrace_caller_old)
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
ldr r1, [fp, #-4]
|
||||
mov r0, lr
|
||||
sub r0, r0, #MCOUNT_INSN_SIZE
|
||||
|
||||
.globl ftrace_call_old
|
||||
ftrace_call_old:
|
||||
bl ftrace_stub
|
||||
ldr lr, [fp, #-4] @ restore lr
|
||||
ldmia sp!, {r0-r3, pc}
|
||||
ENDPROC(ftrace_caller_old)
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
ENTRY(__gnu_mcount_nc)
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
.macro __mcount suffix
|
||||
mcount_enter
|
||||
ldr r0, =ftrace_trace_function
|
||||
ldr r2, [r0]
|
||||
adr r0, .Lftrace_stub
|
||||
cmp r0, r2
|
||||
bne gnu_trace
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
mov pc, ip
|
||||
bne 1f
|
||||
|
||||
gnu_trace:
|
||||
ldr r1, [sp, #20] @ lr of instrumented routine
|
||||
mov r0, lr
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
ldr r1, =ftrace_graph_return
|
||||
ldr r2, [r1]
|
||||
cmp r0, r2
|
||||
bne ftrace_graph_caller\suffix
|
||||
|
||||
ldr r1, =ftrace_graph_entry
|
||||
ldr r2, [r1]
|
||||
ldr r0, =ftrace_graph_entry_stub
|
||||
cmp r0, r2
|
||||
bne ftrace_graph_caller\suffix
|
||||
#endif
|
||||
|
||||
mcount_exit
|
||||
|
||||
1: mcount_get_lr r1 @ lr of instrumented func
|
||||
mov r0, lr @ instrumented function
|
||||
sub r0, r0, #MCOUNT_INSN_SIZE
|
||||
adr lr, BSYM(1f)
|
||||
adr lr, BSYM(2f)
|
||||
mov pc, r2
|
||||
1:
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
mov pc, ip
|
||||
ENDPROC(__gnu_mcount_nc)
|
||||
2: mcount_exit
|
||||
.endm
|
||||
|
||||
.macro __ftrace_caller suffix
|
||||
mcount_enter
|
||||
|
||||
mcount_get_lr r1 @ lr of instrumented func
|
||||
mov r0, lr @ instrumented function
|
||||
sub r0, r0, #MCOUNT_INSN_SIZE
|
||||
|
||||
.globl ftrace_call\suffix
|
||||
ftrace_call\suffix:
|
||||
bl ftrace_stub
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
.globl ftrace_graph_call\suffix
|
||||
ftrace_graph_call\suffix:
|
||||
mov r0, r0
|
||||
#endif
|
||||
|
||||
mcount_exit
|
||||
.endm
|
||||
|
||||
.macro __ftrace_graph_caller
|
||||
sub r0, fp, #4 @ &lr of instrumented routine (&parent)
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
@ called from __ftrace_caller, saved in mcount_enter
|
||||
ldr r1, [sp, #16] @ instrumented routine (func)
|
||||
#else
|
||||
@ called from __mcount, untouched in lr
|
||||
mov r1, lr @ instrumented routine (func)
|
||||
#endif
|
||||
sub r1, r1, #MCOUNT_INSN_SIZE
|
||||
mov r2, fp @ frame pointer
|
||||
bl prepare_ftrace_return
|
||||
mcount_exit
|
||||
.endm
|
||||
|
||||
#ifdef CONFIG_OLD_MCOUNT
|
||||
/*
|
||||
* This is under an ifdef in order to force link-time errors for people trying
|
||||
* to build with !FRAME_POINTER with a GCC which doesn't use the new-style
|
||||
* mcount.
|
||||
* mcount
|
||||
*/
|
||||
ENTRY(mcount)
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
ldr r0, =ftrace_trace_function
|
||||
ldr r2, [r0]
|
||||
adr r0, ftrace_stub
|
||||
cmp r0, r2
|
||||
bne trace
|
||||
ldr lr, [fp, #-4] @ restore lr
|
||||
ldmia sp!, {r0-r3, pc}
|
||||
|
||||
trace:
|
||||
ldr r1, [fp, #-4] @ lr of instrumented routine
|
||||
mov r0, lr
|
||||
sub r0, r0, #MCOUNT_INSN_SIZE
|
||||
mov lr, pc
|
||||
mov pc, r2
|
||||
ldr lr, [fp, #-4] @ restore lr
|
||||
.macro mcount_enter
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
.endm
|
||||
|
||||
.macro mcount_get_lr reg
|
||||
ldr \reg, [fp, #-4]
|
||||
.endm
|
||||
|
||||
.macro mcount_exit
|
||||
ldr lr, [fp, #-4]
|
||||
ldmia sp!, {r0-r3, pc}
|
||||
.endm
|
||||
|
||||
ENTRY(mcount)
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
stmdb sp!, {lr}
|
||||
ldr lr, [fp, #-4]
|
||||
ldmia sp!, {pc}
|
||||
#else
|
||||
__mcount _old
|
||||
#endif
|
||||
ENDPROC(mcount)
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
ENTRY(ftrace_caller_old)
|
||||
__ftrace_caller _old
|
||||
ENDPROC(ftrace_caller_old)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
ENTRY(ftrace_graph_caller_old)
|
||||
__ftrace_graph_caller
|
||||
ENDPROC(ftrace_graph_caller_old)
|
||||
#endif
|
||||
|
||||
.purgem mcount_enter
|
||||
.purgem mcount_get_lr
|
||||
.purgem mcount_exit
|
||||
#endif
|
||||
|
||||
/*
|
||||
* __gnu_mcount_nc
|
||||
*/
|
||||
|
||||
.macro mcount_enter
|
||||
stmdb sp!, {r0-r3, lr}
|
||||
.endm
|
||||
|
||||
.macro mcount_get_lr reg
|
||||
ldr \reg, [sp, #20]
|
||||
.endm
|
||||
|
||||
.macro mcount_exit
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
mov pc, ip
|
||||
.endm
|
||||
|
||||
ENTRY(__gnu_mcount_nc)
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
mov ip, lr
|
||||
ldmia sp!, {lr}
|
||||
mov pc, ip
|
||||
#else
|
||||
__mcount
|
||||
#endif
|
||||
ENDPROC(__gnu_mcount_nc)
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
ENTRY(ftrace_caller)
|
||||
__ftrace_caller
|
||||
ENDPROC(ftrace_caller)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
ENTRY(ftrace_graph_caller)
|
||||
__ftrace_graph_caller
|
||||
ENDPROC(ftrace_graph_caller)
|
||||
#endif
|
||||
|
||||
.purgem mcount_enter
|
||||
.purgem mcount_get_lr
|
||||
.purgem mcount_exit
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
.globl return_to_handler
|
||||
return_to_handler:
|
||||
stmdb sp!, {r0-r3}
|
||||
mov r0, fp @ frame pointer
|
||||
bl ftrace_return_to_handler
|
||||
mov lr, r0 @ r0 has real ret addr
|
||||
ldmia sp!, {r0-r3}
|
||||
mov pc, lr
|
||||
#endif
|
||||
|
||||
ENTRY(ftrace_stub)
|
||||
.Lftrace_stub:
|
||||
|
@ -24,6 +24,7 @@
|
||||
#define NOP 0xe8bd4000 /* pop {lr} */
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
#ifdef CONFIG_OLD_MCOUNT
|
||||
#define OLD_MCOUNT_ADDR ((unsigned long) mcount)
|
||||
#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
|
||||
@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
|
||||
}
|
||||
#endif
|
||||
|
||||
/* construct a branch (BL) instruction to addr */
|
||||
#ifdef CONFIG_THUMB2_KERNEL
|
||||
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
|
||||
static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
|
||||
bool link)
|
||||
{
|
||||
unsigned long s, j1, j2, i1, i2, imm10, imm11;
|
||||
unsigned long first, second;
|
||||
@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
|
||||
j2 = (!i2) ^ s;
|
||||
|
||||
first = 0xf000 | (s << 10) | imm10;
|
||||
second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
|
||||
second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
|
||||
if (link)
|
||||
second |= 1 << 14;
|
||||
|
||||
return (second << 16) | first;
|
||||
}
|
||||
#else
|
||||
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
|
||||
static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
|
||||
bool link)
|
||||
{
|
||||
unsigned long opcode = 0xea000000;
|
||||
long offset;
|
||||
|
||||
if (link)
|
||||
opcode |= 1 << 24;
|
||||
|
||||
offset = (long)addr - (long)(pc + 8);
|
||||
if (unlikely(offset < -33554432 || offset > 33554428)) {
|
||||
/* Can't generate branches that far (from ARM ARM). Ftrace
|
||||
@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
|
||||
|
||||
offset = (offset >> 2) & 0x00ffffff;
|
||||
|
||||
return 0xeb000000 | offset;
|
||||
return opcode | offset;
|
||||
}
|
||||
#endif
|
||||
|
||||
static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
|
||||
{
|
||||
return ftrace_gen_branch(pc, addr, true);
|
||||
}
|
||||
|
||||
static int ftrace_modify_code(unsigned long pc, unsigned long old,
|
||||
unsigned long new)
|
||||
{
|
||||
@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
|
||||
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
||||
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
|
||||
unsigned long frame_pointer)
|
||||
{
|
||||
unsigned long return_hooker = (unsigned long) &return_to_handler;
|
||||
struct ftrace_graph_ent trace;
|
||||
unsigned long old;
|
||||
int err;
|
||||
|
||||
if (unlikely(atomic_read(¤t->tracing_graph_pause)))
|
||||
return;
|
||||
|
||||
old = *parent;
|
||||
*parent = return_hooker;
|
||||
|
||||
err = ftrace_push_return_trace(old, self_addr, &trace.depth,
|
||||
frame_pointer);
|
||||
if (err == -EBUSY) {
|
||||
*parent = old;
|
||||
return;
|
||||
}
|
||||
|
||||
trace.func = self_addr;
|
||||
|
||||
/* Only trace if the calling function expects to */
|
||||
if (!ftrace_graph_entry(&trace)) {
|
||||
current->curr_ret_stack--;
|
||||
*parent = old;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
extern unsigned long ftrace_graph_call;
|
||||
extern unsigned long ftrace_graph_call_old;
|
||||
extern void ftrace_graph_caller_old(void);
|
||||
|
||||
/*
 * Switch the instruction at 'callsite' between a nop and a non-linking
 * branch to 'func'.
 *
 * @callsite: address of the instruction to rewrite
 * @func:     branch target
 * @enable:   true replaces the nop with the branch, false does the reverse
 *
 * Returns the result of ftrace_modify_code().
 */
static int __ftrace_modify_caller(unsigned long *callsite,
				  void (*func) (void), bool enable)
{
	unsigned long caller_fn = (unsigned long) func;
	unsigned long pc = (unsigned long) callsite;
	unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
	unsigned long nop = 0xe1a00000;	/* mov r0, r0 */
	unsigned long old = enable ? nop : branch;
	unsigned long new = enable ? branch : nop;

	return ftrace_modify_code(pc, old, new);
}
|
||||
|
||||
/*
 * Enable or disable the branch at ftrace_graph_call and, with
 * CONFIG_OLD_MCOUNT, also the one at ftrace_graph_call_old.  The second
 * site is only touched when the first was modified successfully.
 *
 * Returns 0 on success, otherwise the first non-zero result from
 * __ftrace_modify_caller().
 */
static int ftrace_modify_graph_caller(bool enable)
{
	int ret;

	ret = __ftrace_modify_caller(&ftrace_graph_call,
				     ftrace_graph_caller,
				     enable);

#ifdef CONFIG_OLD_MCOUNT
	if (!ret)
		ret = __ftrace_modify_caller(&ftrace_graph_call_old,
					     ftrace_graph_caller_old,
					     enable);
#endif

	return ret;
}
|
||||
|
||||
int ftrace_enable_ftrace_graph_caller(void)
|
||||
{
|
||||
return ftrace_modify_graph_caller(true);
|
||||
}
|
||||
|
||||
int ftrace_disable_ftrace_graph_caller(void)
|
||||
{
|
||||
return ftrace_modify_graph_caller(false);
|
||||
}
|
||||
#endif /* CONFIG_DYNAMIC_FTRACE */
|
||||
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/kallsyms.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/ftrace.h>
|
||||
|
||||
#include <asm/system.h>
|
||||
#include <asm/mach/irq.h>
|
||||
@ -105,7 +106,8 @@ unlock:
|
||||
* come via this function. Instead, they should provide their
|
||||
* own 'handler'
|
||||
*/
|
||||
asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
|
||||
asmlinkage void __exception_irq_entry
|
||||
asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
672
arch/arm/kernel/perf_event_v6.c
Normal file
672
arch/arm/kernel/perf_event_v6.c
Normal file
@ -0,0 +1,672 @@
|
||||
/*
|
||||
* ARMv6 Performance counter handling code.
|
||||
*
|
||||
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
|
||||
*
|
||||
* ARMv6 has 2 configurable performance counters and a single cycle counter.
|
||||
* They all share a single reset bit but can be written to zero so we can use
|
||||
* that for a reset.
|
||||
*
|
||||
* The counters can't be individually enabled or disabled so when we remove
|
||||
* one event and replace it with another we could get spurious counts from the
|
||||
* wrong event. However, we can take advantage of the fact that the
|
||||
* performance counters can export events to the event bus, and the event bus
|
||||
* itself can be monitored. This requires that we *don't* export the events to
|
||||
* the event bus. The procedure for disabling a configurable counter is:
|
||||
* - change the counter to count the ETMEXTOUT[0] signal (0x20). This
|
||||
* effectively stops the counter from counting.
|
||||
* - disable the counter's interrupt generation (each counter has its
|
||||
* own interrupt enable bit).
|
||||
* Once stopped, the counter value can be written as 0 to reset.
|
||||
*
|
||||
* To enable a counter:
|
||||
* - enable the counter's interrupt generation.
|
||||
* - set the new event type.
|
||||
*
|
||||
* Note: the dedicated cycle counter only counts cycles and can't be
|
||||
* enabled/disabled independently of the others. When we want to disable the
|
||||
* cycle counter, we have to just disable the interrupt reporting and start
|
||||
* ignoring that counter. When re-enabling, we have to reset the value and
|
||||
* enable the interrupt.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_CPU_V6
|
||||
/* Event type codes used by the ARMv6 performance counter maps below. */
enum armv6_perf_types {
	ARMV6_PERFCTR_ICACHE_MISS	= 0x0,
	ARMV6_PERFCTR_IBUF_STALL	= 0x1,
	ARMV6_PERFCTR_DDEP_STALL	= 0x2,
	ARMV6_PERFCTR_ITLB_MISS		= 0x3,
	ARMV6_PERFCTR_DTLB_MISS		= 0x4,
	ARMV6_PERFCTR_BR_EXEC		= 0x5,
	ARMV6_PERFCTR_BR_MISPREDICT	= 0x6,
	ARMV6_PERFCTR_INSTR_EXEC	= 0x7,
	ARMV6_PERFCTR_DCACHE_HIT	= 0x9,
	ARMV6_PERFCTR_DCACHE_ACCESS	= 0xA,
	ARMV6_PERFCTR_DCACHE_MISS	= 0xB,
	ARMV6_PERFCTR_DCACHE_WBACK	= 0xC,
	ARMV6_PERFCTR_SW_PC_CHANGE	= 0xD,
	ARMV6_PERFCTR_MAIN_TLB_MISS	= 0xF,
	ARMV6_PERFCTR_EXPL_D_ACCESS	= 0x10,
	ARMV6_PERFCTR_LSU_FULL_STALL	= 0x11,
	ARMV6_PERFCTR_WBUF_DRAINED	= 0x12,
	ARMV6_PERFCTR_CPU_CYCLES	= 0xFF,
	/* 0x20 is the ETMEXTOUT[0] event; counting it effectively stops a counter. */
	ARMV6_PERFCTR_NOP		= 0x20,
};
|
||||
|
||||
/* Counter indices: the cycle counter, then the two configurable counters. */
enum armv6_counters {
	ARMV6_CYCLE_COUNTER = 1,	/* numbering starts at 1 */
	ARMV6_COUNTER0,
	ARMV6_COUNTER1,
};
|
||||
|
||||
/*
|
||||
* The hardware events that we support. We do support cache operations but
|
||||
* we have harvard caches and no way to combine instruction and data
|
||||
* accesses/misses in hardware.
|
||||
*/
|
||||
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
|
||||
};
|
||||
|
||||
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
|
||||
[C(L1D)] = {
|
||||
/*
|
||||
* The performance counters don't differentiate between read
|
||||
* and write accesses/misses so this isn't strictly correct,
|
||||
* but it's the best we can do. Writes and reads get
|
||||
* combined.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(L1I)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(LL)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(DTLB)] = {
|
||||
/*
|
||||
* The ARM performance counters can count micro DTLB misses,
|
||||
* micro ITLB misses and main TLB misses. There isn't an event
|
||||
* for TLB misses, so use the micro misses here and if users
|
||||
* want the main TLB misses they can use a raw counter.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(ITLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(BPU)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/* Event type codes used by the ARMv6 MPCore performance counter maps. */
enum armv6mpcore_perf_types {
	ARMV6MPCORE_PERFCTR_ICACHE_MISS		= 0x0,
	ARMV6MPCORE_PERFCTR_IBUF_STALL		= 0x1,
	ARMV6MPCORE_PERFCTR_DDEP_STALL		= 0x2,
	ARMV6MPCORE_PERFCTR_ITLB_MISS		= 0x3,
	ARMV6MPCORE_PERFCTR_DTLB_MISS		= 0x4,
	ARMV6MPCORE_PERFCTR_BR_EXEC		= 0x5,
	ARMV6MPCORE_PERFCTR_BR_NOTPREDICT	= 0x6,
	ARMV6MPCORE_PERFCTR_BR_MISPREDICT	= 0x7,
	ARMV6MPCORE_PERFCTR_INSTR_EXEC		= 0x8,
	ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS	= 0xA,
	ARMV6MPCORE_PERFCTR_DCACHE_RDMISS	= 0xB,
	ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS	= 0xC,
	ARMV6MPCORE_PERFCTR_DCACHE_WRMISS	= 0xD,
	ARMV6MPCORE_PERFCTR_DCACHE_EVICTION	= 0xE,
	ARMV6MPCORE_PERFCTR_SW_PC_CHANGE	= 0xF,
	ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS	= 0x10,
	ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS	= 0x11,
	ARMV6MPCORE_PERFCTR_LSU_FULL_STALL	= 0x12,
	ARMV6MPCORE_PERFCTR_WBUF_DRAINED	= 0x13,
	ARMV6MPCORE_PERFCTR_CPU_CYCLES		= 0xFF,
};
|
||||
|
||||
/*
|
||||
* The hardware events that we support. We do support cache operations but
|
||||
* we have harvard caches and no way to combine instruction and data
|
||||
* accesses/misses in hardware.
|
||||
*/
|
||||
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
|
||||
};
|
||||
|
||||
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
|
||||
[C(L1D)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] =
|
||||
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
|
||||
[C(RESULT_MISS)] =
|
||||
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] =
|
||||
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
|
||||
[C(RESULT_MISS)] =
|
||||
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(L1I)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(LL)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(DTLB)] = {
|
||||
/*
|
||||
* The ARM performance counters can count micro DTLB misses,
|
||||
* micro ITLB misses and main TLB misses. There isn't an event
|
||||
* for TLB misses, so use the micro misses here and if users
|
||||
* want the main TLB misses they can use a raw counter.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(ITLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(BPU)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
static inline unsigned long
|
||||
armv6_pmcr_read(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
armv6_pmcr_write(unsigned long val)
|
||||
{
|
||||
asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
|
||||
}
|
||||
|
||||
/* PMCR control bits. */
#define ARMV6_PMCR_ENABLE		(1 << 0)
#define ARMV6_PMCR_CTR01_RESET		(1 << 1)
#define ARMV6_PMCR_CCOUNT_RESET		(1 << 2)
#define ARMV6_PMCR_CCOUNT_DIV		(1 << 3)

/* PMCR interrupt-enable bits, one per counter. */
#define ARMV6_PMCR_COUNT0_IEN		(1 << 4)
#define ARMV6_PMCR_COUNT1_IEN		(1 << 5)
#define ARMV6_PMCR_CCOUNT_IEN		(1 << 6)

/* PMCR overflow flags, one per counter. */
#define ARMV6_PMCR_COUNT0_OVERFLOW	(1 << 8)
#define ARMV6_PMCR_COUNT1_OVERFLOW	(1 << 9)
#define ARMV6_PMCR_CCOUNT_OVERFLOW	(1 << 10)

/* PMCR event-select fields for the two programmable counters. */
#define ARMV6_PMCR_EVT_COUNT0_SHIFT	20
#define ARMV6_PMCR_EVT_COUNT0_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
#define ARMV6_PMCR_EVT_COUNT1_SHIFT	12
#define ARMV6_PMCR_EVT_COUNT1_MASK	(0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)

/* Every overflow flag at once. */
#define ARMV6_PMCR_OVERFLOWED_MASK \
	(ARMV6_PMCR_CCOUNT_OVERFLOW | ARMV6_PMCR_COUNT0_OVERFLOW | \
	 ARMV6_PMCR_COUNT1_OVERFLOW)
|
||||
|
||||
static inline int
|
||||
armv6_pmcr_has_overflowed(unsigned long pmcr)
|
||||
{
|
||||
return pmcr & ARMV6_PMCR_OVERFLOWED_MASK;
|
||||
}
|
||||
|
||||
static inline int
|
||||
armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
|
||||
enum armv6_counters counter)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == counter)
|
||||
ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
|
||||
else if (ARMV6_COUNTER0 == counter)
|
||||
ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
|
||||
else if (ARMV6_COUNTER1 == counter)
|
||||
ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
|
||||
else
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline u32
|
||||
armv6pmu_read_counter(int counter)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == counter)
|
||||
asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
|
||||
else if (ARMV6_COUNTER0 == counter)
|
||||
asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
|
||||
else if (ARMV6_COUNTER1 == counter)
|
||||
asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
|
||||
else
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
static inline void
|
||||
armv6pmu_write_counter(int counter,
|
||||
u32 value)
|
||||
{
|
||||
if (ARMV6_CYCLE_COUNTER == counter)
|
||||
asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
|
||||
else if (ARMV6_COUNTER0 == counter)
|
||||
asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
|
||||
else if (ARMV6_COUNTER1 == counter)
|
||||
asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
|
||||
else
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
|
||||
}
|
||||
|
||||
void
|
||||
armv6pmu_enable_event(struct hw_perf_event *hwc,
|
||||
int idx)
|
||||
{
|
||||
unsigned long val, mask, evt, flags;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
mask = 0;
|
||||
evt = ARMV6_PMCR_CCOUNT_IEN;
|
||||
} else if (ARMV6_COUNTER0 == idx) {
|
||||
mask = ARMV6_PMCR_EVT_COUNT0_MASK;
|
||||
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
|
||||
ARMV6_PMCR_COUNT0_IEN;
|
||||
} else if (ARMV6_COUNTER1 == idx) {
|
||||
mask = ARMV6_PMCR_EVT_COUNT1_MASK;
|
||||
evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
|
||||
ARMV6_PMCR_COUNT1_IEN;
|
||||
} else {
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mask out the current event and set the counter to count the event
|
||||
* that we're interested in.
|
||||
*/
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
val &= ~mask;
|
||||
val |= evt;
|
||||
armv6_pmcr_write(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static irqreturn_t
|
||||
armv6pmu_handle_irq(int irq_num,
|
||||
void *dev)
|
||||
{
|
||||
unsigned long pmcr = armv6_pmcr_read();
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
if (!armv6_pmcr_has_overflowed(pmcr))
|
||||
return IRQ_NONE;
|
||||
|
||||
regs = get_irq_regs();
|
||||
|
||||
/*
|
||||
* The interrupts are cleared by writing the overflow flags back to
|
||||
* the control register. All of the other bits don't have any effect
|
||||
* if they are rewritten, so write the whole value back.
|
||||
*/
|
||||
armv6_pmcr_write(pmcr);
|
||||
|
||||
perf_sample_data_init(&data, 0);
|
||||
|
||||
cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
for (idx = 0; idx <= armpmu->num_events; ++idx) {
|
||||
struct perf_event *event = cpuc->events[idx];
|
||||
struct hw_perf_event *hwc;
|
||||
|
||||
if (!test_bit(idx, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We have a single interrupt for all counters. Check that
|
||||
* each counter has overflowed before we process it.
|
||||
*/
|
||||
if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
|
||||
continue;
|
||||
|
||||
hwc = &event->hw;
|
||||
armpmu_event_update(event, hwc, idx);
|
||||
data.period = event->hw.last_period;
|
||||
if (!armpmu_event_set_period(event, hwc, idx))
|
||||
continue;
|
||||
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
armpmu->disable(hwc, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the pending perf events.
|
||||
*
|
||||
* Note: this call *must* be run with interrupts disabled. For
|
||||
* platforms that can have the PMU interrupts raised as an NMI, this
|
||||
* will not work.
|
||||
*/
|
||||
irq_work_run();
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void
|
||||
armv6pmu_start(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
val |= ARMV6_PMCR_ENABLE;
|
||||
armv6_pmcr_write(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
armv6pmu_stop(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
val &= ~ARMV6_PMCR_ENABLE;
|
||||
armv6_pmcr_write(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static int
|
||||
armv6pmu_get_event_idx(struct cpu_hw_events *cpuc,
|
||||
struct hw_perf_event *event)
|
||||
{
|
||||
/* Always place a cycle counter into the cycle counter. */
|
||||
if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) {
|
||||
if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask))
|
||||
return -EAGAIN;
|
||||
|
||||
return ARMV6_CYCLE_COUNTER;
|
||||
} else {
|
||||
/*
|
||||
* For anything other than a cycle counter, try and use
|
||||
* counter0 and counter1.
|
||||
*/
|
||||
if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask))
|
||||
return ARMV6_COUNTER1;
|
||||
|
||||
if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask))
|
||||
return ARMV6_COUNTER0;
|
||||
|
||||
/* The counters are all in use. */
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
armv6pmu_disable_event(struct hw_perf_event *hwc,
|
||||
int idx)
|
||||
{
|
||||
unsigned long val, mask, evt, flags;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
mask = ARMV6_PMCR_CCOUNT_IEN;
|
||||
evt = 0;
|
||||
} else if (ARMV6_COUNTER0 == idx) {
|
||||
mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK;
|
||||
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
|
||||
} else if (ARMV6_COUNTER1 == idx) {
|
||||
mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK;
|
||||
evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT;
|
||||
} else {
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Mask out the current event and set the counter to count the number
|
||||
* of ETM bus signal assertion cycles. The external reporting should
|
||||
* be disabled and so this should never increment.
|
||||
*/
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
val &= ~mask;
|
||||
val |= evt;
|
||||
armv6_pmcr_write(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
|
||||
int idx)
|
||||
{
|
||||
unsigned long val, mask, flags, evt = 0;
|
||||
|
||||
if (ARMV6_CYCLE_COUNTER == idx) {
|
||||
mask = ARMV6_PMCR_CCOUNT_IEN;
|
||||
} else if (ARMV6_COUNTER0 == idx) {
|
||||
mask = ARMV6_PMCR_COUNT0_IEN;
|
||||
} else if (ARMV6_COUNTER1 == idx) {
|
||||
mask = ARMV6_PMCR_COUNT1_IEN;
|
||||
} else {
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlike UP ARMv6, we don't have a way of stopping the counters. We
|
||||
* simply disable the interrupt reporting.
|
||||
*/
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = armv6_pmcr_read();
|
||||
val &= ~mask;
|
||||
val |= evt;
|
||||
armv6_pmcr_write(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static const struct arm_pmu armv6pmu = {
|
||||
.id = ARM_PERF_PMU_ID_V6,
|
||||
.name = "v6",
|
||||
.handle_irq = armv6pmu_handle_irq,
|
||||
.enable = armv6pmu_enable_event,
|
||||
.disable = armv6pmu_disable_event,
|
||||
.read_counter = armv6pmu_read_counter,
|
||||
.write_counter = armv6pmu_write_counter,
|
||||
.get_event_idx = armv6pmu_get_event_idx,
|
||||
.start = armv6pmu_start,
|
||||
.stop = armv6pmu_stop,
|
||||
.cache_map = &armv6_perf_cache_map,
|
||||
.event_map = &armv6_perf_map,
|
||||
.raw_event_mask = 0xFF,
|
||||
.num_events = 3,
|
||||
.max_period = (1LLU << 32) - 1,
|
||||
};
|
||||
|
||||
/* Return the PMU description for single-core ARMv6. */
const struct arm_pmu *__init
armv6pmu_init(void)
{
	const struct arm_pmu *pmu = &armv6pmu;

	return pmu;
}
|
||||
|
||||
/*
|
||||
* ARMv6mpcore is almost identical to single core ARMv6 with the exception
|
||||
* that some of the events have different enumerations and that there is no
|
||||
* *hack* to stop the programmable counters. To stop the counters we simply
|
||||
* disable the interrupt reporting and update the event. When unthrottling we
|
||||
* reset the period and enable the interrupt reporting.
|
||||
*/
|
||||
static const struct arm_pmu armv6mpcore_pmu = {
|
||||
.id = ARM_PERF_PMU_ID_V6MP,
|
||||
.name = "v6mpcore",
|
||||
.handle_irq = armv6pmu_handle_irq,
|
||||
.enable = armv6pmu_enable_event,
|
||||
.disable = armv6mpcore_pmu_disable_event,
|
||||
.read_counter = armv6pmu_read_counter,
|
||||
.write_counter = armv6pmu_write_counter,
|
||||
.get_event_idx = armv6pmu_get_event_idx,
|
||||
.start = armv6pmu_start,
|
||||
.stop = armv6pmu_stop,
|
||||
.cache_map = &armv6mpcore_perf_cache_map,
|
||||
.event_map = &armv6mpcore_perf_map,
|
||||
.raw_event_mask = 0xFF,
|
||||
.num_events = 3,
|
||||
.max_period = (1LLU << 32) - 1,
|
||||
};
|
||||
|
||||
/* Return the PMU description for ARMv6 MPCore. */
const struct arm_pmu *__init
armv6mpcore_pmu_init(void)
{
	const struct arm_pmu *pmu = &armv6mpcore_pmu;

	return pmu;
}
|
||||
#else
|
||||
const struct arm_pmu *__init armv6pmu_init(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const struct arm_pmu *__init armv6mpcore_pmu_init(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
#endif /* CONFIG_CPU_V6 */
|
906
arch/arm/kernel/perf_event_v7.c
Normal file
906
arch/arm/kernel/perf_event_v7.c
Normal file
@ -0,0 +1,906 @@
|
||||
/*
|
||||
* ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code.
|
||||
*
|
||||
* ARMv7 support: Jean Pihet <jpihet@mvista.com>
|
||||
* 2010 (c) MontaVista Software, LLC.
|
||||
*
|
||||
* Copied from ARMv6 code, with the low level code inspired
|
||||
* by the ARMv7 Oprofile code.
|
||||
*
|
||||
* Cortex-A8 has up to 4 configurable performance counters and
|
||||
* a single cycle counter.
|
||||
* Cortex-A9 has up to 31 configurable performance counters and
|
||||
* a single cycle counter.
|
||||
*
|
||||
* All counters can be enabled/disabled and IRQ masked separately. The cycle
|
||||
* counter and all 4 performance counters together can be reset separately.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_CPU_V7
|
||||
/*
 * Event numbers common to the ARMv7 cores handled in this file.
 * 0x08 and 0x0E are not listed here; the Cortex-A8 enum defines them.
 */
enum armv7_perf_types {
	ARMV7_PERFCTR_PMNC_SW_INCR		= 0x00,
	ARMV7_PERFCTR_IFETCH_MISS		= 0x01,
	ARMV7_PERFCTR_ITLB_MISS			= 0x02,
	ARMV7_PERFCTR_DCACHE_REFILL		= 0x03,
	ARMV7_PERFCTR_DCACHE_ACCESS		= 0x04,
	ARMV7_PERFCTR_DTLB_REFILL		= 0x05,
	ARMV7_PERFCTR_DREAD			= 0x06,
	ARMV7_PERFCTR_DWRITE			= 0x07,

	ARMV7_PERFCTR_EXC_TAKEN			= 0x09,
	ARMV7_PERFCTR_EXC_EXECUTED		= 0x0A,
	ARMV7_PERFCTR_CID_WRITE			= 0x0B,
	/*
	 * ARMV7_PERFCTR_PC_WRITE is the equivalent of
	 * HW_BRANCH_INSTRUCTIONS.  It counts:
	 *  - all branch instructions,
	 *  - instructions that explicitly write the PC,
	 *  - exception generating instructions.
	 */
	ARMV7_PERFCTR_PC_WRITE			= 0x0C,
	ARMV7_PERFCTR_PC_IMM_BRANCH		= 0x0D,
	ARMV7_PERFCTR_UNALIGNED_ACCESS		= 0x0F,
	ARMV7_PERFCTR_PC_BRANCH_MIS_PRED	= 0x10,
	ARMV7_PERFCTR_CLOCK_CYCLES		= 0x11,

	ARMV7_PERFCTR_PC_BRANCH_MIS_USED	= 0x12,

	ARMV7_PERFCTR_CPU_CYCLES		= 0xFF
};
|
||||
|
||||
/* Event numbers specific to the ARMv7 Cortex-A8. */
enum armv7_a8_perf_types {
	ARMV7_PERFCTR_INSTR_EXECUTED		= 0x08,

	ARMV7_PERFCTR_PC_PROC_RETURN		= 0x0E,

	/* 0x40 - 0x5A */
	ARMV7_PERFCTR_WRITE_BUFFER_FULL		= 0x40,
	ARMV7_PERFCTR_L2_STORE_MERGED		= 0x41,
	ARMV7_PERFCTR_L2_STORE_BUFF		= 0x42,
	ARMV7_PERFCTR_L2_ACCESS			= 0x43,
	ARMV7_PERFCTR_L2_CACH_MISS		= 0x44,
	ARMV7_PERFCTR_AXI_READ_CYCLES		= 0x45,
	ARMV7_PERFCTR_AXI_WRITE_CYCLES		= 0x46,
	ARMV7_PERFCTR_MEMORY_REPLAY		= 0x47,
	ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY	= 0x48,
	ARMV7_PERFCTR_L1_DATA_MISS		= 0x49,
	ARMV7_PERFCTR_L1_INST_MISS		= 0x4A,
	ARMV7_PERFCTR_L1_DATA_COLORING		= 0x4B,
	ARMV7_PERFCTR_L1_NEON_DATA		= 0x4C,
	ARMV7_PERFCTR_L1_NEON_CACH_DATA		= 0x4D,
	ARMV7_PERFCTR_L2_NEON			= 0x4E,
	ARMV7_PERFCTR_L2_NEON_HIT		= 0x4F,
	ARMV7_PERFCTR_L1_INST			= 0x50,
	ARMV7_PERFCTR_PC_RETURN_MIS_PRED	= 0x51,
	ARMV7_PERFCTR_PC_BRANCH_FAILED		= 0x52,
	ARMV7_PERFCTR_PC_BRANCH_TAKEN		= 0x53,
	ARMV7_PERFCTR_PC_BRANCH_EXECUTED	= 0x54,
	ARMV7_PERFCTR_OP_EXECUTED		= 0x55,
	ARMV7_PERFCTR_CYCLES_INST_STALL		= 0x56,
	ARMV7_PERFCTR_CYCLES_INST		= 0x57,
	ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL	= 0x58,
	ARMV7_PERFCTR_CYCLES_NEON_INST_STALL	= 0x59,
	ARMV7_PERFCTR_NEON_CYCLES		= 0x5A,

	/* 0x70 - 0x72 */
	ARMV7_PERFCTR_PMU0_EVENTS		= 0x70,
	ARMV7_PERFCTR_PMU1_EVENTS		= 0x71,
	ARMV7_PERFCTR_PMU_EVENTS		= 0x72,
};
|
||||
|
||||
/* Event numbers specific to the ARMv7 Cortex-A9. */
enum armv7_a9_perf_types {
	/* 0x40 - 0x42 */
	ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC		= 0x40,
	ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC		= 0x41,
	ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC		= 0x42,

	/* 0x50 - 0x51 */
	ARMV7_PERFCTR_COHERENT_LINE_MISS		= 0x50,
	ARMV7_PERFCTR_COHERENT_LINE_HIT			= 0x51,

	/* 0x60 - 0x68 */
	ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES		= 0x60,
	ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES		= 0x61,
	ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES		= 0x62,
	ARMV7_PERFCTR_STREX_EXECUTED_PASSED		= 0x63,
	ARMV7_PERFCTR_STREX_EXECUTED_FAILED		= 0x64,
	ARMV7_PERFCTR_DATA_EVICTION			= 0x65,
	ARMV7_PERFCTR_ISSUE_STAGE_NO_INST		= 0x66,
	ARMV7_PERFCTR_ISSUE_STAGE_EMPTY			= 0x67,
	ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE		= 0x68,

	ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS		= 0x6E,

	/* 0x70 - 0x74 */
	ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST		= 0x70,
	ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST		= 0x71,
	ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST		= 0x72,
	ARMV7_PERFCTR_FP_EXECUTED_INST			= 0x73,
	ARMV7_PERFCTR_NEON_EXECUTED_INST		= 0x74,

	/* 0x80 - 0x86 */
	ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES		= 0x80,
	ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES		= 0x81,
	ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES	= 0x82,
	ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES	= 0x83,
	ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES	= 0x84,
	ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES	= 0x85,
	ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES		= 0x86,

	/* 0x8A - 0x8B */
	ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES		= 0x8A,
	ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES		= 0x8B,

	/* 0x90 - 0x93 */
	ARMV7_PERFCTR_ISB_INST				= 0x90,
	ARMV7_PERFCTR_DSB_INST				= 0x91,
	ARMV7_PERFCTR_DMB_INST				= 0x92,
	ARMV7_PERFCTR_EXT_INTERRUPTS			= 0x93,

	/* 0xA0 - 0xA5 */
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED	= 0xA0,
	ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED	= 0xA1,
	ARMV7_PERFCTR_PLE_FIFO_FLUSH			= 0xA2,
	ARMV7_PERFCTR_PLE_RQST_COMPLETED		= 0xA3,
	ARMV7_PERFCTR_PLE_FIFO_OVERFLOW			= 0xA4,
	ARMV7_PERFCTR_PLE_RQST_PROG			= 0xA5
};
|
||||
|
||||
/*
|
||||
* Cortex-A8 HW events mapping
|
||||
*
|
||||
* The hardware events that we support. We do support cache operations but
|
||||
* we have harvard caches and no way to combine instruction and data
|
||||
* accesses/misses in hardware.
|
||||
*/
|
||||
static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
|
||||
};
|
||||
|
||||
static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
|
||||
[C(L1D)] = {
|
||||
/*
|
||||
* The performance counters don't differentiate between read
|
||||
* and write accesses/misses so this isn't strictly correct,
|
||||
* but it's the best we can do. Writes and reads get
|
||||
* combined.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(L1I)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(LL)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(DTLB)] = {
|
||||
/*
|
||||
* Only ITLB misses and DTLB refills are supported.
|
||||
* If users want the DTLB refills misses a raw counter
|
||||
* must be used.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(ITLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(BPU)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[C(RESULT_MISS)]
|
||||
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[C(RESULT_MISS)]
|
||||
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/*
|
||||
* Cortex-A9 HW events mapping
|
||||
*/
|
||||
static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] =
|
||||
ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES,
|
||||
};
|
||||
|
||||
static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
|
||||
[C(L1D)] = {
|
||||
/*
|
||||
* The performance counters don't differentiate between read
|
||||
* and write accesses/misses so this isn't strictly correct,
|
||||
* but it's the best we can do. Writes and reads get
|
||||
* combined.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(L1I)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(LL)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(DTLB)] = {
|
||||
/*
|
||||
* Only ITLB misses and DTLB refills are supported.
|
||||
* If users want the DTLB refills misses a raw counter
|
||||
* must be used.
|
||||
*/
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(ITLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(BPU)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[C(RESULT_MISS)]
|
||||
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE,
|
||||
[C(RESULT_MISS)]
|
||||
= ARMV7_PERFCTR_PC_BRANCH_MIS_PRED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/*
 * Perf Events counter indices.
 */
enum armv7_counters {
	ARMV7_CYCLE_COUNTER	= 1,	/* Cycle counter */
	ARMV7_COUNTER0		= 2,	/* First event counter */
};

/*
 * Index layout:
 *   ARMV7_CYCLE_COUNTER                       - the cycle counter
 *   ARMV7_COUNTER0                            - the first event counter
 *   ARMV7_COUNTER0 + armpmu->num_events - 1   - the last event counter
 */
#define ARMV7_COUNTER_LAST	(ARMV7_COUNTER0 + armpmu->num_events - 1)
|
||||
|
||||
/*
 * ARMv7 low level PMNC access
 *
 * All single-bit masks below are built from an unsigned 1 so that the bit
 * for the cycle counter (bit 31) is formed without shifting into the sign
 * bit of an int, and the idx argument of the *_P() macros is parenthesized
 * so that any expression can be passed safely.
 */

/*
 * Per-CPU PMNC: config reg
 */
#define ARMV7_PMNC_E		(1 << 0) /* Enable all counters */
#define ARMV7_PMNC_P		(1 << 1) /* Reset all counters */
#define ARMV7_PMNC_C		(1 << 2) /* Cycle counter reset */
#define ARMV7_PMNC_D		(1 << 3) /* CCNT counts every 64th cpu cycle */
#define ARMV7_PMNC_X		(1 << 4) /* Export to ETM */
#define ARMV7_PMNC_DP		(1 << 5) /* Disable CCNT if non-invasive debug*/
#define	ARMV7_PMNC_N_SHIFT	11	 /* Number of counters supported */
#define	ARMV7_PMNC_N_MASK	0x1f
#define	ARMV7_PMNC_MASK		0x3f	 /* Mask for writable bits */

/*
 * Available counters
 */
#define ARMV7_CNT0		0	/* First event counter */
#define ARMV7_CCNT		31	/* Cycle counter */

/* Perf Event to low level counters mapping */
#define ARMV7_EVENT_CNT_TO_CNTx	(ARMV7_COUNTER0 - ARMV7_CNT0)

/*
 * CNTENS: counters enable reg
 */
#define ARMV7_CNTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENS_C		(1U << ARMV7_CCNT)

/*
 * CNTENC: counters disable reg
 */
#define ARMV7_CNTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_CNTENC_C		(1U << ARMV7_CCNT)

/*
 * INTENS: counters overflow interrupt enable reg
 */
#define ARMV7_INTENS_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENS_C		(1U << ARMV7_CCNT)

/*
 * INTENC: counters overflow interrupt disable reg
 */
#define ARMV7_INTENC_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_INTENC_C		(1U << ARMV7_CCNT)

/*
 * EVTSEL: Event selection reg
 */
#define	ARMV7_EVTSEL_MASK	0xff		/* Mask for writable bits */

/*
 * SELECT: Counter selection reg
 */
#define	ARMV7_SELECT_MASK	0x1f		/* Mask for writable bits */

/*
 * FLAG: counters overflow flag status reg
 */
#define ARMV7_FLAG_P(idx)	(1U << ((idx) - ARMV7_EVENT_CNT_TO_CNTx))
#define ARMV7_FLAG_C		(1U << ARMV7_CCNT)
#define	ARMV7_FLAG_MASK		0xffffffff	/* Mask for writable bits */
#define	ARMV7_OVERFLOWED_MASK	ARMV7_FLAG_MASK
|
||||
|
||||
static inline unsigned long armv7_pmnc_read(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void armv7_pmnc_write(unsigned long val)
|
||||
{
|
||||
val &= ARMV7_PMNC_MASK;
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val));
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_has_overflowed(unsigned long pmnc)
|
||||
{
|
||||
return pmnc & ARMV7_OVERFLOWED_MASK;
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc,
|
||||
enum armv7_counters counter)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (counter == ARMV7_CYCLE_COUNTER)
|
||||
ret = pmnc & ARMV7_FLAG_C;
|
||||
else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST))
|
||||
ret = pmnc & ARMV7_FLAG_P(counter);
|
||||
else
|
||||
pr_err("CPU%u checking wrong counter %d overflow status\n",
|
||||
smp_processor_id(), counter);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int armv7_pmnc_select_counter(unsigned int idx)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) {
|
||||
pr_err("CPU%u selecting wrong PMNC counter"
|
||||
" %d\n", smp_processor_id(), idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK;
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7pmu_read_counter(int idx)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
|
||||
if (idx == ARMV7_CYCLE_COUNTER)
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value));
|
||||
else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) {
|
||||
if (armv7_pmnc_select_counter(idx) == idx)
|
||||
asm volatile("mrc p15, 0, %0, c9, c13, 2"
|
||||
: "=r" (value));
|
||||
} else
|
||||
pr_err("CPU%u reading wrong counter %d\n",
|
||||
smp_processor_id(), idx);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
/*
 * Load @value into counter @idx.
 *
 * The cycle counter is written directly (c9, c13, 0); an event counter is
 * first selected and then written through c9, c13, 2. An invalid @idx is
 * logged and nothing is written.
 */
static inline void armv7pmu_write_counter(int idx, u32 value)
{
	if (idx == ARMV7_CYCLE_COUNTER) {
		asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value));
		return;
	}

	if (!((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST))) {
		pr_err("CPU%u writing wrong counter %d\n",
		       smp_processor_id(), idx);
		return;
	}

	if (armv7_pmnc_select_counter(idx) == idx)
		asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value));
}
|
||||
|
||||
/*
 * Select event counter @idx and write the event number @val (masked with
 * ARMV7_EVTSEL_MASK) to the event selection register (c9, c13, 1).
 * Nothing is written if the counter cannot be selected.
 */
static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val)
{
	u32 evt;

	if (armv7_pmnc_select_counter(idx) != idx)
		return;

	evt = val & ARMV7_EVTSEL_MASK;
	asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (evt));
}
|
||||
|
||||
static inline u32 armv7_pmnc_enable_counter(unsigned int idx)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if ((idx != ARMV7_CYCLE_COUNTER) &&
|
||||
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
|
||||
pr_err("CPU%u enabling wrong PMNC counter"
|
||||
" %d\n", smp_processor_id(), idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (idx == ARMV7_CYCLE_COUNTER)
|
||||
val = ARMV7_CNTENS_C;
|
||||
else
|
||||
val = ARMV7_CNTENS_P(idx);
|
||||
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7_pmnc_disable_counter(unsigned int idx)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
|
||||
if ((idx != ARMV7_CYCLE_COUNTER) &&
|
||||
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
|
||||
pr_err("CPU%u disabling wrong PMNC counter"
|
||||
" %d\n", smp_processor_id(), idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (idx == ARMV7_CYCLE_COUNTER)
|
||||
val = ARMV7_CNTENC_C;
|
||||
else
|
||||
val = ARMV7_CNTENC_P(idx);
|
||||
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7_pmnc_enable_intens(unsigned int idx)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if ((idx != ARMV7_CYCLE_COUNTER) &&
|
||||
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
|
||||
pr_err("CPU%u enabling wrong PMNC counter"
|
||||
" interrupt enable %d\n", smp_processor_id(), idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (idx == ARMV7_CYCLE_COUNTER)
|
||||
val = ARMV7_INTENS_C;
|
||||
else
|
||||
val = ARMV7_INTENS_P(idx);
|
||||
|
||||
asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7_pmnc_disable_intens(unsigned int idx)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
if ((idx != ARMV7_CYCLE_COUNTER) &&
|
||||
((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) {
|
||||
pr_err("CPU%u disabling wrong PMNC counter"
|
||||
" interrupt enable %d\n", smp_processor_id(), idx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (idx == ARMV7_CYCLE_COUNTER)
|
||||
val = ARMV7_INTENC_C;
|
||||
else
|
||||
val = ARMV7_INTENC_P(idx);
|
||||
|
||||
asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val));
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
static inline u32 armv7_pmnc_getreset_flags(void)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
/* Read */
|
||||
asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val));
|
||||
|
||||
/* Write to clear flags */
|
||||
val &= ARMV7_FLAG_MASK;
|
||||
asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val));
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
/*
 * Debug helper: print the PMU control, enable, interrupt-enable, flag,
 * select and cycle-count registers, then the count and event selection of
 * each event counter.
 *
 * NOTE(review): the loop stops before ARMV7_COUNTER_LAST while the range
 * checks elsewhere in this file treat it as inclusive - confirm against the
 * definition of ARMV7_COUNTER_LAST before changing either.
 */
static void armv7_pmnc_dump_regs(void)
{
	unsigned int idx;
	u32 reg;

	printk(KERN_INFO "PMNC registers dump:\n");

	asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (reg));
	printk(KERN_INFO "PMNC  =0x%08x\n", reg);

	asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (reg));
	printk(KERN_INFO "CNTENS=0x%08x\n", reg);

	asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (reg));
	printk(KERN_INFO "INTENS=0x%08x\n", reg);

	asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (reg));
	printk(KERN_INFO "FLAGS =0x%08x\n", reg);

	asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (reg));
	printk(KERN_INFO "SELECT=0x%08x\n", reg);

	asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (reg));
	printk(KERN_INFO "CCNT  =0x%08x\n", reg);

	idx = ARMV7_COUNTER0;
	while (idx < ARMV7_COUNTER_LAST) {
		armv7_pmnc_select_counter(idx);

		asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (reg));
		printk(KERN_INFO "CNT[%d] count =0x%08x\n",
		       idx-ARMV7_EVENT_CNT_TO_CNTx, reg);

		asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (reg));
		printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n",
		       idx-ARMV7_EVENT_CNT_TO_CNTx, reg);

		idx++;
	}
}
#endif
|
||||
|
||||
void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Enable counter and interrupt, and set the counter to count
|
||||
* the event that we're interested in.
|
||||
*/
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
|
||||
/*
|
||||
* Disable counter
|
||||
*/
|
||||
armv7_pmnc_disable_counter(idx);
|
||||
|
||||
/*
|
||||
* Set event (if destined for PMNx counters)
|
||||
* We don't need to set the event if it's a cycle count
|
||||
*/
|
||||
if (idx != ARMV7_CYCLE_COUNTER)
|
||||
armv7_pmnc_write_evtsel(idx, hwc->config_base);
|
||||
|
||||
/*
|
||||
* Enable interrupt for this counter
|
||||
*/
|
||||
armv7_pmnc_enable_intens(idx);
|
||||
|
||||
/*
|
||||
* Enable counter
|
||||
*/
|
||||
armv7_pmnc_enable_counter(idx);
|
||||
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Disable counter and interrupt
|
||||
*/
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
|
||||
/*
|
||||
* Disable counter
|
||||
*/
|
||||
armv7_pmnc_disable_counter(idx);
|
||||
|
||||
/*
|
||||
* Disable interrupt for this counter
|
||||
*/
|
||||
armv7_pmnc_disable_intens(idx);
|
||||
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev)
|
||||
{
|
||||
unsigned long pmnc;
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
/*
|
||||
* Get and reset the IRQ flags
|
||||
*/
|
||||
pmnc = armv7_pmnc_getreset_flags();
|
||||
|
||||
/*
|
||||
* Did an overflow occur?
|
||||
*/
|
||||
if (!armv7_pmnc_has_overflowed(pmnc))
|
||||
return IRQ_NONE;
|
||||
|
||||
/*
|
||||
* Handle the counter(s) overflow(s)
|
||||
*/
|
||||
regs = get_irq_regs();
|
||||
|
||||
perf_sample_data_init(&data, 0);
|
||||
|
||||
cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
for (idx = 0; idx <= armpmu->num_events; ++idx) {
|
||||
struct perf_event *event = cpuc->events[idx];
|
||||
struct hw_perf_event *hwc;
|
||||
|
||||
if (!test_bit(idx, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We have a single interrupt for all counters. Check that
|
||||
* each counter has overflowed before we process it.
|
||||
*/
|
||||
if (!armv7_pmnc_counter_has_overflowed(pmnc, idx))
|
||||
continue;
|
||||
|
||||
hwc = &event->hw;
|
||||
armpmu_event_update(event, hwc, idx);
|
||||
data.period = event->hw.last_period;
|
||||
if (!armpmu_event_set_period(event, hwc, idx))
|
||||
continue;
|
||||
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
armpmu->disable(hwc, idx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the pending perf events.
|
||||
*
|
||||
* Note: this call *must* be run with interrupts disabled. For
|
||||
* platforms that can have the PMU interrupts raised as an NMI, this
|
||||
* will not work.
|
||||
*/
|
||||
irq_work_run();
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void armv7pmu_start(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
/* Enable all counters */
|
||||
armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void armv7pmu_stop(void)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
/* Disable all counters */
|
||||
armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc,
|
||||
struct hw_perf_event *event)
|
||||
{
|
||||
int idx;
|
||||
|
||||
/* Always place a cycle counter into the cycle counter. */
|
||||
if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) {
|
||||
if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask))
|
||||
return -EAGAIN;
|
||||
|
||||
return ARMV7_CYCLE_COUNTER;
|
||||
} else {
|
||||
/*
|
||||
* For anything other than a cycle counter, try and use
|
||||
* the events counters
|
||||
*/
|
||||
for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) {
|
||||
if (!test_and_set_bit(idx, cpuc->used_mask))
|
||||
return idx;
|
||||
}
|
||||
|
||||
/* The counters are all in use. */
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
static struct arm_pmu armv7pmu = {
|
||||
.handle_irq = armv7pmu_handle_irq,
|
||||
.enable = armv7pmu_enable_event,
|
||||
.disable = armv7pmu_disable_event,
|
||||
.read_counter = armv7pmu_read_counter,
|
||||
.write_counter = armv7pmu_write_counter,
|
||||
.get_event_idx = armv7pmu_get_event_idx,
|
||||
.start = armv7pmu_start,
|
||||
.stop = armv7pmu_stop,
|
||||
.raw_event_mask = 0xFF,
|
||||
.max_period = (1LLU << 32) - 1,
|
||||
};
|
||||
|
||||
/*
 * Write the P and C bits to PMNC, then read the counter-count field back
 * from PMNC. Returns that count plus one for the cycle counter.
 */
static u32 __init armv7_reset_read_pmnc(void)
{
	u32 event_counters;

	/* initialise and reset the PMU: C and P bits */
	armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C);

	/* number of CNTx event counters as reported by PMNC */
	event_counters = armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT;
	event_counters &= ARMV7_PMNC_N_MASK;

	/* account for the CPU cycle counter as well */
	return event_counters + 1;
}
|
||||
|
||||
/*
 * Fill in the Cortex-A8 specific fields of the shared armv7pmu descriptor
 * (the counter count is read from the hardware) and return it.
 */
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
	struct arm_pmu *pmu = &armv7pmu;

	pmu->id		= ARM_PERF_PMU_ID_CA8;
	pmu->name	= "ARMv7 Cortex-A8";
	pmu->cache_map	= &armv7_a8_perf_cache_map;
	pmu->event_map	= &armv7_a8_perf_map;
	pmu->num_events	= armv7_reset_read_pmnc();

	return pmu;
}
|
||||
|
||||
/*
 * Fill in the Cortex-A9 specific fields of the shared armv7pmu descriptor
 * (the counter count is read from the hardware) and return it.
 */
const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
	struct arm_pmu *pmu = &armv7pmu;

	pmu->id		= ARM_PERF_PMU_ID_CA9;
	pmu->name	= "ARMv7 Cortex-A9";
	pmu->cache_map	= &armv7_a9_perf_cache_map;
	pmu->event_map	= &armv7_a9_perf_map;
	pmu->num_events	= armv7_reset_read_pmnc();

	return pmu;
}
|
||||
#else
|
||||
/* Stub for builds without CONFIG_CPU_V7: no Cortex-A8 PMU is available. */
const struct arm_pmu *__init armv7_a8_pmu_init(void)
{
	return NULL;
}
|
||||
|
||||
/* Stub for builds without CONFIG_CPU_V7: no Cortex-A9 PMU is available. */
const struct arm_pmu *__init armv7_a9_pmu_init(void)
{
	return NULL;
}
|
||||
#endif /* CONFIG_CPU_V7 */
|
807
arch/arm/kernel/perf_event_xscale.c
Normal file
807
arch/arm/kernel/perf_event_xscale.c
Normal file
@ -0,0 +1,807 @@
|
||||
/*
|
||||
* ARMv5 [xscale] Performance counter handling code.
|
||||
*
|
||||
* Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com>
|
||||
*
|
||||
* Based on the previous xscale OProfile code.
|
||||
*
|
||||
* There are two variants of the xscale PMU that we support:
|
||||
* - xscale1pmu: 2 event counters and a cycle counter
|
||||
* - xscale2pmu: 4 event counters and a cycle counter
|
||||
* The two variants share event definitions, but have different
|
||||
* PMU structures.
|
||||
*/
|
||||
|
||||
#ifdef CONFIG_CPU_XSCALE
|
||||
/*
 * Event numbers programmed into the xscale event selection fields.
 * CCNT and UNUSED are software-only values, not hardware events.
 */
enum xscale_perf_types {
	XSCALE_PERFCTR_ICACHE_MISS		= 0x00,
	XSCALE_PERFCTR_ICACHE_NO_DELIVER	= 0x01,
	XSCALE_PERFCTR_DATA_STALL		= 0x02,
	XSCALE_PERFCTR_ITLB_MISS		= 0x03,
	XSCALE_PERFCTR_DTLB_MISS		= 0x04,
	XSCALE_PERFCTR_BRANCH			= 0x05,
	XSCALE_PERFCTR_BRANCH_MISS		= 0x06,
	XSCALE_PERFCTR_INSTRUCTION		= 0x07,
	XSCALE_PERFCTR_DCACHE_FULL_STALL	= 0x08,
	XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG	= 0x09,
	XSCALE_PERFCTR_DCACHE_ACCESS		= 0x0a,
	XSCALE_PERFCTR_DCACHE_MISS		= 0x0b,
	XSCALE_PERFCTR_DCACHE_WRITE_BACK	= 0x0c,
	XSCALE_PERFCTR_PC_CHANGED		= 0x0d,
	XSCALE_PERFCTR_BCU_REQUEST		= 0x10,
	XSCALE_PERFCTR_BCU_FULL			= 0x11,
	XSCALE_PERFCTR_BCU_DRAIN		= 0x12,
	XSCALE_PERFCTR_BCU_ECC_NO_ELOG		= 0x14,
	XSCALE_PERFCTR_BCU_1_BIT_ERR		= 0x15,
	XSCALE_PERFCTR_RMW			= 0x16,
	/* XSCALE_PERFCTR_CCNT is not hardware defined */
	XSCALE_PERFCTR_CCNT			= 0xfe,
	XSCALE_PERFCTR_UNUSED			= 0xff,
};
|
||||
|
||||
/* Perf counter indices for xscale; numbering starts at 1 with the cycle counter. */
enum xscale_counters {
	XSCALE_CYCLE_COUNTER	= 1,
	XSCALE_COUNTER0		= 2,
	XSCALE_COUNTER1		= 3,
	XSCALE_COUNTER2		= 4,
	XSCALE_COUNTER3		= 5,
};
|
||||
|
||||
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
|
||||
};
|
||||
|
||||
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
|
||||
[PERF_COUNT_HW_CACHE_OP_MAX]
|
||||
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
|
||||
[C(L1D)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(L1I)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(LL)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(DTLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(ITLB)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
[C(BPU)] = {
|
||||
[C(OP_READ)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_WRITE)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
[C(OP_PREFETCH)] = {
|
||||
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
|
||||
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/* PMNC control bits */
#define XSCALE_PMU_ENABLE	0x001
#define XSCALE_PMN_RESET	0x002
#define XSCALE_CCNT_RESET	0x004
/* combined reset mask, built from the two XSCALE_-prefixed reset bits above */
#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
#define XSCALE_PMU_CNT64	0x008

/* xscale1 PMNC layout: overflow flags, interrupt enables, event fields */
#define XSCALE1_OVERFLOWED_MASK	0x700
#define XSCALE1_CCOUNT_OVERFLOW	0x400
#define XSCALE1_COUNT0_OVERFLOW	0x100
#define XSCALE1_COUNT1_OVERFLOW	0x200
#define XSCALE1_CCOUNT_INT_EN	0x040
#define XSCALE1_COUNT0_INT_EN	0x010
#define XSCALE1_COUNT1_INT_EN	0x020
#define XSCALE1_COUNT0_EVT_SHFT	12
#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
#define XSCALE1_COUNT1_EVT_SHFT	20
#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
|
||||
|
||||
static inline u32
|
||||
xscale1pmu_read_pmnc(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale1pmu_write_pmnc(u32 val)
|
||||
{
|
||||
/* upper 4bits and 7, 11 are write-as-0 */
|
||||
val &= 0xffff77f;
|
||||
asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
|
||||
}
|
||||
|
||||
static inline int
|
||||
xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
|
||||
enum xscale_counters counter)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static irqreturn_t
|
||||
xscale1pmu_handle_irq(int irq_num, void *dev)
|
||||
{
|
||||
unsigned long pmnc;
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
/*
|
||||
* NOTE: there's an A stepping erratum that states if an overflow
|
||||
* bit already exists and another occurs, the previous
|
||||
* Overflow bit gets cleared. There's no workaround.
|
||||
* Fixed in B stepping or later.
|
||||
*/
|
||||
pmnc = xscale1pmu_read_pmnc();
|
||||
|
||||
/*
|
||||
* Write the value back to clear the overflow flags. Overflow
|
||||
* flags remain in pmnc for use below. We also disable the PMU
|
||||
* while we process the interrupt.
|
||||
*/
|
||||
xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
|
||||
|
||||
if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
|
||||
return IRQ_NONE;
|
||||
|
||||
regs = get_irq_regs();
|
||||
|
||||
perf_sample_data_init(&data, 0);
|
||||
|
||||
cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
for (idx = 0; idx <= armpmu->num_events; ++idx) {
|
||||
struct perf_event *event = cpuc->events[idx];
|
||||
struct hw_perf_event *hwc;
|
||||
|
||||
if (!test_bit(idx, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
|
||||
continue;
|
||||
|
||||
hwc = &event->hw;
|
||||
armpmu_event_update(event, hwc, idx);
|
||||
data.period = event->hw.last_period;
|
||||
if (!armpmu_event_set_period(event, hwc, idx))
|
||||
continue;
|
||||
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
armpmu->disable(hwc, idx);
|
||||
}
|
||||
|
||||
irq_work_run();
|
||||
|
||||
/*
|
||||
* Re-enable the PMU.
|
||||
*/
|
||||
pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE;
|
||||
xscale1pmu_write_pmnc(pmnc);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void
|
||||
xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long val, mask, evt, flags;
|
||||
|
||||
switch (idx) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
mask = 0;
|
||||
evt = XSCALE1_CCOUNT_INT_EN;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
mask = XSCALE1_COUNT0_EVT_MASK;
|
||||
evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) |
|
||||
XSCALE1_COUNT0_INT_EN;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
mask = XSCALE1_COUNT1_EVT_MASK;
|
||||
evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) |
|
||||
XSCALE1_COUNT1_INT_EN;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
val &= ~mask;
|
||||
val |= evt;
|
||||
xscale1pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long val, mask, evt, flags;
|
||||
|
||||
switch (idx) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
mask = XSCALE1_CCOUNT_INT_EN;
|
||||
evt = 0;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK;
|
||||
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK;
|
||||
evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
val &= ~mask;
|
||||
val |= evt;
|
||||
xscale1pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static int
|
||||
xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc,
|
||||
struct hw_perf_event *event)
|
||||
{
|
||||
if (XSCALE_PERFCTR_CCNT == event->config_base) {
|
||||
if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask))
|
||||
return -EAGAIN;
|
||||
|
||||
return XSCALE_CYCLE_COUNTER;
|
||||
} else {
|
||||
if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask))
|
||||
return XSCALE_COUNTER1;
|
||||
|
||||
if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask))
|
||||
return XSCALE_COUNTER0;
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
xscale1pmu_start(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
val |= XSCALE_PMU_ENABLE;
|
||||
xscale1pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
xscale1pmu_stop(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale1pmu_read_pmnc();
|
||||
val &= ~XSCALE_PMU_ENABLE;
|
||||
xscale1pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
xscale1pmu_read_counter(int counter)
|
||||
{
|
||||
u32 val = 0;
|
||||
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val));
|
||||
break;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale1pmu_write_counter(int counter, u32 val)
|
||||
{
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct arm_pmu xscale1pmu = {
|
||||
.id = ARM_PERF_PMU_ID_XSCALE1,
|
||||
.name = "xscale1",
|
||||
.handle_irq = xscale1pmu_handle_irq,
|
||||
.enable = xscale1pmu_enable_event,
|
||||
.disable = xscale1pmu_disable_event,
|
||||
.read_counter = xscale1pmu_read_counter,
|
||||
.write_counter = xscale1pmu_write_counter,
|
||||
.get_event_idx = xscale1pmu_get_event_idx,
|
||||
.start = xscale1pmu_start,
|
||||
.stop = xscale1pmu_stop,
|
||||
.cache_map = &xscale_perf_cache_map,
|
||||
.event_map = &xscale_perf_map,
|
||||
.raw_event_mask = 0xFF,
|
||||
.num_events = 3,
|
||||
.max_period = (1LLU << 32) - 1,
|
||||
};
|
||||
|
||||
/* Return the statically initialised xscale1 PMU descriptor. */
const struct arm_pmu *__init xscale1pmu_init(void)
{
	const struct arm_pmu *pmu = &xscale1pmu;

	return pmu;
}
|
||||
|
||||
/* xscale2 overflow flag register bits */
#define XSCALE2_OVERFLOWED_MASK	0x01f
#define XSCALE2_CCOUNT_OVERFLOW	0x001
#define XSCALE2_COUNT0_OVERFLOW	0x002
#define XSCALE2_COUNT1_OVERFLOW	0x004
#define XSCALE2_COUNT2_OVERFLOW	0x008
#define XSCALE2_COUNT3_OVERFLOW	0x010

/* xscale2 interrupt enable register bits */
#define XSCALE2_CCOUNT_INT_EN	0x001
#define XSCALE2_COUNT0_INT_EN	0x002
#define XSCALE2_COUNT1_INT_EN	0x004
#define XSCALE2_COUNT2_INT_EN	0x008
#define XSCALE2_COUNT3_INT_EN	0x010

/*
 * xscale2 event select register fields, one byte per event counter.
 * The masks are unsigned so that the counter 3 field (bits 24-31) does not
 * overflow a signed int when shifted.
 */
#define XSCALE2_COUNT0_EVT_SHFT	0
#define XSCALE2_COUNT0_EVT_MASK	(0xffU << XSCALE2_COUNT0_EVT_SHFT)
#define XSCALE2_COUNT1_EVT_SHFT	8
#define XSCALE2_COUNT1_EVT_MASK	(0xffU << XSCALE2_COUNT1_EVT_SHFT)
#define XSCALE2_COUNT2_EVT_SHFT	16
#define XSCALE2_COUNT2_EVT_MASK	(0xffU << XSCALE2_COUNT2_EVT_SHFT)
#define XSCALE2_COUNT3_EVT_SHFT	24
#define XSCALE2_COUNT3_EVT_MASK	(0xffU << XSCALE2_COUNT3_EVT_SHFT)
|
||||
|
||||
static inline u32
|
||||
xscale2pmu_read_pmnc(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val));
|
||||
/* bits 1-2 and 4-23 are read-unpredictable */
|
||||
return val & 0xff000009;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale2pmu_write_pmnc(u32 val)
|
||||
{
|
||||
/* bits 4-23 are write-as-0, 24-31 are write ignored */
|
||||
val &= 0xf;
|
||||
asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val));
|
||||
}
|
||||
|
||||
static inline u32
|
||||
xscale2pmu_read_overflow_flags(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale2pmu_write_overflow_flags(u32 val)
|
||||
{
|
||||
asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val));
|
||||
}
|
||||
|
||||
static inline u32
|
||||
xscale2pmu_read_event_select(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale2pmu_write_event_select(u32 val)
|
||||
{
|
||||
asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val));
|
||||
}
|
||||
|
||||
static inline u32
|
||||
xscale2pmu_read_int_enable(void)
|
||||
{
|
||||
u32 val;
|
||||
asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val));
|
||||
return val;
|
||||
}
|
||||
|
||||
static void
|
||||
xscale2pmu_write_int_enable(u32 val)
|
||||
{
|
||||
asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val));
|
||||
}
|
||||
|
||||
static inline int
|
||||
xscale2_pmnc_counter_has_overflowed(unsigned long of_flags,
|
||||
enum xscale_counters counter)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
ret = of_flags & XSCALE2_CCOUNT_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
ret = of_flags & XSCALE2_COUNT0_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
ret = of_flags & XSCALE2_COUNT1_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER2:
|
||||
ret = of_flags & XSCALE2_COUNT2_OVERFLOW;
|
||||
break;
|
||||
case XSCALE_COUNTER3:
|
||||
ret = of_flags & XSCALE2_COUNT3_OVERFLOW;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", counter);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static irqreturn_t
|
||||
xscale2pmu_handle_irq(int irq_num, void *dev)
|
||||
{
|
||||
unsigned long pmnc, of_flags;
|
||||
struct perf_sample_data data;
|
||||
struct cpu_hw_events *cpuc;
|
||||
struct pt_regs *regs;
|
||||
int idx;
|
||||
|
||||
/* Disable the PMU. */
|
||||
pmnc = xscale2pmu_read_pmnc();
|
||||
xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
|
||||
|
||||
/* Check the overflow flag register. */
|
||||
of_flags = xscale2pmu_read_overflow_flags();
|
||||
if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
|
||||
return IRQ_NONE;
|
||||
|
||||
/* Clear the overflow bits. */
|
||||
xscale2pmu_write_overflow_flags(of_flags);
|
||||
|
||||
regs = get_irq_regs();
|
||||
|
||||
perf_sample_data_init(&data, 0);
|
||||
|
||||
cpuc = &__get_cpu_var(cpu_hw_events);
|
||||
for (idx = 0; idx <= armpmu->num_events; ++idx) {
|
||||
struct perf_event *event = cpuc->events[idx];
|
||||
struct hw_perf_event *hwc;
|
||||
|
||||
if (!test_bit(idx, cpuc->active_mask))
|
||||
continue;
|
||||
|
||||
if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx))
|
||||
continue;
|
||||
|
||||
hwc = &event->hw;
|
||||
armpmu_event_update(event, hwc, idx);
|
||||
data.period = event->hw.last_period;
|
||||
if (!armpmu_event_set_period(event, hwc, idx))
|
||||
continue;
|
||||
|
||||
if (perf_event_overflow(event, 0, &data, regs))
|
||||
armpmu->disable(hwc, idx);
|
||||
}
|
||||
|
||||
irq_work_run();
|
||||
|
||||
/*
|
||||
* Re-enable the PMU.
|
||||
*/
|
||||
pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
|
||||
xscale2pmu_write_pmnc(pmnc);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static void
|
||||
xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long flags, ien, evtsel;
|
||||
|
||||
ien = xscale2pmu_read_int_enable();
|
||||
evtsel = xscale2pmu_read_event_select();
|
||||
|
||||
switch (idx) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
ien |= XSCALE2_CCOUNT_INT_EN;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
ien |= XSCALE2_COUNT0_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
|
||||
evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
ien |= XSCALE2_COUNT1_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
|
||||
evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER2:
|
||||
ien |= XSCALE2_COUNT2_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
|
||||
evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER3:
|
||||
ien |= XSCALE2_COUNT3_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
|
||||
evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
xscale2pmu_write_event_select(evtsel);
|
||||
xscale2pmu_write_int_enable(ien);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
|
||||
{
|
||||
unsigned long flags, ien, evtsel;
|
||||
|
||||
ien = xscale2pmu_read_int_enable();
|
||||
evtsel = xscale2pmu_read_event_select();
|
||||
|
||||
switch (idx) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
ien &= ~XSCALE2_CCOUNT_INT_EN;
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
ien &= ~XSCALE2_COUNT0_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
|
||||
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
ien &= ~XSCALE2_COUNT1_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
|
||||
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER2:
|
||||
ien &= ~XSCALE2_COUNT2_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
|
||||
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
|
||||
break;
|
||||
case XSCALE_COUNTER3:
|
||||
ien &= ~XSCALE2_COUNT3_INT_EN;
|
||||
evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
|
||||
evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
|
||||
break;
|
||||
default:
|
||||
WARN_ONCE(1, "invalid counter number (%d)\n", idx);
|
||||
return;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
xscale2pmu_write_event_select(evtsel);
|
||||
xscale2pmu_write_int_enable(ien);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static int
|
||||
xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
|
||||
struct hw_perf_event *event)
|
||||
{
|
||||
int idx = xscale1pmu_get_event_idx(cpuc, event);
|
||||
if (idx >= 0)
|
||||
goto out;
|
||||
|
||||
if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask))
|
||||
idx = XSCALE_COUNTER3;
|
||||
else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask))
|
||||
idx = XSCALE_COUNTER2;
|
||||
out:
|
||||
return idx;
|
||||
}
|
||||
|
||||
static void
|
||||
xscale2pmu_start(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64;
|
||||
val |= XSCALE_PMU_ENABLE;
|
||||
xscale2pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static void
|
||||
xscale2pmu_stop(void)
|
||||
{
|
||||
unsigned long flags, val;
|
||||
|
||||
spin_lock_irqsave(&pmu_lock, flags);
|
||||
val = xscale2pmu_read_pmnc();
|
||||
val &= ~XSCALE_PMU_ENABLE;
|
||||
xscale2pmu_write_pmnc(val);
|
||||
spin_unlock_irqrestore(&pmu_lock, flags);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
xscale2pmu_read_counter(int counter)
|
||||
{
|
||||
u32 val = 0;
|
||||
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER2:
|
||||
asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER3:
|
||||
asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val));
|
||||
break;
|
||||
}
|
||||
|
||||
return val;
|
||||
}
|
||||
|
||||
static inline void
|
||||
xscale2pmu_write_counter(int counter, u32 val)
|
||||
{
|
||||
switch (counter) {
|
||||
case XSCALE_CYCLE_COUNTER:
|
||||
asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER0:
|
||||
asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER1:
|
||||
asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER2:
|
||||
asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val));
|
||||
break;
|
||||
case XSCALE_COUNTER3:
|
||||
asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static const struct arm_pmu xscale2pmu = {
|
||||
.id = ARM_PERF_PMU_ID_XSCALE2,
|
||||
.name = "xscale2",
|
||||
.handle_irq = xscale2pmu_handle_irq,
|
||||
.enable = xscale2pmu_enable_event,
|
||||
.disable = xscale2pmu_disable_event,
|
||||
.read_counter = xscale2pmu_read_counter,
|
||||
.write_counter = xscale2pmu_write_counter,
|
||||
.get_event_idx = xscale2pmu_get_event_idx,
|
||||
.start = xscale2pmu_start,
|
||||
.stop = xscale2pmu_stop,
|
||||
.cache_map = &xscale_perf_cache_map,
|
||||
.event_map = &xscale_perf_map,
|
||||
.raw_event_mask = 0xFF,
|
||||
.num_events = 5,
|
||||
.max_period = (1LLU << 32) - 1,
|
||||
};
|
||||
|
||||
/* Hand the xscale2 PMU descriptor to the caller. */
const struct arm_pmu *__init xscale2pmu_init(void)
{
	const struct arm_pmu *pmu = &xscale2pmu;

	return pmu;
}
|
||||
#else
|
||||
/* Stub for builds without CONFIG_CPU_XSCALE: no xscale1 PMU exists. */
const struct arm_pmu *__init xscale1pmu_init(void)
{
	const struct arm_pmu *none = NULL;

	return none;
}
|
||||
|
||||
/* Stub for builds without CONFIG_CPU_XSCALE: no xscale2 PMU exists. */
const struct arm_pmu *__init xscale2pmu_init(void)
{
	const struct arm_pmu *none = NULL;

	return none;
}
|
||||
#endif /* CONFIG_CPU_XSCALE */
|
69
arch/arm/kernel/sched_clock.c
Normal file
69
arch/arm/kernel/sched_clock.c
Normal file
@ -0,0 +1,69 @@
|
||||
/*
|
||||
* sched_clock.c: support for extending counters to full 64-bit ns counter
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/timer.h>
|
||||
|
||||
#include <asm/sched_clock.h>
|
||||
|
||||
static void sched_clock_poll(unsigned long wrap_ticks);
|
||||
static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0);
|
||||
static void (*sched_clock_update_fn)(void);
|
||||
|
||||
static void sched_clock_poll(unsigned long wrap_ticks)
|
||||
{
|
||||
mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks));
|
||||
sched_clock_update_fn();
|
||||
}
|
||||
|
||||
/*
 * Set up @cd for a counter that is @clock_bits wide and ticks at @rate Hz.
 *
 * Records @update as the periodic update hook, computes the mult/shift
 * pair used to convert ticks to nanoseconds, logs the counter's
 * parameters, starts the polling timer at 90% of the wrap period and
 * finally resets the epoch so that sched_clock() starts at 0ns.
 */
void __init init_sched_clock(struct clock_data *cd, void (*update)(void),
	unsigned int clock_bits, unsigned long rate)
{
	unsigned long shown_rate, wrap_ms;
	u64 tick_ns, wrap_span;
	char unit;

	sched_clock_update_fn = update;

	/* calculate the mult/shift to convert counter ticks to ns. */
	clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60);

	/* scale the rate to kHz or MHz for the log message only */
	if (rate < 4000000) {
		shown_rate = rate / 1000;
		unit = 'k';
	} else {
		shown_rate = rate / 1000000;
		unit = 'M';
	}

	/* ns until the counter wraps; do_div() turns it into ms in place */
	wrap_span = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift);
	do_div(wrap_span, NSEC_PER_MSEC);
	wrap_ms = wrap_span;

	/* the ns resolution of this counter is the length of one tick */
	tick_ns = cyc_to_ns(1ULL, cd->mult, cd->shift);
	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n",
		clock_bits, shown_rate, unit, tick_ns, wrap_ms);

	/*
	 * Start the timer to keep sched_clock() properly updated and
	 * set the initial epoch.  The period is 90% of the wrap time.
	 */
	sched_clock_timer.data = msecs_to_jiffies(wrap_ms - (wrap_ms / 10));
	sched_clock_poll(sched_clock_timer.data);

	/*
	 * Ensure that sched_clock() starts off at 0ns.  This must come
	 * after the poll above, which runs the update hook.
	 */
	cd->epoch_ns = 0;
}
|
@ -16,6 +16,7 @@
|
||||
#include <linux/cache.h>
|
||||
#include <linux/profile.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/ftrace.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/cpu.h>
|
||||
@ -456,7 +457,7 @@ static void ipi_timer(void)
|
||||
}
|
||||
|
||||
#ifdef CONFIG_LOCAL_TIMERS
|
||||
asmlinkage void __exception do_local_timer(struct pt_regs *regs)
|
||||
asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs)
|
||||
{
|
||||
struct pt_regs *old_regs = set_irq_regs(regs);
|
||||
int cpu = smp_processor_id();
|
||||
@ -543,7 +544,7 @@ static void ipi_cpu_stop(unsigned int cpu)
|
||||
*
|
||||
* Bit 0 - Inter-processor function call
|
||||
*/
|
||||
asmlinkage void __exception do_IPI(struct pt_regs *regs)
|
||||
asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs)
|
||||
{
|
||||
unsigned int cpu = smp_processor_id();
|
||||
struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
|
||||
|
@ -101,6 +101,7 @@ SECTIONS
|
||||
__exception_text_start = .;
|
||||
*(.exception.text)
|
||||
__exception_text_end = .;
|
||||
IRQENTRY_TEXT
|
||||
TEXT_TEXT
|
||||
SCHED_TEXT
|
||||
LOCK_TEXT
|
||||
|
@ -101,7 +101,6 @@ static struct clocksource clk32k = {
|
||||
.rating = 150,
|
||||
.read = read_clk32k,
|
||||
.mask = CLOCKSOURCE_MASK(20),
|
||||
.shift = 10,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void)
|
||||
clockevents_register_device(&clkevt);
|
||||
|
||||
/* register clocksource */
|
||||
clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift);
|
||||
clocksource_register(&clk32k);
|
||||
clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK);
|
||||
}
|
||||
|
||||
struct sys_timer at91rm9200_timer = {
|
||||
|
@ -51,7 +51,6 @@ static struct clocksource pit_clk = {
|
||||
.name = "pit",
|
||||
.rating = 175,
|
||||
.read = read_pit_clk,
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void)
|
||||
* Register clocksource. The high order bits of PIV are unused,
|
||||
* so this isn't a 32-bit counter unless we get clockevent irqs.
|
||||
*/
|
||||
pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift);
|
||||
bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */;
|
||||
pit_clk.mask = CLOCKSOURCE_MASK(bits);
|
||||
clocksource_register(&pit_clk);
|
||||
clocksource_register_hz(&pit_clk, pit_rate);
|
||||
|
||||
/* Set up irq handler */
|
||||
setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq);
|
||||
|
@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = {
|
||||
.rating = 200,
|
||||
.read = bcmring_get_cycles_timer1,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = {
|
||||
.rating = 100,
|
||||
.read = bcmring_get_cycles_timer3,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void)
|
||||
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
|
||||
TIMER1_VA_BASE + TIMER_CTRL);
|
||||
|
||||
clocksource_bcmring_timer1.mult =
|
||||
clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000,
|
||||
clocksource_bcmring_timer1.shift);
|
||||
clocksource_register(&clocksource_bcmring_timer1);
|
||||
clocksource_register_khz(&clocksource_bcmring_timer1,
|
||||
TIMER1_FREQUENCY_MHZ * 1000);
|
||||
|
||||
/* setup timer3 as free-running clocksource */
|
||||
writel(0, TIMER3_VA_BASE + TIMER_CTRL);
|
||||
@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void)
|
||||
writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC,
|
||||
TIMER3_VA_BASE + TIMER_CTRL);
|
||||
|
||||
clocksource_bcmring_timer3.mult =
|
||||
clocksource_khz2mult(TIMER3_FREQUENCY_KHZ,
|
||||
clocksource_bcmring_timer3.shift);
|
||||
clocksource_register(&clocksource_bcmring_timer3);
|
||||
clocksource_register_khz(&clocksource_bcmring_timer3,
|
||||
TIMER3_FREQUENCY_KHZ);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = {
|
||||
.rating = 300,
|
||||
.read = read_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 24,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -378,10 +377,8 @@ static void __init davinci_timer_init(void)
|
||||
|
||||
/* setup clocksource */
|
||||
clocksource_davinci.name = id_to_name[clocksource_id];
|
||||
clocksource_davinci.mult =
|
||||
clocksource_khz2mult(davinci_clock_tick_rate/1000,
|
||||
clocksource_davinci.shift);
|
||||
if (clocksource_register(&clocksource_davinci))
|
||||
if (clocksource_register_hz(&clocksource_davinci,
|
||||
davinci_clock_tick_rate))
|
||||
printk(err, clocksource_davinci.name);
|
||||
|
||||
/* setup clockevent */
|
||||
|
@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = {
|
||||
.rating = 200,
|
||||
.read = timersp_read,
|
||||
.mask = CLOCKSOURCE_MASK(16),
|
||||
.shift = 16,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz)
|
||||
writel(ctrl, base + TIMER_CTRL);
|
||||
writel(0xffff, base + TIMER_LOAD);
|
||||
|
||||
cs->mult = clocksource_khz2mult(khz, cs->shift);
|
||||
clocksource_register(cs);
|
||||
clocksource_register_khz(cs, khz);
|
||||
}
|
||||
|
||||
static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE;
|
||||
|
@ -35,6 +35,7 @@
|
||||
#include <asm/pgtable.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/sched_clock.h>
|
||||
|
||||
#include <asm/mach/map.h>
|
||||
#include <asm/mach/irq.h>
|
||||
@ -398,6 +399,23 @@ void __init ixp4xx_sys_init(void)
|
||||
ixp4xx_exp_bus_size >> 20);
|
||||
}
|
||||
|
||||
/*
|
||||
* sched_clock()
|
||||
*/
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
 * Read the IXP4XX_OSTS counter and convert it to a sched_clock value
 * with cyc_to_sched_clock(), using a full 32-bit mask.
 */
unsigned long long notrace sched_clock(void)
{
	u32 now;

	now = *IXP4XX_OSTS;

	return cyc_to_sched_clock(&cd, now, (u32)~0);
}
|
||||
|
||||
/*
 * Update hook passed to init_sched_clock(): feed the current
 * IXP4XX_OSTS value to update_sched_clock() with a full 32-bit mask.
 */
static void notrace ixp4xx_update_sched_clock(void)
{
	u32 now;

	now = *IXP4XX_OSTS;
	update_sched_clock(&cd, now, (u32)~0);
}
|
||||
|
||||
/*
|
||||
* clocksource
|
||||
*/
|
||||
@ -411,7 +429,6 @@ static struct clocksource clocksource_ixp4xx = {
|
||||
.rating = 200,
|
||||
.read = ixp4xx_get_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -419,21 +436,9 @@ unsigned long ixp4xx_timer_freq = FREQ;
|
||||
EXPORT_SYMBOL(ixp4xx_timer_freq);
|
||||
static void __init ixp4xx_clocksource_init(void)
|
||||
{
|
||||
clocksource_ixp4xx.mult =
|
||||
clocksource_hz2mult(ixp4xx_timer_freq,
|
||||
clocksource_ixp4xx.shift);
|
||||
clocksource_register(&clocksource_ixp4xx);
|
||||
}
|
||||
init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq);
|
||||
|
||||
/*
|
||||
* sched_clock()
|
||||
*/
|
||||
unsigned long long sched_clock(void)
|
||||
{
|
||||
cycle_t cyc = ixp4xx_get_cycles(NULL);
|
||||
struct clocksource *cs = &clocksource_ixp4xx;
|
||||
|
||||
return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
|
||||
clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs)
|
||||
|
||||
static struct clocksource lpc32xx_clksrc = {
|
||||
.name = "lpc32xx_clksrc",
|
||||
.shift = 24,
|
||||
.rating = 300,
|
||||
.read = lpc32xx_clksrc_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void)
|
||||
__raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE));
|
||||
__raw_writel(LCP32XX_TIMER_CNTR_TCR_EN,
|
||||
LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE));
|
||||
lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate,
|
||||
lpc32xx_clksrc.shift);
|
||||
clocksource_register(&lpc32xx_clksrc);
|
||||
clocksource_register_hz(&lpc32xx_clksrc, clkrate);
|
||||
}
|
||||
|
||||
struct sys_timer lpc32xx_timer = {
|
||||
|
@ -26,8 +26,8 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
|
||||
#include <asm/sched_clock.h>
|
||||
#include <mach/addr-map.h>
|
||||
#include <mach/regs-timers.h>
|
||||
#include <mach/regs-apbc.h>
|
||||
@ -42,23 +42,7 @@
|
||||
#define MAX_DELTA (0xfffffffe)
|
||||
#define MIN_DELTA (16)
|
||||
|
||||
#define TCR2NS_SCALE_FACTOR 10
|
||||
|
||||
static unsigned long tcr2ns_scale;
|
||||
|
||||
static void __init set_tcr2ns_scale(unsigned long tcr_rate)
|
||||
{
|
||||
unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR;
|
||||
do_div(v, tcr_rate);
|
||||
tcr2ns_scale = v;
|
||||
/*
|
||||
* We want an even value to automatically clear the top bit
|
||||
* returned by cnt32_to_63() without an additional run time
|
||||
* instruction. So if the LSB is 1 then round it up.
|
||||
*/
|
||||
if (tcr2ns_scale & 1)
|
||||
tcr2ns_scale++;
|
||||
}
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
|
||||
* FIXME: the timer needs some delay to stabilize the counter capture
|
||||
@ -75,10 +59,16 @@ static inline uint32_t timer_read(void)
|
||||
return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0));
|
||||
}
|
||||
|
||||
unsigned long long sched_clock(void)
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
unsigned long long v = cnt32_to_63(timer_read());
|
||||
return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR;
|
||||
u32 cyc = timer_read();
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
/*
 * Update hook passed to init_sched_clock(): feed the current
 * timer_read() value to update_sched_clock() with a full 32-bit mask.
 */
static void notrace mmp_update_sched_clock(void)
{
	u32 now;

	now = timer_read();
	update_sched_clock(&cd, now, (u32)~0);
}
|
||||
|
||||
static irqreturn_t timer_interrupt(int irq, void *dev_id)
|
||||
@ -146,7 +136,6 @@ static cycle_t clksrc_read(struct clocksource *cs)
|
||||
|
||||
static struct clocksource cksrc = {
|
||||
.name = "clocksource",
|
||||
.shift = 20,
|
||||
.rating = 200,
|
||||
.read = clksrc_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
@ -186,17 +175,15 @@ void __init timer_init(int irq)
|
||||
{
|
||||
timer_config();
|
||||
|
||||
set_tcr2ns_scale(CLOCK_TICK_RATE);
|
||||
init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE);
|
||||
|
||||
ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift);
|
||||
ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt);
|
||||
ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt);
|
||||
ckevt.cpumask = cpumask_of(0);
|
||||
|
||||
cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift);
|
||||
|
||||
setup_irq(irq, &timer_irq);
|
||||
|
||||
clocksource_register(&cksrc);
|
||||
clocksource_register_hz(&cksrc, CLOCK_TICK_RATE);
|
||||
clockevents_register_device(&ckevt);
|
||||
}
|
||||
|
@ -49,6 +49,8 @@ endchoice
|
||||
|
||||
config MSM_SOC_REV_A
|
||||
bool
|
||||
config ARCH_MSM_SCORPIONMP
|
||||
bool
|
||||
|
||||
config ARCH_MSM_ARM11
|
||||
bool
|
||||
|
@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = {
|
||||
.rating = 200,
|
||||
.read = msm_gpt_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 17,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
},
|
||||
.irq = {
|
||||
@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = {
|
||||
.rating = 300,
|
||||
.read = msm_dgt_read,
|
||||
.mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)),
|
||||
.shift = 24 - MSM_DGT_SHIFT,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
},
|
||||
.irq = {
|
||||
@ -205,8 +203,7 @@ static void __init msm_timer_init(void)
|
||||
ce->min_delta_ns = clockevent_delta2ns(4, ce);
|
||||
ce->cpumask = cpumask_of(0);
|
||||
|
||||
cs->mult = clocksource_hz2mult(clock->freq, cs->shift);
|
||||
res = clocksource_register(cs);
|
||||
res = clocksource_register_hz(cs, clock->freq);
|
||||
if (res)
|
||||
printk(KERN_ERR "msm_timer_init: clocksource_register "
|
||||
"failed for %s\n", cs->name);
|
||||
|
@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = {
|
||||
.rating = 200,
|
||||
.read = netx_get_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -151,9 +150,7 @@ static void __init netx_timer_init(void)
|
||||
writel(NETX_GPIO_COUNTER_CTRL_RUN,
|
||||
NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
|
||||
|
||||
clocksource_netx.mult =
|
||||
clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift);
|
||||
clocksource_register(&clocksource_netx);
|
||||
clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE);
|
||||
|
||||
netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
|
||||
netx_clockevent.shift);
|
||||
|
@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = {
|
||||
.rating = 300,
|
||||
.read = ns9360_clocksource_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void)
|
||||
|
||||
__raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE));
|
||||
|
||||
ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(),
|
||||
ns9360_clocksource.shift);
|
||||
|
||||
clocksource_register(&ns9360_clocksource);
|
||||
clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock());
|
||||
|
||||
latch = SH_DIV(ns9360_cpuclock(), HZ, 0);
|
||||
|
||||
|
@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = {
|
||||
.rating = 300,
|
||||
.read = mpu_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 24,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate)
|
||||
static char err[] __initdata = KERN_ERR
|
||||
"%s: can't register clocksource!\n";
|
||||
|
||||
clocksource_mpu.mult
|
||||
= clocksource_khz2mult(rate/1000, clocksource_mpu.shift);
|
||||
|
||||
setup_irq(INT_TIMER2, &omap_mpu_timer2_irq);
|
||||
omap_mpu_timer_start(1, ~0, 1);
|
||||
|
||||
if (clocksource_register(&clocksource_mpu))
|
||||
if (clocksource_register_hz(&clocksource_mpu, rate))
|
||||
printk(err, clocksource_mpu.name);
|
||||
}
|
||||
|
||||
|
@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = {
|
||||
.rating = 300,
|
||||
.read = clocksource_read_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 24,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void)
|
||||
|
||||
omap_dm_timer_set_load_start(gpt, 1, 0);
|
||||
|
||||
clocksource_gpt.mult =
|
||||
clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift);
|
||||
if (clocksource_register(&clocksource_gpt))
|
||||
if (clocksource_register_hz(&clocksource_gpt, tick_rate))
|
||||
printk(err2, clocksource_gpt.name);
|
||||
}
|
||||
#endif
|
||||
|
@ -17,11 +17,11 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
|
||||
#include <asm/div64.h>
|
||||
#include <asm/mach/irq.h>
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/sched_clock.h>
|
||||
#include <mach/regs-ost.h>
|
||||
|
||||
/*
|
||||
@ -32,29 +32,18 @@
|
||||
* long as there is always less than 582 seconds between successive
|
||||
* calls to sched_clock() which should always be the case in practice.
|
||||
*/
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
#define OSCR2NS_SCALE_FACTOR 10
|
||||
|
||||
static unsigned long oscr2ns_scale;
|
||||
|
||||
static void __init set_oscr2ns_scale(unsigned long oscr_rate)
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR;
|
||||
do_div(v, oscr_rate);
|
||||
oscr2ns_scale = v;
|
||||
/*
|
||||
* We want an even value to automatically clear the top bit
|
||||
* returned by cnt32_to_63() without an additional run time
|
||||
* instruction. So if the LSB is 1 then round it up.
|
||||
*/
|
||||
if (oscr2ns_scale & 1)
|
||||
oscr2ns_scale++;
|
||||
u32 cyc = OSCR;
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
unsigned long long sched_clock(void)
|
||||
static void notrace pxa_update_sched_clock(void)
|
||||
{
|
||||
unsigned long long v = cnt32_to_63(OSCR);
|
||||
return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR;
|
||||
u32 cyc = OSCR;
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
|
||||
@ -127,7 +116,6 @@ static struct clocksource cksrc_pxa_oscr0 = {
|
||||
.rating = 200,
|
||||
.read = pxa_read_oscr,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -145,7 +133,7 @@ static void __init pxa_timer_init(void)
|
||||
OIER = 0;
|
||||
OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3;
|
||||
|
||||
set_oscr2ns_scale(clock_tick_rate);
|
||||
init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate);
|
||||
|
||||
ckevt_pxa_osmr0.mult =
|
||||
div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift);
|
||||
@ -155,12 +143,9 @@ static void __init pxa_timer_init(void)
|
||||
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1;
|
||||
ckevt_pxa_osmr0.cpumask = cpumask_of(0);
|
||||
|
||||
cksrc_pxa_oscr0.mult =
|
||||
clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift);
|
||||
|
||||
setup_irq(IRQ_OST0, &pxa_ost0_irq);
|
||||
|
||||
clocksource_register(&cksrc_pxa_oscr0);
|
||||
clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate);
|
||||
clockevents_register_device(&ckevt_pxa_osmr0);
|
||||
}
|
||||
|
||||
|
@ -52,6 +52,8 @@
|
||||
#include <mach/irqs.h>
|
||||
#include <asm/hardware/timer-sp.h>
|
||||
|
||||
#include <plat/sched_clock.h>
|
||||
|
||||
#include "core.h"
|
||||
|
||||
#ifdef CONFIG_ZONE_DMA
|
||||
@ -654,6 +656,12 @@ void realview_leds_event(led_event_t ledevt)
|
||||
}
|
||||
#endif /* CONFIG_LEDS */
|
||||
|
||||
/*
|
||||
* The sched_clock counter
|
||||
*/
|
||||
#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + \
|
||||
REALVIEW_SYS_24MHz_OFFSET)
|
||||
|
||||
/*
|
||||
* Where is the timer (VA)?
|
||||
*/
|
||||
@ -669,6 +677,8 @@ void __init realview_timer_init(unsigned int timer_irq)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
versatile_sched_clock_init(REFCOUNTER, 24000000);
|
||||
|
||||
/*
|
||||
* set clock frequency:
|
||||
* REALVIEW_REFCLK is 32KHz
|
||||
|
@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = {
|
||||
.rating = 250,
|
||||
.read = s5pv310_pwm4_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS ,
|
||||
};
|
||||
|
||||
@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void)
|
||||
s5pv310_pwm_init(4, ~0);
|
||||
s5pv310_pwm_start(4, 1);
|
||||
|
||||
pwm_clocksource.mult =
|
||||
clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift);
|
||||
|
||||
if (clocksource_register(&pwm_clocksource))
|
||||
if (clocksource_register_hz(&pwm_clocksource, clock_rate))
|
||||
panic("%s: can't register clocksource\n", pwm_clocksource.name);
|
||||
}
|
||||
|
||||
|
@ -16,9 +16,7 @@
|
||||
#include <linux/pm.h>
|
||||
#include <linux/cpufreq.h>
|
||||
#include <linux/ioport.h>
|
||||
#include <linux/sched.h> /* just for sched_clock() - funny that */
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
|
||||
#include <asm/div64.h>
|
||||
#include <mach/hardware.h>
|
||||
@ -109,27 +107,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu)
|
||||
return cclk_frequency_100khz[PPCR & 0xf] * 100;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is the SA11x0 sched_clock implementation. This has
|
||||
* a resolution of 271ns, and a maximum value of 32025597s (370 days).
|
||||
*
|
||||
* The return value is guaranteed to be monotonic in that range as
|
||||
* long as there is always less than 582 seconds between successive
|
||||
* calls to this function.
|
||||
*
|
||||
* ( * 1E9 / 3686400 => * 78125 / 288)
|
||||
*/
|
||||
unsigned long long sched_clock(void)
|
||||
{
|
||||
unsigned long long v = cnt32_to_63(OSCR);
|
||||
|
||||
/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
|
||||
v *= 78125<<1;
|
||||
do_div(v, 288<<1);
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
/*
|
||||
* Default power-off for SA1100
|
||||
*/
|
||||
|
@ -12,12 +12,39 @@
|
||||
#include <linux/errno.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <linux/sched.h> /* just for sched_clock() - funny that */
|
||||
#include <linux/timex.h>
|
||||
#include <linux/clockchips.h>
|
||||
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/sched_clock.h>
|
||||
#include <mach/hardware.h>
|
||||
|
||||
/*
|
||||
* This is the SA11x0 sched_clock implementation.
|
||||
*/
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
|
||||
* Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz,
|
||||
* NSEC_PER_SEC, 60).
|
||||
* This gives a resolution of about 271ns and a wrap period of about 19min.
|
||||
*/
|
||||
#define SC_MULT 2275555556u
|
||||
#define SC_SHIFT 23
|
||||
|
||||
/*
 * Read OSCR and convert it to a sched_clock value using the fixed
 * SC_MULT/SC_SHIFT constants and a full 32-bit mask.
 */
unsigned long long notrace sched_clock(void)
{
	u32 now;

	now = OSCR;

	return cyc_to_fixed_sched_clock(&cd, now, (u32)~0, SC_MULT, SC_SHIFT);
}
|
||||
|
||||
/*
 * Update hook passed to init_fixed_sched_clock(): feed the current
 * OSCR value to update_sched_clock() with a full 32-bit mask.
 */
static void notrace sa1100_update_sched_clock(void)
{
	u32 now;

	now = OSCR;
	update_sched_clock(&cd, now, (u32)~0);
}
|
||||
|
||||
#define MIN_OSCR_DELTA 2
|
||||
|
||||
static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id)
|
||||
@ -81,7 +108,6 @@ static struct clocksource cksrc_sa1100_oscr = {
|
||||
.rating = 200,
|
||||
.read = sa1100_read_oscr,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -97,6 +123,9 @@ static void __init sa1100_timer_init(void)
|
||||
OIER = 0; /* disable any timer interrupts */
|
||||
OSSR = 0xf; /* clear status on all timers */
|
||||
|
||||
init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32,
|
||||
3686400, SC_MULT, SC_SHIFT);
|
||||
|
||||
ckevt_sa1100_osmr0.mult =
|
||||
div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift);
|
||||
ckevt_sa1100_osmr0.max_delta_ns =
|
||||
@ -105,12 +134,9 @@ static void __init sa1100_timer_init(void)
|
||||
clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1;
|
||||
ckevt_sa1100_osmr0.cpumask = cpumask_of(0);
|
||||
|
||||
cksrc_sa1100_oscr.mult =
|
||||
clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift);
|
||||
|
||||
setup_irq(IRQ_OST0, &sa1100_timer_irq);
|
||||
|
||||
clocksource_register(&cksrc_sa1100_oscr);
|
||||
clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE);
|
||||
clockevents_register_device(&ckevt_sa1100_osmr0);
|
||||
}
|
||||
|
||||
|
@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = {
|
||||
.rating = 200,
|
||||
.read = tcc_get_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 28,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock)
|
||||
{
|
||||
unsigned int c = clk_get_rate(clock);
|
||||
|
||||
clocksource_tcc.mult = clocksource_hz2mult(c,
|
||||
clocksource_tcc.shift);
|
||||
clocksource_register(&clocksource_tcc);
|
||||
clocksource_register_hz(&clocksource_tcc, c);
|
||||
|
||||
clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC,
|
||||
clockevent_tcc.shift);
|
||||
|
@ -18,6 +18,7 @@
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
@ -25,10 +26,10 @@
|
||||
#include <linux/clocksource.h>
|
||||
#include <linux/clk.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/localtimer.h>
|
||||
#include <asm/sched_clock.h>
|
||||
|
||||
#include <mach/iomap.h>
|
||||
#include <mach/irqs.h>
|
||||
@ -91,7 +92,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode,
|
||||
|
||||
static cycle_t tegra_clocksource_read(struct clocksource *cs)
|
||||
{
|
||||
return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US));
|
||||
return timer_readl(TIMERUS_CNTR_1US);
|
||||
}
|
||||
|
||||
static struct clock_event_device tegra_clockevent = {
|
||||
@ -106,14 +107,29 @@ static struct clocksource tegra_clocksource = {
|
||||
.name = "timer_us",
|
||||
.rating = 300,
|
||||
.read = tegra_clocksource_read,
|
||||
.mask = 0x7FFFFFFFFFFFFFFFULL,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
unsigned long long sched_clock(void)
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
|
||||
* Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60).
|
||||
* This gives a resolution of about 1us and a wrap period of about 1h11min.
|
||||
*/
|
||||
#define SC_MULT 4194304000u
|
||||
#define SC_SHIFT 22
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource),
|
||||
tegra_clocksource.mult, tegra_clocksource.shift);
|
||||
u32 cyc = timer_readl(TIMERUS_CNTR_1US);
|
||||
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
|
||||
}
|
||||
|
||||
static void notrace tegra_update_sched_clock(void)
|
||||
{
|
||||
u32 cyc = timer_readl(TIMERUS_CNTR_1US);
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id)
|
||||
@ -158,6 +174,9 @@ static void __init tegra_init_timer(void)
|
||||
WARN(1, "Unknown clock rate");
|
||||
}
|
||||
|
||||
init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32,
|
||||
1000000, SC_MULT, SC_SHIFT);
|
||||
|
||||
if (clocksource_register_hz(&tegra_clocksource, 1000000)) {
|
||||
printk(KERN_ERR "Failed to register clocksource\n");
|
||||
BUG();
|
||||
|
@ -9,6 +9,7 @@
|
||||
* Author: Linus Walleij <linus.walleij@stericsson.com>
|
||||
*/
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/clockchips.h>
|
||||
@ -21,6 +22,7 @@
|
||||
#include <mach/hardware.h>
|
||||
|
||||
/* Generic stuff */
|
||||
#include <asm/sched_clock.h>
|
||||
#include <asm/mach/map.h>
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/mach/irq.h>
|
||||
@ -352,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = {
|
||||
* this wraps around for now, since it is just a relative time
|
||||
* stamp. (Inspired by OMAP implementation.)
|
||||
*/
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
return clocksource_cyc2ns(clocksource_u300_1mhz.read(
|
||||
&clocksource_u300_1mhz),
|
||||
clocksource_u300_1mhz.mult,
|
||||
clocksource_u300_1mhz.shift);
|
||||
u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
static void notrace u300_update_sched_clock(void)
|
||||
{
|
||||
u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC);
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
|
||||
@ -375,6 +383,8 @@ static void __init u300_timer_init(void)
|
||||
clk_enable(clk);
|
||||
rate = clk_get_rate(clk);
|
||||
|
||||
init_sched_clock(&cd, u300_update_sched_clock, 32, rate);
|
||||
|
||||
/*
|
||||
* Disable the "OS" and "DD" timers - these are designed for Symbian!
|
||||
* Example usage in cnh1601578 cpu subsystem pd_timer_app.c
|
||||
@ -412,9 +422,7 @@ static void __init u300_timer_init(void)
|
||||
writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE,
|
||||
U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2);
|
||||
|
||||
clocksource_calc_mult_shift(&clocksource_u300_1mhz,
|
||||
rate, APPTIMER_MIN_RANGE);
|
||||
if (clocksource_register(&clocksource_u300_1mhz))
|
||||
if (clocksource_register_hz(&clocksource_u300_1mhz, rate))
|
||||
printk(KERN_ERR "timer: failed to initialize clock "
|
||||
"source %s\n", clocksource_u300_1mhz.name);
|
||||
|
||||
|
@ -51,6 +51,8 @@
|
||||
#include <mach/platform.h>
|
||||
#include <asm/hardware/timer-sp.h>
|
||||
|
||||
#include <plat/sched_clock.h>
|
||||
|
||||
#include "core.h"
|
||||
|
||||
/*
|
||||
@ -885,6 +887,12 @@ void __init versatile_init(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* The sched_clock counter
|
||||
*/
|
||||
#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + \
|
||||
VERSATILE_SYS_24MHz_OFFSET)
|
||||
|
||||
/*
|
||||
* Where is the timer (VA)?
|
||||
*/
|
||||
@ -900,6 +908,8 @@ static void __init versatile_timer_init(void)
|
||||
{
|
||||
u32 val;
|
||||
|
||||
versatile_sched_clock_init(REFCOUNTER, 24000000);
|
||||
|
||||
/*
|
||||
* set clock frequency:
|
||||
* VERSATILE_REFCLK is 32KHz
|
||||
|
@ -18,11 +18,12 @@
|
||||
#include <asm/mach/map.h>
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/hardware/arm_timer.h>
|
||||
#include <asm/hardware/timer-sp.h>
|
||||
|
||||
#include <mach/clkdev.h>
|
||||
#include <mach/motherboard.h>
|
||||
|
||||
#include <asm/hardware/timer-sp.h>
|
||||
#include <plat/sched_clock.h>
|
||||
|
||||
#include "core.h"
|
||||
|
||||
@ -50,6 +51,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num)
|
||||
|
||||
static void __init v2m_timer_init(void)
|
||||
{
|
||||
versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000);
|
||||
|
||||
writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL);
|
||||
writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL);
|
||||
|
||||
|
@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = {
|
||||
.rating = 200,
|
||||
.read = nuc900_get_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(TDR_SHIFT),
|
||||
.shift = 10,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void)
|
||||
val |= (COUNTEN | PERIOD | PRESCALE);
|
||||
__raw_writel(val, REG_TCSR1);
|
||||
|
||||
clocksource_nuc900.mult =
|
||||
clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift);
|
||||
clocksource_register(&clocksource_nuc900);
|
||||
clocksource_register_hz(&clocksource_nuc900, rate);
|
||||
}
|
||||
|
||||
static void __init nuc900_timer_init(void)
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/time.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/timex.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/io.h>
|
||||
@ -24,6 +25,7 @@
|
||||
#include <linux/clockchips.h>
|
||||
#include <mach/hardware.h>
|
||||
#include <asm/irq.h>
|
||||
#include <asm/sched_clock.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/mach/irq.h>
|
||||
#include <asm/mach/time.h>
|
||||
@ -50,15 +52,21 @@ static struct clocksource iop_clocksource = {
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
|
||||
* IOP sched_clock() implementation via its clocksource.
|
||||
*/
|
||||
unsigned long long sched_clock(void)
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
cycle_t cyc = iop_clocksource_read(NULL);
|
||||
struct clocksource *cs = &iop_clocksource;
|
||||
u32 cyc = 0xffffffffu - read_tcr1();
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
return clocksource_cyc2ns(cyc, cs->mult, cs->shift);
|
||||
static void notrace iop_update_sched_clock(void)
|
||||
{
|
||||
u32 cyc = 0xffffffffu - read_tcr1();
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -88,6 +96,7 @@ static void iop_set_mode(enum clock_event_mode mode,
|
||||
case CLOCK_EVT_MODE_PERIODIC:
|
||||
write_tmr0(tmr & ~IOP_TMR_EN);
|
||||
write_tcr0(ticks_per_jiffy - 1);
|
||||
write_trr0(ticks_per_jiffy - 1);
|
||||
tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN);
|
||||
break;
|
||||
case CLOCK_EVT_MODE_ONESHOT:
|
||||
@ -143,6 +152,8 @@ void __init iop_init_time(unsigned long tick_rate)
|
||||
{
|
||||
u32 timer_ctl;
|
||||
|
||||
init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate);
|
||||
|
||||
ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ);
|
||||
iop_tick_rate = tick_rate;
|
||||
|
||||
@ -153,6 +164,7 @@ void __init iop_init_time(unsigned long tick_rate)
|
||||
* Set up interrupting clockevent timer 0.
|
||||
*/
|
||||
write_tmr0(timer_ctl & ~IOP_TMR_EN);
|
||||
write_tisr(1);
|
||||
setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq);
|
||||
clockevents_calc_mult_shift(&iop_clockevent,
|
||||
tick_rate, IOP_MIN_RANGE);
|
||||
@ -162,9 +174,6 @@ void __init iop_init_time(unsigned long tick_rate)
|
||||
clockevent_delta2ns(0xf, &iop_clockevent);
|
||||
iop_clockevent.cpumask = cpumask_of(0);
|
||||
clockevents_register_device(&iop_clockevent);
|
||||
write_trr0(ticks_per_jiffy - 1);
|
||||
write_tcr0(ticks_per_jiffy - 1);
|
||||
write_tmr0(timer_ctl);
|
||||
|
||||
/*
|
||||
* Set up free-running clocksource timer 1.
|
||||
@ -172,7 +181,5 @@ void __init iop_init_time(unsigned long tick_rate)
|
||||
write_trr1(0xffffffff);
|
||||
write_tcr1(0xffffffff);
|
||||
write_tmr1(timer_ctl);
|
||||
clocksource_calc_mult_shift(&iop_clocksource, tick_rate,
|
||||
IOP_MIN_RANGE);
|
||||
clocksource_register(&iop_clocksource);
|
||||
clocksource_register_hz(&iop_clocksource, tick_rate);
|
||||
}
|
||||
|
@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = {
|
||||
.rating = 200,
|
||||
.read = epit_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk)
|
||||
{
|
||||
unsigned int c = clk_get_rate(timer_clk);
|
||||
|
||||
clocksource_epit.mult = clocksource_hz2mult(c,
|
||||
clocksource_epit.shift);
|
||||
clocksource_register(&clocksource_epit);
|
||||
clocksource_register_hz(&clocksource_epit, c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = {
|
||||
.rating = 200,
|
||||
.read = mx1_2_get_cycles,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 20,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk)
|
||||
if (timer_is_v2())
|
||||
clocksource_mxc.read = v2_get_cycles;
|
||||
|
||||
clocksource_mxc.mult = clocksource_hz2mult(c,
|
||||
clocksource_mxc.shift);
|
||||
clocksource_register(&clocksource_mxc);
|
||||
clocksource_register_hz(&clocksource_mxc, c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -14,6 +14,7 @@ if PLAT_NOMADIK
|
||||
|
||||
config HAS_MTU
|
||||
bool
|
||||
select HAVE_SCHED_CLOCK
|
||||
help
|
||||
Support for Multi Timer Unit. MTU provides access
|
||||
to multiple interrupt generating programmable
|
||||
|
@ -17,9 +17,9 @@
|
||||
#include <linux/clk.h>
|
||||
#include <linux/jiffies.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/sched.h>
|
||||
#include <asm/mach/time.h>
|
||||
#include <asm/sched_clock.h>
|
||||
|
||||
#include <plat/mtu.h>
|
||||
|
||||
@ -52,81 +52,24 @@ static struct clocksource nmdk_clksrc = {
|
||||
* Override the global weak sched_clock symbol with this
|
||||
* local implementation which uses the clocksource to get some
|
||||
* better resolution when scheduling the kernel.
|
||||
*
|
||||
* Because the hardware timer period may be quite short
|
||||
* (32.3 secs on the 133 MHz MTU timer selection on ux500)
|
||||
* and because cnt32_to_63() needs to be called at least once per
|
||||
* half period to work properly, a kernel keepwarm() timer is set up
|
||||
* to ensure this requirement is always met.
|
||||
*
|
||||
* Also the sched_clock timer will wrap around at some point,
|
||||
* here we set it to run continously for a year.
|
||||
*/
|
||||
#define SCHED_CLOCK_MIN_WRAP 3600*24*365
|
||||
static struct timer_list cnt32_to_63_keepwarm_timer;
|
||||
static u32 sched_mult;
|
||||
static u32 sched_shift;
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
u64 cycles;
|
||||
u32 cyc;
|
||||
|
||||
if (unlikely(!mtu_base))
|
||||
return 0;
|
||||
|
||||
cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0)));
|
||||
/*
|
||||
* sched_mult is guaranteed to be even so will
|
||||
* shift out bit 63
|
||||
*/
|
||||
return (cycles * sched_mult) >> sched_shift;
|
||||
cyc = -readl(mtu_base + MTU_VAL(0));
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
/* Just kick sched_clock every so often */
|
||||
static void cnt32_to_63_keepwarm(unsigned long data)
|
||||
static void notrace nomadik_update_sched_clock(void)
|
||||
{
|
||||
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
|
||||
(void) sched_clock();
|
||||
}
|
||||
|
||||
/*
|
||||
* Set up a timer to keep sched_clock():s 32_to_63 algorithm warm
|
||||
* once in half a 32bit timer wrap interval.
|
||||
*/
|
||||
static void __init nmdk_sched_clock_init(unsigned long rate)
|
||||
{
|
||||
u32 v;
|
||||
unsigned long delta;
|
||||
u64 days;
|
||||
|
||||
/* Find the apropriate mult and shift factors */
|
||||
clocks_calc_mult_shift(&sched_mult, &sched_shift,
|
||||
rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP);
|
||||
/* We need to multiply by an even number to get rid of bit 63 */
|
||||
if (sched_mult & 1)
|
||||
sched_mult++;
|
||||
|
||||
/* Let's see what we get, take max counter and scale it */
|
||||
days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift;
|
||||
do_div(days, NSEC_PER_SEC);
|
||||
do_div(days, (3600*24));
|
||||
|
||||
pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n",
|
||||
(64 - sched_shift), rate, (unsigned long) days);
|
||||
|
||||
/*
|
||||
* Program a timer to kick us at half 32bit wraparound
|
||||
* Formula: seconds per wrap = (2^32) / f
|
||||
*/
|
||||
v = 0xFFFFFFFFUL / rate;
|
||||
/* We want half of the wrap time to keep cnt32_to_63 warm */
|
||||
v /= 2;
|
||||
pr_debug("sched_clock: prescaled timer rate: %lu Hz, "
|
||||
"initialize keepwarm timer every %d seconds\n", rate, v);
|
||||
/* Convert seconds to jiffies */
|
||||
delta = msecs_to_jiffies(v*1000);
|
||||
setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta);
|
||||
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta));
|
||||
u32 cyc = -readl(mtu_base + MTU_VAL(0));
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
/* Clockevent device: use one-shot mode */
|
||||
@ -222,7 +165,6 @@ void __init nmdk_timer_init(void)
|
||||
} else {
|
||||
cr |= MTU_CRn_PRESCALE_1;
|
||||
}
|
||||
clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE);
|
||||
|
||||
/* Timer 0 is the free running clocksource */
|
||||
writel(cr, mtu_base + MTU_CR(0));
|
||||
@ -233,11 +175,11 @@ void __init nmdk_timer_init(void)
|
||||
/* Now the clock source is ready */
|
||||
nmdk_clksrc.read = nmdk_read_timer;
|
||||
|
||||
if (clocksource_register(&nmdk_clksrc))
|
||||
if (clocksource_register_hz(&nmdk_clksrc, rate))
|
||||
pr_err("timer: failed to initialize clock source %s\n",
|
||||
nmdk_clksrc.name);
|
||||
|
||||
nmdk_sched_clock_init(rate);
|
||||
init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate);
|
||||
|
||||
/* Timer 1 is used for events */
|
||||
|
||||
|
@ -15,8 +15,11 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/clk.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/err.h>
|
||||
#include <linux/io.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <asm/sched_clock.h>
|
||||
|
||||
#include <plat/common.h>
|
||||
#include <plat/board.h>
|
||||
@ -45,7 +48,7 @@
|
||||
static u32 offset_32k __read_mostly;
|
||||
|
||||
#ifdef CONFIG_ARCH_OMAP16XX
|
||||
static cycle_t omap16xx_32k_read(struct clocksource *cs)
|
||||
static cycle_t notrace omap16xx_32k_read(struct clocksource *cs)
|
||||
{
|
||||
return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k;
|
||||
}
|
||||
@ -54,7 +57,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_OMAP2420
|
||||
static cycle_t omap2420_32k_read(struct clocksource *cs)
|
||||
static cycle_t notrace omap2420_32k_read(struct clocksource *cs)
|
||||
{
|
||||
return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k;
|
||||
}
|
||||
@ -63,7 +66,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_OMAP2430
|
||||
static cycle_t omap2430_32k_read(struct clocksource *cs)
|
||||
static cycle_t notrace omap2430_32k_read(struct clocksource *cs)
|
||||
{
|
||||
return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k;
|
||||
}
|
||||
@ -72,7 +75,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_OMAP3
|
||||
static cycle_t omap34xx_32k_read(struct clocksource *cs)
|
||||
static cycle_t notrace omap34xx_32k_read(struct clocksource *cs)
|
||||
{
|
||||
return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k;
|
||||
}
|
||||
@ -81,7 +84,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs)
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ARCH_OMAP4
|
||||
static cycle_t omap44xx_32k_read(struct clocksource *cs)
|
||||
static cycle_t notrace omap44xx_32k_read(struct clocksource *cs)
|
||||
{
|
||||
return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k;
|
||||
}
|
||||
@ -93,7 +96,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs)
|
||||
* Kernel assumes that sched_clock can be called early but may not have
|
||||
* things ready yet.
|
||||
*/
|
||||
static cycle_t omap_32k_read_dummy(struct clocksource *cs)
|
||||
static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
@ -103,7 +106,6 @@ static struct clocksource clocksource_32k = {
|
||||
.rating = 250,
|
||||
.read = omap_32k_read_dummy,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
.shift = 10,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -111,10 +113,25 @@ static struct clocksource clocksource_32k = {
|
||||
* Returns current time from boot in nsecs. It's OK for this to wrap
|
||||
* around for now, as it's just a relative time stamp.
|
||||
*/
|
||||
unsigned long long sched_clock(void)
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
/*
|
||||
* Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60).
|
||||
* This gives a resolution of about 30us and a wrap period of about 36hrs.
|
||||
*/
|
||||
#define SC_MULT 4000000000u
|
||||
#define SC_SHIFT 17
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k),
|
||||
clocksource_32k.mult, clocksource_32k.shift);
|
||||
u32 cyc = clocksource_32k.read(&clocksource_32k);
|
||||
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT);
|
||||
}
|
||||
|
||||
static void notrace omap_update_sched_clock(void)
|
||||
{
|
||||
u32 cyc = clocksource_32k.read(&clocksource_32k);
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -168,13 +185,13 @@ static int __init omap_init_clocksource_32k(void)
|
||||
if (!IS_ERR(sync_32k_ick))
|
||||
clk_enable(sync_32k_ick);
|
||||
|
||||
clocksource_32k.mult = clocksource_hz2mult(32768,
|
||||
clocksource_32k.shift);
|
||||
|
||||
offset_32k = clocksource_32k.read(&clocksource_32k);
|
||||
|
||||
if (clocksource_register(&clocksource_32k))
|
||||
if (clocksource_register_hz(&clocksource_32k, 32768))
|
||||
printk(err, clocksource_32k.name);
|
||||
|
||||
init_fixed_sched_clock(&cd, omap_update_sched_clock, 32,
|
||||
32768, SC_MULT, SC_SHIFT);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@ -13,11 +13,11 @@
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/cnt32_to_63.h>
|
||||
#include <linux/timer.h>
|
||||
#include <linux/clockchips.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/irq.h>
|
||||
#include <asm/sched_clock.h>
|
||||
#include <asm/mach/time.h>
|
||||
#include <mach/bridge-regs.h>
|
||||
#include <mach/hardware.h>
|
||||
@ -44,52 +44,26 @@ static u32 ticks_per_jiffy;
|
||||
|
||||
/*
|
||||
* Orion's sched_clock implementation. It has a resolution of
|
||||
* at least 7.5ns (133MHz TCLK) and a maximum value of 834 days.
|
||||
*
|
||||
* Because the hardware timer period is quite short (21 secs if
|
||||
* 200MHz TCLK) and because cnt32_to_63() needs to be called at
|
||||
* least once per half period to work properly, a kernel timer is
|
||||
* set up to ensure this requirement is always met.
|
||||
* at least 7.5ns (133MHz TCLK).
|
||||
*/
|
||||
#define TCLK2NS_SCALE_FACTOR 8
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
|
||||
static unsigned long tclk2ns_scale;
|
||||
|
||||
unsigned long long sched_clock(void)
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL));
|
||||
return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR;
|
||||
u32 cyc = 0xffffffff - readl(TIMER0_VAL);
|
||||
return cyc_to_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
static struct timer_list cnt32_to_63_keepwarm_timer;
|
||||
|
||||
static void cnt32_to_63_keepwarm(unsigned long data)
|
||||
static void notrace orion_update_sched_clock(void)
|
||||
{
|
||||
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
|
||||
(void) sched_clock();
|
||||
u32 cyc = 0xffffffff - readl(TIMER0_VAL);
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
static void __init setup_sched_clock(unsigned long tclk)
|
||||
{
|
||||
unsigned long long v;
|
||||
unsigned long data;
|
||||
|
||||
v = NSEC_PER_SEC;
|
||||
v <<= TCLK2NS_SCALE_FACTOR;
|
||||
v += tclk/2;
|
||||
do_div(v, tclk);
|
||||
/*
|
||||
* We want an even value to automatically clear the top bit
|
||||
* returned by cnt32_to_63() without an additional run time
|
||||
* instruction. So if the LSB is 1 then round it up.
|
||||
*/
|
||||
if (v & 1)
|
||||
v++;
|
||||
tclk2ns_scale = v;
|
||||
|
||||
data = (0xffffffffUL / tclk / 2 - 2) * HZ;
|
||||
setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data);
|
||||
mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data));
|
||||
init_sched_clock(&cd, orion_update_sched_clock, 32, tclk);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -102,7 +76,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs)
|
||||
|
||||
static struct clocksource orion_clksrc = {
|
||||
.name = "orion_clocksource",
|
||||
.shift = 20,
|
||||
.rating = 300,
|
||||
.read = orion_clksrc_read,
|
||||
.mask = CLOCKSOURCE_MASK(32),
|
||||
@ -245,8 +218,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk)
|
||||
writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK);
|
||||
u = readl(TIMER_CTRL);
|
||||
writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL);
|
||||
orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift);
|
||||
clocksource_register(&orion_clksrc);
|
||||
clocksource_register_hz(&orion_clksrc, tclk);
|
||||
|
||||
/*
|
||||
* Setup clockevent timer (interrupt-driven.)
|
||||
|
@ -81,8 +81,6 @@ static struct clocksource clksrc = {
|
||||
.rating = 200, /* its a pretty decent clock */
|
||||
.read = clocksource_read_cycles,
|
||||
.mask = 0xFFFF, /* 16 bits */
|
||||
.mult = 0, /* to be computed */
|
||||
.shift = 0, /* to be computed */
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -105,10 +103,8 @@ static void spear_clocksource_init(void)
|
||||
val |= CTRL_ENABLE ;
|
||||
writew(val, gpt_base + CR(CLKSRC));
|
||||
|
||||
clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE);
|
||||
|
||||
/* register the clocksource */
|
||||
clocksource_register(&clksrc);
|
||||
clocksource_register_hz(&clksrc, tick_rate);
|
||||
}
|
||||
|
||||
static struct clock_event_device clkevt = {
|
||||
|
@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = {
|
||||
.rating = 250,
|
||||
.read = stmp3xxx_clock_read,
|
||||
.mask = CLOCKSOURCE_MASK(16),
|
||||
.shift = 10,
|
||||
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
||||
};
|
||||
|
||||
@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = {
|
||||
*/
|
||||
static void __init stmp3xxx_init_timer(void)
|
||||
{
|
||||
cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE,
|
||||
cksrc_stmp3xxx.shift);
|
||||
ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC,
|
||||
ckevt_timrot.shift);
|
||||
ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot);
|
||||
@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void)
|
||||
|
||||
setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq);
|
||||
|
||||
clocksource_register(&cksrc_stmp3xxx);
|
||||
clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE);
|
||||
clockevents_register_device(&ckevt_timrot);
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
obj-y := clock.o
|
||||
obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o
|
||||
obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o
|
||||
ifneq ($(CONFIG_ARCH_INTEGRATOR),y)
|
||||
obj-y += sched-clock.o
|
||||
endif
|
||||
ifeq ($(CONFIG_LEDS_CLASS),y)
|
||||
obj-$(CONFIG_ARCH_REALVIEW) += leds.o
|
||||
obj-$(CONFIG_ARCH_VERSATILE) += leds.o
|
||||
|
6
arch/arm/plat-versatile/include/plat/sched_clock.h
Normal file
6
arch/arm/plat-versatile/include/plat/sched_clock.h
Normal file
@ -0,0 +1,6 @@
|
||||
#ifndef ARM_PLAT_SCHED_CLOCK_H
|
||||
#define ARM_PLAT_SCHED_CLOCK_H
|
||||
|
||||
void versatile_sched_clock_init(void __iomem *, unsigned long);
|
||||
|
||||
#endif
|
@ -18,36 +18,41 @@
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
#include <linux/cnt32_to_63.h>
|
||||
#include <linux/io.h>
|
||||
#include <asm/div64.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include <mach/hardware.h>
|
||||
#include <mach/platform.h>
|
||||
#include <asm/sched_clock.h>
|
||||
#include <plat/sched_clock.h>
|
||||
|
||||
#ifdef VERSATILE_SYS_BASE
|
||||
#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET)
|
||||
#endif
|
||||
|
||||
#ifdef REALVIEW_SYS_BASE
|
||||
#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET)
|
||||
#endif
|
||||
static DEFINE_CLOCK_DATA(cd);
|
||||
static void __iomem *ctr;
|
||||
|
||||
/*
|
||||
* This is the Realview and Versatile sched_clock implementation. This
|
||||
* has a resolution of 41.7ns, and a maximum value of about 35583 days.
|
||||
*
|
||||
* The return value is guaranteed to be monotonic in that range as
|
||||
* long as there is always less than 89 seconds between successive
|
||||
* calls to this function.
|
||||
* Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60).
|
||||
* This gives a resolution of about 41ns and a wrap period of about 178s.
|
||||
*/
|
||||
unsigned long long sched_clock(void)
|
||||
#define SC_MULT 2796202667u
|
||||
#define SC_SHIFT 26
|
||||
|
||||
unsigned long long notrace sched_clock(void)
|
||||
{
|
||||
unsigned long long v = cnt32_to_63(readl(REFCOUNTER));
|
||||
|
||||
/* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */
|
||||
v *= 125<<1;
|
||||
do_div(v, 3<<1);
|
||||
|
||||
return v;
|
||||
if (ctr) {
|
||||
u32 cyc = readl(ctr);
|
||||
return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0,
|
||||
SC_MULT, SC_SHIFT);
|
||||
} else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void notrace versatile_update_sched_clock(void)
|
||||
{
|
||||
u32 cyc = readl(ctr);
|
||||
update_sched_clock(&cd, cyc, (u32)~0);
|
||||
}
|
||||
|
||||
void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate)
|
||||
{
|
||||
ctr = reg;
|
||||
init_fixed_sched_clock(&cd, versatile_update_sched_clock,
|
||||
32, rate, SC_MULT, SC_SHIFT);
|
||||
}
|
||||
|
@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec)
|
||||
*/
|
||||
for (sft = 32; sft > 0; sft--) {
|
||||
tmp = (u64) to << sft;
|
||||
tmp += from / 2;
|
||||
do_div(tmp, from);
|
||||
if ((tmp >> sftacc) == 0)
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user