From ec763f0de879fa1a64b7641098271107f5e32c67 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Fri, 19 Nov 2010 21:11:02 +0530 Subject: [PATCH 01/54] ARM: versatile: ensure sched_clock() is notrace Include sched.h to ensure sched_clock() has the notrace annotation. Signed-off-by: Rabin Vincent --- arch/arm/plat-versatile/sched-clock.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index 9768cf7e83d7..9696ddc238c9 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include From 61b5cb1c3bff8875d2fd289c7b6ac344f95261fa Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 20:51:58 +0530 Subject: [PATCH 02/54] ARM: place C irq handlers in IRQ_ENTRY for ftrace When FUNCTION_GRAPH_TRACER is enabled, place do_IRQ() and friends in the IRQ_ENTRY section so that the irq-related features of the function graph tracer work. Signed-off-by: Rabin Vincent --- arch/arm/include/asm/system.h | 5 +++++ arch/arm/include/asm/traps.h | 23 +++++++++++++++++++++-- arch/arm/kernel/irq.c | 4 +++- arch/arm/kernel/smp.c | 5 +++-- arch/arm/kernel/vmlinux.lds.S | 1 + 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h index 1120f18a6b17..ec4327a4653d 100644 --- a/arch/arm/include/asm/system.h +++ b/arch/arm/include/asm/system.h @@ -63,6 +63,11 @@ #include #define __exception __attribute__((section(".exception.text"))) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +#define __exception_irq_entry __irq_entry +#else +#define __exception_irq_entry __exception +#endif struct thread_info; struct task_struct; diff --git a/arch/arm/include/asm/traps.h b/arch/arm/include/asm/traps.h index 491960bf4260..124475afb007 100644 --- a/arch/arm/include/asm/traps.h +++ b/arch/arm/include/asm/traps.h @@ -15,13 +15,32 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +static inline int __in_irqentry_text(unsigned long ptr) +{ + extern char __irqentry_text_start[]; + extern char __irqentry_text_end[]; + + return ptr >= (unsigned long)&__irqentry_text_start && + ptr < (unsigned long)&__irqentry_text_end; +} +#else +static inline int __in_irqentry_text(unsigned long ptr) +{ + return 0; +} +#endif + static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; + int in; - return ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; + in = ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; + + return in ? : __in_irqentry_text(ptr); } extern void __init early_trap_init(void); diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692a..6d616333340f 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -105,7 +106,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c1959590252..bbca89872c18 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -544,7 +545,7 @@ static void ipi_cpu_stop(unsigned int cpu) * * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46b..897c1a8f1694 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT From d3b9dc9dd2b994f396741f7086ffe7a48bacb165 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 17:39:47 +0530 Subject: [PATCH 03/54] ARM: ftrace: use gas macros to avoid code duplication Use assembler macros to avoid copy/pasting code between the implementations of the two variants of the mcount call. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 166 ++++++++++++++++++--------------- 1 file changed, 90 insertions(+), 76 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd2..fe1d5862b19f 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -141,98 +141,112 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) - -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - - .global ftrace_call -ftrace_call: - bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) - -#ifdef CONFIG_OLD_MCOUNT -ENTRY(mcount) - stmdb sp!, {lr} - ldr lr, [fp, #-4] - ldmia sp!, {pc} -ENDPROC(mcount) - -ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(ftrace_caller_old) -#endif - -#else - -ENTRY(__gnu_mcount_nc) - stmdb sp!, {r0-r3, lr} +.macro __mcount suffix + mcount_enter ldr r0, =ftrace_trace_function ldr r2, [r0] adr r0, .Lftrace_stub cmp r0, r2 - bne gnu_trace - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip + bne 1f + mcount_exit -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) + adr lr, BSYM(2f) mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + + .globl ftrace_call\suffix +ftrace_call\suffix: + bl ftrace_stub + + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT /* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. + * mcount */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] ldmia sp!, {r0-r3, pc} +.endm + +ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE + stmdb sp!, {lr} + ldr lr, [fp, #-4] + ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) + +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller_old) + __ftrace_caller _old +ENDPROC(ftrace_caller_old) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif + +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit + ldmia sp!, {r0-r3, ip, lr} + mov pc, ip +.endm + +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} + mov pc, ip +#else + __mcount +#endif +ENDPROC(__gnu_mcount_nc) + +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) +#endif + +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit ENTRY(ftrace_stub) .Lftrace_stub: From 376cfa8730c08c0394d0aa1d4a80fd8c9971f323 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Sat, 9 Oct 2010 22:24:38 +0530 Subject: [PATCH 04/54] ARM: ftrace: function graph tracer support Cc: Tim Bird [rabin@rab.in: rebase on top of latest code, keep code in ftrace.c instead of separate file, check for ftrace_graph_entry also] Signed-off-by: Rabin Vincent --- arch/arm/kernel/Makefile | 3 ++- arch/arm/kernel/entry-common.S | 46 ++++++++++++++++++++++++++++++++++ arch/arm/kernel/ftrace.c | 34 +++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fbb..679851a9f589 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,6 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index fe1d5862b19f..9f1766211668 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -148,6 +148,20 @@ ENDPROC(ret_from_fork) adr r0, .Lftrace_stub cmp r0, r2 bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif + mcount_exit 1: mcount_get_lr r1 @ lr of instrumented func @@ -172,6 +186,15 @@ ftrace_call\suffix: mcount_exit .endm +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) + mov r1, lr @ instrumented routine (func) + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm + #ifdef CONFIG_OLD_MCOUNT /* * mcount @@ -206,6 +229,12 @@ ENTRY(ftrace_caller_old) ENDPROC(ftrace_caller_old) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit @@ -244,10 +273,27 @@ ENTRY(ftrace_caller) ENDPROC(ftrace_caller) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif + ENTRY(ftrace_stub) .Lftrace_stub: mov pc, lr diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea7..7a702a502871 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -193,3 +194,36 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From dd686eb13959e49a1112fd608c124ab711050582 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 6 Nov 2010 23:03:21 +0530 Subject: [PATCH 05/54] ARM: ftrace: graph tracer + dynamic ftrace Support the graph tracer + dynamic ftrace combination on ARM. Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 12 ++++++ arch/arm/kernel/ftrace.c | 69 +++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 5 deletions(-) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 9f1766211668..aae802ee12f8 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -183,12 +183,24 @@ ENDPROC(ret_from_fork) ftrace_call\suffix: bl ftrace_stub +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + mcount_exit .endm .macro __ftrace_graph_caller sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr mov r1, lr @ instrumented routine (func) +#endif sub r1, r1, #MCOUNT_INSN_SIZE mov r2, fp @ frame pointer bl prepare_ftrace_return diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 7a702a502871..c0062ad1e847 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -60,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -84,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -104,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -226,4 +238,51 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *parent = old; } } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From 0e341af835fdf553820a1fa98341b93ab32ce466 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Sat, 6 Nov 2010 23:03:53 +0530 Subject: [PATCH 06/54] ARM: ftrace: enable function graph tracer Add the options to enable the function graph tracer on ARM. Function graph tracer support requires frame pointers, so exclude Thumb-2 and also make sure FRAME_POINTER gets enabled when FUNCTION_GRAPH_TRACER is used, since FUNCTION_TRACER doesn't "select FRAME_POINTER" when ARM_UNWIND is used. Therefore, with GCC 4.4.0+, you get plain function tracing without frame pointers, but you'll need them if you want function graph tracing. Acked-by: Catalin Marinas Signed-off-by: Rabin Vincent --- arch/arm/Kconfig | 1 + arch/arm/Kconfig.debug | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 8ae3d48d504c..bd620a481bee 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -14,6 +14,7 @@ config ARM select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) + select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 2fd0b99afc4b..eac62085f5b2 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -23,7 +23,7 @@ config STRICT_DEVMEM config FRAME_POINTER bool depends on !THUMB2_KERNEL - default y if !ARM_UNWIND + default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER help If you say N here, the resulting kernel will be slightly smaller and faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, From 89c3dedf477da9c8d42d3a63d16875e52f74108b Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Mon, 22 Nov 2010 12:35:41 -0800 Subject: [PATCH 07/54] arm: kconfig: enable SMP for MSM targets This just adds ARCH_MSM_SCORPIONMP to allow SMP selection for MSM. MSM is unique in that it doesn't enable SCU or TWD. Signed-off-by: Daniel Walker --- arch/arm/Kconfig | 9 +++++---- arch/arm/mach-msm/Kconfig | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index db524e75c4a2..1a61017a54c8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1205,10 +1205,11 @@ config SMP depends on EXPERIMENTAL depends on GENERIC_CLOCKEVENTS depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ - MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ - ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 + MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \ + ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \ + ARCH_MSM_SCORPIONMP select USE_GENERIC_SMP_HELPERS - select HAVE_ARM_SCU + select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -1291,7 +1292,7 @@ config LOCAL_TIMERS bool "Use local timer interrupts" depends on SMP default y - select HAVE_ARM_TWD + select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP help Enable support for local timers on SMP platforms, rather then the legacy IPI broadcast method. Local timers allows the system diff --git a/arch/arm/mach-msm/Kconfig b/arch/arm/mach-msm/Kconfig index dbbcfeb919db..31e5fd63ec9a 100644 --- a/arch/arm/mach-msm/Kconfig +++ b/arch/arm/mach-msm/Kconfig @@ -49,6 +49,8 @@ endchoice config MSM_SOC_REV_A bool +config ARCH_MSM_SCORPIONMP + bool config ARCH_MSM_ARM11 bool From 176bfc44417544724e6df0831a7f576f4a56283d Mon Sep 17 00:00:00 2001 From: Daniel Walker Date: Tue, 7 Sep 2010 13:33:05 -0700 Subject: [PATCH 08/54] arm: kconfig: dis-allow hotplug on MSM MSM doesn't support hotplug, so we prevent it from being selected in Kconfig. Signed-off-by: Daniel Walker --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 1a61017a54c8..c8fa1443e2c1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1284,6 +1284,7 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL + depends on !ARCH_MSM help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. From 84fee97a026ca085f08381054513f9e24689a303 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:13:56 +0000 Subject: [PATCH 09/54] ARM: perf: consolidate common PMU behaviour The functions for mapping PMU events (perf, cache and raw) are common between all PMU types and differ only in the data on which they operate. This patch implements common definitions of these mapping functions and changes the arm_pmu struct to hold pointers to the data which they require. This is in anticipation of separating out the PMU-specific code into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 131 ++++++++++------------------------- 1 file changed, 38 insertions(+), 93 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492a..c49e1701a2f6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -84,14 +84,17 @@ struct arm_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +139,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +156,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -165,6 +164,19 @@ armpmu_map_cache_event(u64 config) return ret; } +static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, @@ -458,11 +470,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -1121,30 +1133,6 @@ armv6pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) @@ -1240,13 +1228,14 @@ static const struct arm_pmu armv6pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -1263,13 +1252,14 @@ static const struct arm_pmu armv6mpcore_pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2093,27 +2083,6 @@ static void armv7pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { @@ -2144,12 +2113,12 @@ static struct arm_pmu armv7pmu = { .handle_irq = armv7pmu_handle_irq, .enable = armv7pmu_enable_event, .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, .read_counter = armv7pmu_read_counter, .write_counter = armv7pmu_write_counter, .get_event_idx = armv7pmu_get_event_idx, .start = armv7pmu_start, .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, .max_period = (1LLU << 32) - 1, }; @@ -2318,21 +2287,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) #define XSCALE_PMU_CNT64 0x008 -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - #define XSCALE1_OVERFLOWED_MASK 0x700 #define XSCALE1_CCOUNT_OVERFLOW 0x400 #define XSCALE1_COUNT0_OVERFLOW 0x100 @@ -2598,13 +2552,14 @@ static const struct arm_pmu xscale1pmu = { .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale1pmu_read_counter, .write_counter = xscale1pmu_write_counter, .get_event_idx = xscale1pmu_get_event_idx, .start = xscale1pmu_start, .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2953,13 +2908,14 @@ static const struct arm_pmu xscale2pmu = { .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, .read_counter = xscale2pmu_read_counter, .write_counter = xscale2pmu_write_counter, .get_event_idx = xscale2pmu_get_event_idx, .start = xscale2pmu_start, .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, .num_events = 5, .max_period = (1LLU << 32) - 1, }; @@ -2978,20 +2934,14 @@ init_hw_perf_events(void) case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); break; case 0xB020: /* ARM11mpcore */ armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); break; case 0xC080: /* Cortex-A8 */ armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3000,9 +2950,8 @@ init_hw_perf_events(void) break; case 0xC090: /* Cortex-A9 */ armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; armpmu = &armv7pmu; /* Reset PMNC and read the nb of CNTx counters @@ -3016,13 +2965,9 @@ init_hw_perf_events(void) switch (part_number) { case 1: armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; case 2: armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); break; } } From 59a98a1e56edea4d7d9c5f4ce9d50e271a04993c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:18:36 +0000 Subject: [PATCH 10/54] ARM: perf: avoid exposing internal stop function for v6 PMU Unlike other pmu functions, armv6pmu_pmu_stop is not declared static. This patch adds the missing keyword. Acked-by: Jamie Iles Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c49e1701a2f6..35319b8e4d4b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1121,7 +1121,7 @@ armv6pmu_start(void) spin_unlock_irqrestore(&pmu_lock, flags); } -void +static void armv6pmu_stop(void) { unsigned long flags, val; From 3cb314bae2191b432a7e898abf865db880f6d07d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:37:46 +0000 Subject: [PATCH 11/54] ARM: perf: add _init() functions to PMUs In preparation for separating the PMU-specific code, this patch adds self-contained init functions to each PMU, therefore removing any PMU-specific knowledge from the PMU-agnostic init_hw_perf_events function. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 65 +++++++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 35319b8e4d4b..acc4e91dd300 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1240,6 +1240,11 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + /* * ARMv6mpcore is almost identical to single core ARMv6 with the exception * that some of the events have different enumerations and that there is no @@ -1264,6 +1269,11 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} + /* * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. * @@ -2136,6 +2146,25 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + + /* * ARMv5 [xscale] Performance counter handling code. * @@ -2564,6 +2593,11 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + #define XSCALE2_OVERFLOWED_MASK 0x01f #define XSCALE2_CCOUNT_OVERFLOW 0x001 #define XSCALE2_COUNT0_OVERFLOW 0x002 @@ -2920,6 +2954,11 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} + static int __init init_hw_perf_events(void) { @@ -2933,30 +2972,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -2964,10 +2989,10 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; + armpmu = xscale2pmu_init(); break; } } From 629948310e4270e9b32c37b4a65a8cd5d6ebf38a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 18:45:27 +0000 Subject: [PATCH 12/54] ARM: perf: encode PMU name in arm_pmu structure Currently, perf uses the PMU ID as an index into a string table to look up the name of a given PMU. This patch encodes the name of a PMU directly into the arm_pmu structure so that PMU-specific code can be factored out into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index acc4e91dd300..ac4e9a1ed80b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -69,18 +69,9 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); @@ -1225,6 +1216,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, static const struct arm_pmu armv6pmu = { .id = ARM_PERF_PMU_ID_V6, + .name = "v6", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, @@ -1254,6 +1246,7 @@ const struct arm_pmu *__init armv6pmu_init(void) */ static const struct arm_pmu armv6mpcore_pmu = { .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, @@ -2149,6 +2142,7 @@ static u32 __init armv7_reset_read_pmnc(void) const struct arm_pmu *__init armv7_a8_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; armv7pmu.cache_map = &armv7_a8_perf_cache_map; armv7pmu.event_map = &armv7_a8_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2158,6 +2152,7 @@ const struct arm_pmu *__init armv7_a8_pmu_init(void) const struct arm_pmu *__init armv7_a9_pmu_init(void) { armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; armv7pmu.cache_map = &armv7_a9_perf_cache_map; armv7pmu.event_map = &armv7_a9_perf_map; armv7pmu.num_events = armv7_reset_read_pmnc(); @@ -2578,6 +2573,7 @@ xscale1pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale1pmu = { .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", .handle_irq = xscale1pmu_handle_irq, .enable = xscale1pmu_enable_event, .disable = xscale1pmu_disable_event, @@ -2939,6 +2935,7 @@ xscale2pmu_write_counter(int counter, u32 val) static const struct arm_pmu xscale2pmu = { .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", .handle_irq = xscale2pmu_handle_irq, .enable = xscale2pmu_enable_event, .disable = xscale2pmu_disable_event, @@ -2999,7 +2996,7 @@ init_hw_perf_events(void) if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); + armpmu->name, armpmu->num_events); } else { pr_info("no hardware support available\n"); } From 43eab87828fee65f89f4088736b2b7a187390a2f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 19:04:32 +0000 Subject: [PATCH 13/54] ARM: perf: separate PMU backends into multiple files The ARM perf_event.c file contains all PMU backends and, as new PMUs are introduced, will continue to grow. This patch follows the example of x86 and splits the PMU implementations into separate files which are then #included back into the main file. Compile-time guards are added to each PMU file to avoid compiling in code that is not relevant for the version of the architecture which we are targetting. Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2357 +-------------------------- arch/arm/kernel/perf_event_v6.c | 672 ++++++++ arch/arm/kernel/perf_event_v7.c | 906 ++++++++++ arch/arm/kernel/perf_event_xscale.c | 807 +++++++++ 4 files changed, 2390 insertions(+), 2352 deletions(-) create mode 100644 arch/arm/kernel/perf_event_v6.c create mode 100644 arch/arm/kernel/perf_event_v7.c create mode 100644 arch/arm/kernel/perf_event_xscale.c diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ac4e9a1ed80b..421a4bb88fed 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -606,2355 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6_perf_cache_map, - .event_map = &armv6_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6pmu_init(void) -{ - return &armv6pmu; -} - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6mpcore_perf_cache_map, - .event_map = &armv6mpcore_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init armv6mpcore_pmu_init(void) -{ - return &armv6mpcore_pmu; -} - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .raw_event_mask = 0xFF, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -const struct arm_pmu *__init armv7_a8_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - -const struct arm_pmu *__init armv7_a9_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - - -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .name = "xscale1", - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale1pmu_init(void) -{ - return &xscale1pmu; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -const struct arm_pmu *__init xscale2pmu_init(void) -{ - return &xscale2pmu; -} +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" static int __init init_hw_perf_events(void) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 000000000000..7aeb07da9076 --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,672 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#ifdef CONFIG_CPU_V6 +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .name = "v6", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .name = "v6mpcore", + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} +#else +const struct arm_pmu *__init armv6pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V6 */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 000000000000..4d0423969df9 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,906 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). + */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.name = "ARMv7 Cortex-A8"; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.name = "ARMv7 Cortex-A9"; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} +#else +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 000000000000..4e9592789d40 --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,807 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .name = "xscale1", + .handle_irq = xscale1pmu_handle_irq, + .enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags, ien, evtsel; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .name = "xscale2", + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .cache_map = &xscale_perf_cache_map, + .event_map = &xscale_perf_map, + .raw_event_mask = 0xFF, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} +#else +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return NULL; +} + +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return NULL; +} +#endif /* CONFIG_CPU_XSCALE */ From 40cc52440073903f3b7477a3391c4ca0761445d0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sun, 19 Dec 2010 15:43:34 +0000 Subject: [PATCH 14/54] ARM: clockevents: fix IOP clock events initialization Ensure that no interrupt is pending before registering the clock event device, and properly initialize the periodic tick in the ->set_mode callback. Signed-off-by: Russell King --- arch/arm/plat-iop/time.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 85d3e55ca4a9..d615e6fbe502 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -87,6 +87,7 @@ static void iop_set_mode(enum clock_event_mode mode, case CLOCK_EVT_MODE_PERIODIC: write_tmr0(tmr & ~IOP_TMR_EN); write_tcr0(ticks_per_jiffy - 1); + write_trr0(ticks_per_jiffy - 1); tmr |= (IOP_TMR_RELOAD | IOP_TMR_EN); break; case CLOCK_EVT_MODE_ONESHOT: @@ -152,6 +153,7 @@ void __init iop_init_time(unsigned long tick_rate) * Set up interrupting clockevent timer 0. */ write_tmr0(timer_ctl & ~IOP_TMR_EN); + write_tisr(1); setup_irq(IRQ_IOP_TIMER0, &iop_timer_irq); clockevents_calc_mult_shift(&iop_clockevent, tick_rate, IOP_MIN_RANGE); @@ -161,9 +163,6 @@ void __init iop_init_time(unsigned long tick_rate) clockevent_delta2ns(0xf, &iop_clockevent); iop_clockevent.cpumask = cpumask_of(0); clockevents_register_device(&iop_clockevent); - write_trr0(ticks_per_jiffy - 1); - write_tcr0(ticks_per_jiffy - 1); - write_tmr0(timer_ctl); /* * Set up free-running clocksource timer 1. From 132b16325fae0742a02075894af16197e83febe8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:14:55 +0000 Subject: [PATCH 15/54] ARM: AT91: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Jean-Christophe PLAGNIOL-VILLARD Signed-off-by: Russell King --- arch/arm/mach-at91/at91rm9200_time.c | 4 +--- arch/arm/mach-at91/at91sam926x_time.c | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/arm/mach-at91/at91rm9200_time.c b/arch/arm/mach-at91/at91rm9200_time.c index 2500f41d8d2d..1dd69c85dfec 100644 --- a/arch/arm/mach-at91/at91rm9200_time.c +++ b/arch/arm/mach-at91/at91rm9200_time.c @@ -101,7 +101,6 @@ static struct clocksource clk32k = { .rating = 150, .read = read_clk32k, .mask = CLOCKSOURCE_MASK(20), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -201,8 +200,7 @@ void __init at91rm9200_timer_init(void) clockevents_register_device(&clkevt); /* register clocksource */ - clk32k.mult = clocksource_hz2mult(AT91_SLOW_CLOCK, clk32k.shift); - clocksource_register(&clk32k); + clocksource_register_hz(&clk32k, AT91_SLOW_CLOCK); } struct sys_timer at91rm9200_timer = { diff --git a/arch/arm/mach-at91/at91sam926x_time.c b/arch/arm/mach-at91/at91sam926x_time.c index 608a63240b64..4ba85499fa97 100644 --- a/arch/arm/mach-at91/at91sam926x_time.c +++ b/arch/arm/mach-at91/at91sam926x_time.c @@ -51,7 +51,6 @@ static struct clocksource pit_clk = { .name = "pit", .rating = 175, .read = read_pit_clk, - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -163,10 +162,9 @@ static void __init at91sam926x_pit_init(void) * Register clocksource. The high order bits of PIV are unused, * so this isn't a 32-bit counter unless we get clockevent irqs. */ - pit_clk.mult = clocksource_hz2mult(pit_rate, pit_clk.shift); bits = 12 /* PICNT */ + ilog2(pit_cycle) /* PIV */; pit_clk.mask = CLOCKSOURCE_MASK(bits); - clocksource_register(&pit_clk); + clocksource_register_hz(&pit_clk, pit_rate); /* Set up irq handler */ setup_irq(AT91_ID_SYS, &at91sam926x_pit_irq); From 6eda51192fb1c767f792e92c67b9a4fd73c1fcba Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:16:39 +0000 Subject: [PATCH 16/54] ARM: bcmring: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-By: Scott Branden Acked-By: Jiandong Zheng Signed-off-by: Russell King --- arch/arm/mach-bcmring/core.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/arm/mach-bcmring/core.c b/arch/arm/mach-bcmring/core.c index d3f959e92b2d..b91b1b0fdff3 100644 --- a/arch/arm/mach-bcmring/core.c +++ b/arch/arm/mach-bcmring/core.c @@ -294,7 +294,6 @@ static struct clocksource clocksource_bcmring_timer1 = { .rating = 200, .read = bcmring_get_cycles_timer1, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -303,7 +302,6 @@ static struct clocksource clocksource_bcmring_timer3 = { .rating = 100, .read = bcmring_get_cycles_timer3, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -316,10 +314,8 @@ static int __init bcmring_clocksource_init(void) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, TIMER1_VA_BASE + TIMER_CTRL); - clocksource_bcmring_timer1.mult = - clocksource_khz2mult(TIMER1_FREQUENCY_MHZ * 1000, - clocksource_bcmring_timer1.shift); - clocksource_register(&clocksource_bcmring_timer1); + clocksource_register_khz(&clocksource_bcmring_timer1, + TIMER1_FREQUENCY_MHZ * 1000); /* setup timer3 as free-running clocksource */ writel(0, TIMER3_VA_BASE + TIMER_CTRL); @@ -328,10 +324,8 @@ static int __init bcmring_clocksource_init(void) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, TIMER3_VA_BASE + TIMER_CTRL); - clocksource_bcmring_timer3.mult = - clocksource_khz2mult(TIMER3_FREQUENCY_KHZ, - clocksource_bcmring_timer3.shift); - clocksource_register(&clocksource_bcmring_timer3); + clocksource_register_khz(&clocksource_bcmring_timer3, + TIMER3_FREQUENCY_KHZ); return 0; } From 7c044be50d4bdc33e039a65956b8a927feecaaa3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:17:12 +0000 Subject: [PATCH 17/54] ARM: davinci: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Russell King --- arch/arm/mach-davinci/time.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/arm/mach-davinci/time.c b/arch/arm/mach-davinci/time.c index 0f21c36e65dd..c1486716de77 100644 --- a/arch/arm/mach-davinci/time.c +++ b/arch/arm/mach-davinci/time.c @@ -276,7 +276,6 @@ static struct clocksource clocksource_davinci = { .rating = 300, .read = read_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -378,10 +377,8 @@ static void __init davinci_timer_init(void) /* setup clocksource */ clocksource_davinci.name = id_to_name[clocksource_id]; - clocksource_davinci.mult = - clocksource_khz2mult(davinci_clock_tick_rate/1000, - clocksource_davinci.shift); - if (clocksource_register(&clocksource_davinci)) + if (clocksource_register_hz(&clocksource_davinci, + davinci_clock_tick_rate)) printk(err, clocksource_davinci.name); /* setup clockevent */ From 08963dab7c6b0ba6a687cb008299eabbbeff7282 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:17:24 +0000 Subject: [PATCH 18/54] ARM: integrator: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-integrator/integrator_ap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-integrator/integrator_ap.c b/arch/arm/mach-integrator/integrator_ap.c index 548208f11179..2774df8021dc 100644 --- a/arch/arm/mach-integrator/integrator_ap.c +++ b/arch/arm/mach-integrator/integrator_ap.c @@ -372,7 +372,6 @@ static struct clocksource clocksource_timersp = { .rating = 200, .read = timersp_read, .mask = CLOCKSOURCE_MASK(16), - .shift = 16, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -390,8 +389,7 @@ static void integrator_clocksource_init(u32 khz) writel(ctrl, base + TIMER_CTRL); writel(0xffff, base + TIMER_LOAD); - cs->mult = clocksource_khz2mult(khz, cs->shift); - clocksource_register(cs); + clocksource_register_khz(cs, khz); } static void __iomem * const clkevt_base = (void __iomem *)TIMER1_VA_BASE; From b7c7c50adcf0d9c901542c5db8d795fe226bb454 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:17:40 +0000 Subject: [PATCH 19/54] ARM: ixp4xx: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Tested-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/mach-ixp4xx/common.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 0bce09799d18..82fc003aae8b 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -411,7 +411,6 @@ static struct clocksource clocksource_ixp4xx = { .rating = 200, .read = ixp4xx_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -419,10 +418,7 @@ unsigned long ixp4xx_timer_freq = FREQ; EXPORT_SYMBOL(ixp4xx_timer_freq); static void __init ixp4xx_clocksource_init(void) { - clocksource_ixp4xx.mult = - clocksource_hz2mult(ixp4xx_timer_freq, - clocksource_ixp4xx.shift); - clocksource_register(&clocksource_ixp4xx); + clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq); } /* From 594cbf2f4ceaaacc706eb1302ff139a61092ec6f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:17:52 +0000 Subject: [PATCH 20/54] ARM: lpc32xx: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-lpc32xx/timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-lpc32xx/timer.c b/arch/arm/mach-lpc32xx/timer.c index 630dd4a74b26..6162ac308c20 100644 --- a/arch/arm/mach-lpc32xx/timer.c +++ b/arch/arm/mach-lpc32xx/timer.c @@ -38,7 +38,6 @@ static cycle_t lpc32xx_clksrc_read(struct clocksource *cs) static struct clocksource lpc32xx_clksrc = { .name = "lpc32xx_clksrc", - .shift = 24, .rating = 300, .read = lpc32xx_clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -171,9 +170,7 @@ static void __init lpc32xx_timer_init(void) __raw_writel(0, LCP32XX_TIMER_MCR(LPC32XX_TIMER1_BASE)); __raw_writel(LCP32XX_TIMER_CNTR_TCR_EN, LCP32XX_TIMER_TCR(LPC32XX_TIMER1_BASE)); - lpc32xx_clksrc.mult = clocksource_hz2mult(clkrate, - lpc32xx_clksrc.shift); - clocksource_register(&lpc32xx_clksrc); + clocksource_register_hz(&lpc32xx_clksrc, clkrate); } struct sys_timer lpc32xx_timer = { From 5975f496e07e0f1e396939308b815f99907440ea Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:18:04 +0000 Subject: [PATCH 21/54] ARM: mmp: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-mmp/time.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 66528193f939..0c0ab0931735 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -146,7 +146,6 @@ static cycle_t clksrc_read(struct clocksource *cs) static struct clocksource cksrc = { .name = "clocksource", - .shift = 20, .rating = 200, .read = clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -193,10 +192,8 @@ void __init timer_init(int irq) ckevt.min_delta_ns = clockevent_delta2ns(MIN_DELTA, &ckevt); ckevt.cpumask = cpumask_of(0); - cksrc.mult = clocksource_hz2mult(CLOCK_TICK_RATE, cksrc.shift); - setup_irq(irq, &timer_irq); - clocksource_register(&cksrc); + clocksource_register_hz(&cksrc, CLOCK_TICK_RATE); clockevents_register_device(&ckevt); } From ff9c9772480f06bc7164217b93ebb99fc088539f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:18:12 +0000 Subject: [PATCH 22/54] ARM: MSM: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Tested-By: Jeff Ohlstein Acked-by: David Brown Signed-off-by: Russell King --- arch/arm/mach-msm/timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-msm/timer.c b/arch/arm/mach-msm/timer.c index 950100f19d07..595be7fea31a 100644 --- a/arch/arm/mach-msm/timer.c +++ b/arch/arm/mach-msm/timer.c @@ -137,7 +137,6 @@ static struct msm_clock msm_clocks[] = { .rating = 200, .read = msm_gpt_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 17, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }, .irq = { @@ -164,7 +163,6 @@ static struct msm_clock msm_clocks[] = { .rating = 300, .read = msm_dgt_read, .mask = CLOCKSOURCE_MASK((32 - MSM_DGT_SHIFT)), - .shift = 24 - MSM_DGT_SHIFT, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }, .irq = { @@ -205,8 +203,7 @@ static void __init msm_timer_init(void) ce->min_delta_ns = clockevent_delta2ns(4, ce); ce->cpumask = cpumask_of(0); - cs->mult = clocksource_hz2mult(clock->freq, cs->shift); - res = clocksource_register(cs); + res = clocksource_register_hz(cs, clock->freq); if (res) printk(KERN_ERR "msm_timer_init: clocksource_register " "failed for %s\n", cs->name); From 4f9272bfe8fca7e6d9e2f44eb78fd2c3ecdf9a0e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:18:21 +0000 Subject: [PATCH 23/54] ARM: netx: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-netx/time.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-netx/time.c b/arch/arm/mach-netx/time.c index 82801dbf0579..f12f22d09b6c 100644 --- a/arch/arm/mach-netx/time.c +++ b/arch/arm/mach-netx/time.c @@ -114,7 +114,6 @@ static struct clocksource clocksource_netx = { .rating = 200, .read = netx_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -151,9 +150,7 @@ static void __init netx_timer_init(void) writel(NETX_GPIO_COUNTER_CTRL_RUN, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE)); - clocksource_netx.mult = - clocksource_hz2mult(CLOCK_TICK_RATE, clocksource_netx.shift); - clocksource_register(&clocksource_netx); + clocksource_register_hz(&clocksource_netx, CLOCK_TICK_RATE); netx_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, netx_clockevent.shift); From b460ddbbe29a45cc13e3f13314ec9aed7e9412f2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:18:32 +0000 Subject: [PATCH 24/54] ARM: ns9xxx: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-ns9xxx/time-ns9360.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-ns9xxx/time-ns9360.c b/arch/arm/mach-ns9xxx/time-ns9360.c index 77281260358a..9ca32f55728b 100644 --- a/arch/arm/mach-ns9xxx/time-ns9360.c +++ b/arch/arm/mach-ns9xxx/time-ns9360.c @@ -35,7 +35,6 @@ static struct clocksource ns9360_clocksource = { .rating = 300, .read = ns9360_clocksource_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -148,10 +147,7 @@ static void __init ns9360_timer_init(void) __raw_writel(tc, SYS_TC(TIMER_CLOCKSOURCE)); - ns9360_clocksource.mult = clocksource_hz2mult(ns9360_cpuclock(), - ns9360_clocksource.shift); - - clocksource_register(&ns9360_clocksource); + clocksource_register_hz(&ns9360_clocksource, ns9360_cpuclock()); latch = SH_DIV(ns9360_cpuclock(), HZ, 0); From 8437c25e78c3af2b31bf6c8942494e34e267f446 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:18:44 +0000 Subject: [PATCH 25/54] ARM: omap: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Tested-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/mach-omap1/time.c | 6 +----- arch/arm/mach-omap2/timer-gp.c | 5 +---- arch/arm/plat-omap/counter_32k.c | 6 +----- 3 files changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/arm/mach-omap1/time.c b/arch/arm/mach-omap1/time.c index 1be6a214d88d..abb34ff2041b 100644 --- a/arch/arm/mach-omap1/time.c +++ b/arch/arm/mach-omap1/time.c @@ -208,7 +208,6 @@ static struct clocksource clocksource_mpu = { .rating = 300, .read = mpu_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -217,13 +216,10 @@ static void __init omap_init_clocksource(unsigned long rate) static char err[] __initdata = KERN_ERR "%s: can't register clocksource!\n"; - clocksource_mpu.mult - = clocksource_khz2mult(rate/1000, clocksource_mpu.shift); - setup_irq(INT_TIMER2, &omap_mpu_timer2_irq); omap_mpu_timer_start(1, ~0, 1); - if (clocksource_register(&clocksource_mpu)) + if (clocksource_register_hz(&clocksource_mpu, rate)) printk(err, clocksource_mpu.name); } diff --git a/arch/arm/mach-omap2/timer-gp.c b/arch/arm/mach-omap2/timer-gp.c index e13c29eecf2b..a7816dbdc6b1 100644 --- a/arch/arm/mach-omap2/timer-gp.c +++ b/arch/arm/mach-omap2/timer-gp.c @@ -195,7 +195,6 @@ static struct clocksource clocksource_gpt = { .rating = 300, .read = clocksource_read_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 24, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -220,9 +219,7 @@ static void __init omap2_gp_clocksource_init(void) omap_dm_timer_set_load_start(gpt, 1, 0); - clocksource_gpt.mult = - clocksource_khz2mult(tick_rate/1000, clocksource_gpt.shift); - if (clocksource_register(&clocksource_gpt)) + if (clocksource_register_hz(&clocksource_gpt, tick_rate)) printk(err2, clocksource_gpt.name); } #endif diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 155fe43a672b..8f149f51cb46 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -102,7 +102,6 @@ static struct clocksource clocksource_32k = { .rating = 250, .read = omap_32k_read_dummy, .mask = CLOCKSOURCE_MASK(32), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -167,12 +166,9 @@ static int __init omap_init_clocksource_32k(void) if (sync_32k_ick) clk_enable(sync_32k_ick); - clocksource_32k.mult = clocksource_hz2mult(32768, - clocksource_32k.shift); - offset_32k = clocksource_32k.read(&clocksource_32k); - if (clocksource_register(&clocksource_32k)) + if (clocksource_register_hz(&clocksource_32k, 32768)) printk(err, clocksource_32k.name); } return 0; From f62ae0ce4369effc432ea25df09c88d7aa08965e Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:19:11 +0000 Subject: [PATCH 26/54] ARM: PXA: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Tested-by: Eric Miao Signed-off-by: Russell King --- arch/arm/mach-pxa/time.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index 293e40aeaf29..caf92c0bfba5 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -127,7 +127,6 @@ static struct clocksource cksrc_pxa_oscr0 = { .rating = 200, .read = pxa_read_oscr, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -155,12 +154,9 @@ static void __init pxa_timer_init(void) clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_pxa_osmr0) + 1; ckevt_pxa_osmr0.cpumask = cpumask_of(0); - cksrc_pxa_oscr0.mult = - clocksource_hz2mult(clock_tick_rate, cksrc_pxa_oscr0.shift); - setup_irq(IRQ_OST0, &pxa_ost0_irq); - clocksource_register(&cksrc_pxa_oscr0); + clocksource_register_hz(&cksrc_pxa_oscr0, clock_tick_rate); clockevents_register_device(&ckevt_pxa_osmr0); } From 71c874529904818a2888a650425369e0842ff28d Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:19:22 +0000 Subject: [PATCH 27/54] ARM: s5pv310: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Kukjin Kim Signed-off-by: Russell King --- arch/arm/mach-s5pv310/time.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-s5pv310/time.c b/arch/arm/mach-s5pv310/time.c index 01b012ad1bfd..b262d4615331 100644 --- a/arch/arm/mach-s5pv310/time.c +++ b/arch/arm/mach-s5pv310/time.c @@ -211,7 +211,6 @@ struct clocksource pwm_clocksource = { .rating = 250, .read = s5pv310_pwm4_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS , }; @@ -230,10 +229,7 @@ static void __init s5pv310_clocksource_init(void) s5pv310_pwm_init(4, ~0); s5pv310_pwm_start(4, 1); - pwm_clocksource.mult = - clocksource_khz2mult(clock_rate/1000, pwm_clocksource.shift); - - if (clocksource_register(&pwm_clocksource)) + if (clocksource_register_hz(&pwm_clocksource, clock_rate)) panic("%s: can't register clocksource\n", pwm_clocksource.name); } From 2c760b5b52a56e8b92c384dc9611e23fd4980d03 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:19:35 +0000 Subject: [PATCH 28/54] ARM: SA11x0: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-sa1100/time.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 74b6e0e570b6..96154f578cd5 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -81,7 +81,6 @@ static struct clocksource cksrc_sa1100_oscr = { .rating = 200, .read = sa1100_read_oscr, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -105,12 +104,9 @@ static void __init sa1100_timer_init(void) clockevent_delta2ns(MIN_OSCR_DELTA * 2, &ckevt_sa1100_osmr0) + 1; ckevt_sa1100_osmr0.cpumask = cpumask_of(0); - cksrc_sa1100_oscr.mult = - clocksource_hz2mult(CLOCK_TICK_RATE, cksrc_sa1100_oscr.shift); - setup_irq(IRQ_OST0, &sa1100_timer_irq); - clocksource_register(&cksrc_sa1100_oscr); + clocksource_register_hz(&cksrc_sa1100_oscr, CLOCK_TICK_RATE); clockevents_register_device(&ckevt_sa1100_osmr0); } From 6b46340a18e7552656097119d121b4b42049dde1 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:19:44 +0000 Subject: [PATCH 29/54] ARM: tcc8k: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/mach-tcc8k/time.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-tcc8k/time.c b/arch/arm/mach-tcc8k/time.c index 78d06008841d..e0a8d609afe1 100644 --- a/arch/arm/mach-tcc8k/time.c +++ b/arch/arm/mach-tcc8k/time.c @@ -35,7 +35,6 @@ static struct clocksource clocksource_tcc = { .rating = 200, .read = tcc_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 28, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -103,9 +102,7 @@ static int __init tcc_clockevent_init(struct clk *clock) { unsigned int c = clk_get_rate(clock); - clocksource_tcc.mult = clocksource_hz2mult(c, - clocksource_tcc.shift); - clocksource_register(&clocksource_tcc); + clocksource_register_hz(&clocksource_tcc, c); clockevent_tcc.mult = div_sc(c, NSEC_PER_SEC, clockevent_tcc.shift); From da797478c1ba05a858c7719a02ce0856a5bb69ce Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:19:55 +0000 Subject: [PATCH 30/54] ARM: U300: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/mach-u300/timer.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 3fc4472719be..377ff7ff9182 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -412,9 +412,7 @@ static void __init u300_timer_init(void) writel(U300_TIMER_APP_EGPT2_TIMER_ENABLE, U300_TIMER_APP_VBASE + U300_TIMER_APP_EGPT2); - clocksource_calc_mult_shift(&clocksource_u300_1mhz, - rate, APPTIMER_MIN_RANGE); - if (clocksource_register(&clocksource_u300_1mhz)) + if (clocksource_register_hz(&clocksource_u300_1mhz, rate)) printk(KERN_ERR "timer: failed to initialize clock " "source %s\n", clocksource_u300_1mhz.name); From 894cf56b1a64b65360086c2c890c2ae7f257793f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:20:06 +0000 Subject: [PATCH 31/54] ARM: nuc: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Wan zongshun Signed-off-by: Russell King --- arch/arm/mach-w90x900/time.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/mach-w90x900/time.c b/arch/arm/mach-w90x900/time.c index b80f769bc135..4b089cb930dc 100644 --- a/arch/arm/mach-w90x900/time.c +++ b/arch/arm/mach-w90x900/time.c @@ -153,7 +153,6 @@ static struct clocksource clocksource_nuc900 = { .rating = 200, .read = nuc900_get_cycles, .mask = CLOCKSOURCE_MASK(TDR_SHIFT), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -176,9 +175,7 @@ static void __init nuc900_clocksource_init(void) val |= (COUNTEN | PERIOD | PRESCALE); __raw_writel(val, REG_TCSR1); - clocksource_nuc900.mult = - clocksource_khz2mult((rate / 1000), clocksource_nuc900.shift); - clocksource_register(&clocksource_nuc900); + clocksource_register_hz(&clocksource_nuc900, rate); } static void __init nuc900_timer_init(void) From d28b116b9239b603f790a5ed6f0c5fe329561b49 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:20:23 +0000 Subject: [PATCH 32/54] ARM: iop: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/plat-iop/time.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index d615e6fbe502..4332dc458ff9 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -170,7 +170,5 @@ void __init iop_init_time(unsigned long tick_rate) write_trr1(0xffffffff); write_tcr1(0xffffffff); write_tmr1(timer_ctl); - clocksource_calc_mult_shift(&iop_clocksource, tick_rate, - IOP_MIN_RANGE); - clocksource_register(&iop_clocksource); + clocksource_register_hz(&iop_clocksource, tick_rate); } From 509e1f0fd7627a5b8d2da9e8c2b4ab1c2743c5f6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:20:35 +0000 Subject: [PATCH 33/54] ARM: mxc: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Sascha Hauer Signed-off-by: Russell King --- arch/arm/plat-mxc/epit.c | 5 +---- arch/arm/plat-mxc/time.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm/plat-mxc/epit.c b/arch/arm/plat-mxc/epit.c index ee9582f4972e..d69d343ff61f 100644 --- a/arch/arm/plat-mxc/epit.c +++ b/arch/arm/plat-mxc/epit.c @@ -93,7 +93,6 @@ static struct clocksource clocksource_epit = { .rating = 200, .read = epit_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -101,9 +100,7 @@ static int __init epit_clocksource_init(struct clk *timer_clk) { unsigned int c = clk_get_rate(timer_clk); - clocksource_epit.mult = clocksource_hz2mult(c, - clocksource_epit.shift); - clocksource_register(&clocksource_epit); + clocksource_register_hz(&clocksource_epit, c); return 0; } diff --git a/arch/arm/plat-mxc/time.c b/arch/arm/plat-mxc/time.c index f9a1b059a76c..9f0c2610595e 100644 --- a/arch/arm/plat-mxc/time.c +++ b/arch/arm/plat-mxc/time.c @@ -120,7 +120,6 @@ static struct clocksource clocksource_mxc = { .rating = 200, .read = mx1_2_get_cycles, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -131,9 +130,7 @@ static int __init mxc_clocksource_init(struct clk *timer_clk) if (timer_is_v2()) clocksource_mxc.read = v2_get_cycles; - clocksource_mxc.mult = clocksource_hz2mult(c, - clocksource_mxc.shift); - clocksource_register(&clocksource_mxc); + clocksource_register_hz(&clocksource_mxc, c); return 0; } From 8492fd28987732e42f33a3711299927e8905d448 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:20:49 +0000 Subject: [PATCH 34/54] ARM: nomadik: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/plat-nomadik/timer.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index 63cdc6025bd7..b0bd6dfea52b 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -222,7 +222,6 @@ void __init nmdk_timer_init(void) } else { cr |= MTU_CRn_PRESCALE_1; } - clocksource_calc_mult_shift(&nmdk_clksrc, rate, MTU_MIN_RANGE); /* Timer 0 is the free running clocksource */ writel(cr, mtu_base + MTU_CR(0)); @@ -233,7 +232,7 @@ void __init nmdk_timer_init(void) /* Now the clock source is ready */ nmdk_clksrc.read = nmdk_read_timer; - if (clocksource_register(&nmdk_clksrc)) + if (clocksource_register_hz(&nmdk_clksrc, rate)) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); From e0387320c32d4b9852e6436233274003e82318a6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:21:21 +0000 Subject: [PATCH 35/54] ARM: spear: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Viresh Kumar Signed-off-by: Russell King --- arch/arm/plat-spear/time.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/plat-spear/time.c b/arch/arm/plat-spear/time.c index ab211652e4ca..839c88df9994 100644 --- a/arch/arm/plat-spear/time.c +++ b/arch/arm/plat-spear/time.c @@ -81,8 +81,6 @@ static struct clocksource clksrc = { .rating = 200, /* its a pretty decent clock */ .read = clocksource_read_cycles, .mask = 0xFFFF, /* 16 bits */ - .mult = 0, /* to be computed */ - .shift = 0, /* to be computed */ .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -105,10 +103,8 @@ static void spear_clocksource_init(void) val |= CTRL_ENABLE ; writew(val, gpt_base + CR(CLKSRC)); - clocksource_calc_mult_shift(&clksrc, tick_rate, SPEAR_MIN_RANGE); - /* register the clocksource */ - clocksource_register(&clksrc); + clocksource_register_hz(&clksrc, tick_rate); } static struct clock_event_device clkevt = { From 1d0ac3cdf3d31ba84499c3a914aa2b54eecbf2af Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:21:33 +0000 Subject: [PATCH 36/54] ARM: orion: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/plat-orion/time.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 715a30177f28..11e2583e85e4 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -102,7 +102,6 @@ static cycle_t orion_clksrc_read(struct clocksource *cs) static struct clocksource orion_clksrc = { .name = "orion_clocksource", - .shift = 20, .rating = 300, .read = orion_clksrc_read, .mask = CLOCKSOURCE_MASK(32), @@ -245,8 +244,7 @@ void __init orion_time_init(unsigned int irq, unsigned int tclk) writel(u & ~BRIDGE_INT_TIMER0, BRIDGE_MASK); u = readl(TIMER_CTRL); writel(u | TIMER0_EN | TIMER0_RELOAD_EN, TIMER_CTRL); - orion_clksrc.mult = clocksource_hz2mult(tclk, orion_clksrc.shift); - clocksource_register(&orion_clksrc); + clocksource_register_hz(&orion_clksrc, tclk); /* * Setup clockevent timer (interrupt-driven.) From 2218543fe0c40ce36fa165c5003072cc8b236c34 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:21:41 +0000 Subject: [PATCH 37/54] ARM: stmp: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Signed-off-by: Russell King --- arch/arm/plat-stmp3xxx/timer.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/arch/arm/plat-stmp3xxx/timer.c b/arch/arm/plat-stmp3xxx/timer.c index 063c7bc0e740..c395630a6edc 100644 --- a/arch/arm/plat-stmp3xxx/timer.c +++ b/arch/arm/plat-stmp3xxx/timer.c @@ -89,7 +89,6 @@ static struct clocksource cksrc_stmp3xxx = { .rating = 250, .read = stmp3xxx_clock_read, .mask = CLOCKSOURCE_MASK(16), - .shift = 10, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -106,8 +105,6 @@ static struct irqaction stmp3xxx_timer_irq = { */ static void __init stmp3xxx_init_timer(void) { - cksrc_stmp3xxx.mult = clocksource_hz2mult(CLOCK_TICK_RATE, - cksrc_stmp3xxx.shift); ckevt_timrot.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt_timrot.shift); ckevt_timrot.min_delta_ns = clockevent_delta2ns(2, &ckevt_timrot); @@ -140,7 +137,7 @@ static void __init stmp3xxx_init_timer(void) setup_irq(IRQ_TIMER0, &stmp3xxx_timer_irq); - clocksource_register(&cksrc_stmp3xxx); + clocksource_register_hz(&cksrc_stmp3xxx, CLOCK_TICK_RATE); clockevents_register_device(&ckevt_timrot); } From 11e718a6535c0c7a375b24c3ad60a98b988090c8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 13 Dec 2010 13:21:52 +0000 Subject: [PATCH 38/54] ARM: realview/versatile: update clock source registration In d7e81c2 (clocksource: Add clocksource_register_hz/khz interface) new interfaces were added which simplify (and optimize) the selection of the divisor shift/mult constants. Switch over to using this new interface. Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/plat-versatile/timer-sp.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/arch/arm/plat-versatile/timer-sp.c b/arch/arm/plat-versatile/timer-sp.c index fb0d1c299718..f09941b84381 100644 --- a/arch/arm/plat-versatile/timer-sp.c +++ b/arch/arm/plat-versatile/timer-sp.c @@ -46,7 +46,6 @@ static struct clocksource clocksource_sp804 = { .rating = 200, .read = sp804_read, .mask = CLOCKSOURCE_MASK(32), - .shift = 20, .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; @@ -63,8 +62,7 @@ void __init sp804_clocksource_init(void __iomem *base) writel(TIMER_CTRL_32BIT | TIMER_CTRL_ENABLE | TIMER_CTRL_PERIODIC, clksrc_base + TIMER_CTRL); - cs->mult = clocksource_khz2mult(TIMER_FREQ_KHZ, cs->shift); - clocksource_register(cs); + clocksource_register_khz(cs, TIMER_FREQ_KHZ); } From 684e94cbcb5add60356d124166e40feb2174f0f1 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Wed, 17 Nov 2010 16:20:15 -0800 Subject: [PATCH 39/54] ARM: tegra: timer: Separate clocksource and sched_clock tegra_clocksource_read should not use cnt32_to_63, wrapping is already handled in the clocksource code. Move the cnt32_to_63 into the sched_clock function, and replace the use of clocksource mult and shift with a multiplication by 1000 to convert us to ns. Acked-by: John Stultz Acked-by: Linus Walleij Tested-by: Olof Johansson Signed-off-by: Colin Cross Signed-off-by: Russell King --- arch/arm/mach-tegra/timer.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index 9057d6fd1d31..b49f2f5025f2 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -91,7 +91,7 @@ static void tegra_timer_set_mode(enum clock_event_mode mode, static cycle_t tegra_clocksource_read(struct clocksource *cs) { - return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)); + return timer_readl(TIMERUS_CNTR_1US); } static struct clock_event_device tegra_clockevent = { @@ -106,14 +106,13 @@ static struct clocksource tegra_clocksource = { .name = "timer_us", .rating = 300, .read = tegra_clocksource_read, - .mask = 0x7FFFFFFFFFFFFFFFULL, + .mask = CLOCKSOURCE_MASK(32), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; unsigned long long sched_clock(void) { - return clocksource_cyc2ns(tegra_clocksource.read(&tegra_clocksource), - tegra_clocksource.mult, tegra_clocksource.shift); + return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)) * 1000; } static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id) From 5e06b6492e53ab2a4e467763a9ee9f70b032c301 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 19:19:25 +0000 Subject: [PATCH 40/54] ARM: ensure all sched_clock() implementations are notrace marked ftrace requires sched_clock() to be notrace. Ensure that all implementations are so marked. Also make sure that they include linux/sched.h Also ensure OMAP clocksource read functions are marked notrace as they're used for sched_clock() too. Tested-by: Santosh Shilimkar Tested-by: Will Deacon Tested-by: Mikael Pettersson Tested-by: Eric Miao Tested-by: Olof Johansson Signed-off-by: Russell King --- arch/arm/mach-ixp4xx/common.c | 2 +- arch/arm/mach-mmp/time.c | 2 +- arch/arm/mach-pxa/time.c | 2 +- arch/arm/mach-sa1100/generic.c | 2 +- arch/arm/mach-tegra/timer.c | 3 ++- arch/arm/mach-u300/timer.c | 1 + arch/arm/plat-iop/time.c | 3 ++- arch/arm/plat-nomadik/timer.c | 1 + arch/arm/plat-omap/counter_32k.c | 15 ++++++++------- arch/arm/plat-orion/time.c | 2 +- arch/arm/plat-versatile/sched-clock.c | 2 +- 11 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 82fc003aae8b..e0b91d8ef644 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -424,7 +424,7 @@ static void __init ixp4xx_clocksource_init(void) /* * sched_clock() */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { cycle_t cyc = ixp4xx_get_cycles(NULL); struct clocksource *cs = &clocksource_ixp4xx; diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index 0c0ab0931735..a2ea33d04e63 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -75,7 +75,7 @@ static inline uint32_t timer_read(void) return __raw_readl(TIMERS_VIRT_BASE + TMR_CVWR(0)); } -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { unsigned long long v = cnt32_to_63(timer_read()); return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR; diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index caf92c0bfba5..b8d9dff00ad1 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -51,7 +51,7 @@ static void __init set_oscr2ns_scale(unsigned long oscr_rate) oscr2ns_scale++; } -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { unsigned long long v = cnt32_to_63(OSCR); return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR; diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 3c1fcd696714..33b4969e9d51 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -119,7 +119,7 @@ unsigned int sa11x0_getspeed(unsigned int cpu) * * ( * 1E9 / 3686400 => * 78125 / 288) */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { unsigned long long v = cnt32_to_63(OSCR); diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index b49f2f5025f2..c52bd84652e5 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include @@ -110,7 +111,7 @@ static struct clocksource tegra_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)) * 1000; } diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 377ff7ff9182..6c68fe797903 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -9,6 +9,7 @@ * Author: Linus Walleij */ #include +#include #include #include #include diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index 4332dc458ff9..d03a93295e60 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -52,7 +53,7 @@ static struct clocksource iop_clocksource = { /* * IOP sched_clock() implementation via its clocksource. */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { cycle_t cyc = iop_clocksource_read(NULL); struct clocksource *cs = &iop_clocksource; diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index b0bd6dfea52b..c3b8a2246b99 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index 8f149f51cb46..aed301bfa2f9 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,7 @@ static u32 offset_32k __read_mostly; #ifdef CONFIG_ARCH_OMAP16XX -static cycle_t omap16xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap16xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP16XX_TIMER_32K_SYNCHRONIZED) - offset_32k; } @@ -53,7 +54,7 @@ static cycle_t omap16xx_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP2420 -static cycle_t omap2420_32k_read(struct clocksource *cs) +static cycle_t notrace omap2420_32k_read(struct clocksource *cs) { return omap_readl(OMAP2420_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -62,7 +63,7 @@ static cycle_t omap2420_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP2430 -static cycle_t omap2430_32k_read(struct clocksource *cs) +static cycle_t notrace omap2430_32k_read(struct clocksource *cs) { return omap_readl(OMAP2430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -71,7 +72,7 @@ static cycle_t omap2430_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP3 -static cycle_t omap34xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap34xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP3430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -80,7 +81,7 @@ static cycle_t omap34xx_32k_read(struct clocksource *cs) #endif #ifdef CONFIG_ARCH_OMAP4 -static cycle_t omap44xx_32k_read(struct clocksource *cs) +static cycle_t notrace omap44xx_32k_read(struct clocksource *cs) { return omap_readl(OMAP4430_32KSYNCT_BASE + 0x10) - offset_32k; } @@ -92,7 +93,7 @@ static cycle_t omap44xx_32k_read(struct clocksource *cs) * Kernel assumes that sched_clock can be called early but may not have * things ready yet. */ -static cycle_t omap_32k_read_dummy(struct clocksource *cs) +static cycle_t notrace omap_32k_read_dummy(struct clocksource *cs) { return 0; } @@ -109,7 +110,7 @@ static struct clocksource clocksource_32k = { * Returns current time from boot in nsecs. It's OK for this to wrap * around for now, as it's just a relative time stamp. */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), clocksource_32k.mult, clocksource_32k.shift); diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 11e2583e85e4..123f96f4194f 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -55,7 +55,7 @@ static u32 ticks_per_jiffy; static unsigned long tclk2ns_scale; -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL)); return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR; diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index 9696ddc238c9..42efd14ed4b3 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -42,7 +42,7 @@ * long as there is always less than 89 seconds between successive * calls to this function. */ -unsigned long long sched_clock(void) +unsigned long long notrace sched_clock(void) { unsigned long long v = cnt32_to_63(readl(REFCOUNTER)); From b5776c4a6d0afc13697e8452b9ebe1cc4d961b74 Mon Sep 17 00:00:00 2001 From: john stultz Date: Thu, 16 Dec 2010 19:03:27 +0000 Subject: [PATCH 41/54] Fix rounding in clocks_calc_mult_shift() Russell King reports: | On the ARM dev boards, we have a 32-bit counter running at 24MHz. Calling | clocks_calc_mult_shift(&mult, &shift, 24MHz, NSEC_PER_SEC, 60) gives | us a multiplier of 2796202666 and a shift of 26. | | Over a large counter delta, this produces an error - lets take a count | from 362976315 to 4280663372: | | (4280663372-362976315) * 2796202666 / 2^26 - (4280663372-362976315) * (1000/24) | => -38.91872422891230269990 | | Can we do better? | | (4280663372-362976315) * 2796202667 / 2^26 - (4280663372-362976315) * (1000/24) | 19.45936211449532822051 | | which is about twice as good as the 2796202666 multiplier. | | Looking at the equivalent divisions obtained, 2796202666 / 2^26 gives | 41.66666665673255920410ns per tick, whereas 2796202667 / 2^26 gives | 41.66666667163372039794ns. The actual value wanted is 1000/24 = | 41.66666666666666666666ns. Fix this by ensuring we round to nearest when calculating the multiplier. Signed-off-by: John Stultz Tested-by: Santosh Shilimkar Tested-by: Will Deacon Tested-by: Mikael Pettersson Tested-by: Eric Miao Tested-by: Olof Johansson Tested-by: Jamie Iles Signed-off-by: Russell King --- kernel/time/clocksource.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index c18d7efa1b4b..df140cd3ea47 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -152,6 +152,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec) */ for (sft = 32; sft > 0; sft--) { tmp = (u64) to << sft; + tmp += from / 2; do_div(tmp, from); if ((tmp >> sftacc) == 0) break; From 112f38a4a31668eb6a7d91d128296a26afdf7c4b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 19:23:07 +0000 Subject: [PATCH 42/54] ARM: sched_clock: provide common infrastructure for sched_clock() Provide common sched_clock() infrastructure for platforms to use to create a 64-bit ns based sched_clock() implementation from a counter running at a non-variable clock rate. This implementation is based upon maintaining an epoch for the counter and an epoch for the nanosecond time. When we desire a sched_clock() time, we calculate the number of counter ticks since the last epoch update, convert this to nanoseconds and add to the epoch nanoseconds. We regularly refresh these epochs within the counter wrap interval. We perform a similar calculation as above, and store the new epochs. We read and write the epochs in such a way that sched_clock() can easily (and locklessly) detect when an update is in progress, and repeat the loading of these constants when they're known not to be stable. The one caveat is that sched_clock() is not called in the middle of an update. We achieve that by disabling IRQs. Finally, if the clock rate is known at compile time, the counter to ns conversion factors can be specified, allowing sched_clock() to be tightly optimized. We ensure that these factors are correct by providing an initialization function which performs a run-time check. Acked-by: Peter Zijlstra Tested-by: Santosh Shilimkar Tested-by: Will Deacon Tested-by: Mikael Pettersson Tested-by: Eric Miao Tested-by: Olof Johansson Tested-by: Jamie Iles Reviewed-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/Kconfig | 3 + arch/arm/include/asm/sched_clock.h | 118 +++++++++++++++++++++++++++++ arch/arm/kernel/Makefile | 1 + arch/arm/kernel/sched_clock.c | 69 +++++++++++++++++ 4 files changed, 191 insertions(+) create mode 100644 arch/arm/include/asm/sched_clock.h create mode 100644 arch/arm/kernel/sched_clock.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 49778bb43782..ed7a0a729d9c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -38,6 +38,9 @@ config HAVE_PWM config SYS_SUPPORTS_APM_EMULATION bool +config HAVE_SCHED_CLOCK + bool + config GENERIC_GPIO bool diff --git a/arch/arm/include/asm/sched_clock.h b/arch/arm/include/asm/sched_clock.h new file mode 100644 index 000000000000..a84628be1a7b --- /dev/null +++ b/arch/arm/include/asm/sched_clock.h @@ -0,0 +1,118 @@ +/* + * sched_clock.h: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef ASM_SCHED_CLOCK +#define ASM_SCHED_CLOCK + +#include +#include + +struct clock_data { + u64 epoch_ns; + u32 epoch_cyc; + u32 epoch_cyc_copy; + u32 mult; + u32 shift; +}; + +#define DEFINE_CLOCK_DATA(name) struct clock_data name + +static inline u64 cyc_to_ns(u64 cyc, u32 mult, u32 shift) +{ + return (cyc * mult) >> shift; +} + +/* + * Atomically update the sched_clock epoch. Your update callback will + * be called from a timer before the counter wraps - read the current + * counter value, and call this function to safely move the epochs + * forward. Only use this from the update callback. + */ +static inline void update_sched_clock(struct clock_data *cd, u32 cyc, u32 mask) +{ + unsigned long flags; + u64 ns = cd->epoch_ns + + cyc_to_ns((cyc - cd->epoch_cyc) & mask, cd->mult, cd->shift); + + /* + * Write epoch_cyc and epoch_ns in a way that the update is + * detectable in cyc_to_fixed_sched_clock(). + */ + raw_local_irq_save(flags); + cd->epoch_cyc = cyc; + smp_wmb(); + cd->epoch_ns = ns; + smp_wmb(); + cd->epoch_cyc_copy = cyc; + raw_local_irq_restore(flags); +} + +/* + * If your clock rate is known at compile time, using this will allow + * you to optimize the mult/shift loads away. This is paired with + * init_fixed_sched_clock() to ensure that your mult/shift are correct. + */ +static inline unsigned long long cyc_to_fixed_sched_clock(struct clock_data *cd, + u32 cyc, u32 mask, u32 mult, u32 shift) +{ + u64 epoch_ns; + u32 epoch_cyc; + + /* + * Load the epoch_cyc and epoch_ns atomically. We do this by + * ensuring that we always write epoch_cyc, epoch_ns and + * epoch_cyc_copy in strict order, and read them in strict order. + * If epoch_cyc and epoch_cyc_copy are not equal, then we're in + * the middle of an update, and we should repeat the load. + */ + do { + epoch_cyc = cd->epoch_cyc; + smp_rmb(); + epoch_ns = cd->epoch_ns; + smp_rmb(); + } while (epoch_cyc != cd->epoch_cyc_copy); + + return epoch_ns + cyc_to_ns((cyc - epoch_cyc) & mask, mult, shift); +} + +/* + * Otherwise, you need to use this, which will obtain the mult/shift + * from the clock_data structure. Use init_sched_clock() with this. + */ +static inline unsigned long long cyc_to_sched_clock(struct clock_data *cd, + u32 cyc, u32 mask) +{ + return cyc_to_fixed_sched_clock(cd, cyc, mask, cd->mult, cd->shift); +} + +/* + * Initialize the clock data - calculate the appropriate multiplier + * and shift. Also setup a timer to ensure that the epoch is refreshed + * at the appropriate time interval, which will call your update + * handler. + */ +void init_sched_clock(struct clock_data *, void (*)(void), + unsigned int, unsigned long); + +/* + * Use this initialization function rather than init_sched_clock() if + * you're using cyc_to_fixed_sched_clock, which will warn if your + * constants are incorrect. + */ +static inline void init_fixed_sched_clock(struct clock_data *cd, + void (*update)(void), unsigned int bits, unsigned long rate, + u32 mult, u32 shift) +{ + init_sched_clock(cd, update, bits, rate); + if (cd->mult != mult || cd->shift != shift) { + pr_crit("sched_clock: wrong multiply/shift: %u>>%u vs calculated %u>>%u\n" + "sched_clock: fix multiply/shift to avoid scheduler hiccups\n", + mult, shift, cd->mult, cd->shift); + } +} + +#endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 679851a9f589..fd3ec49bfba6 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o diff --git a/arch/arm/kernel/sched_clock.c b/arch/arm/kernel/sched_clock.c new file mode 100644 index 000000000000..2cdcc9287c74 --- /dev/null +++ b/arch/arm/kernel/sched_clock.c @@ -0,0 +1,69 @@ +/* + * sched_clock.c: support for extending counters to full 64-bit ns counter + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include + +#include + +static void sched_clock_poll(unsigned long wrap_ticks); +static DEFINE_TIMER(sched_clock_timer, sched_clock_poll, 0, 0); +static void (*sched_clock_update_fn)(void); + +static void sched_clock_poll(unsigned long wrap_ticks) +{ + mod_timer(&sched_clock_timer, round_jiffies(jiffies + wrap_ticks)); + sched_clock_update_fn(); +} + +void __init init_sched_clock(struct clock_data *cd, void (*update)(void), + unsigned int clock_bits, unsigned long rate) +{ + unsigned long r, w; + u64 res, wrap; + char r_unit; + + sched_clock_update_fn = update; + + /* calculate the mult/shift to convert counter ticks to ns. */ + clocks_calc_mult_shift(&cd->mult, &cd->shift, rate, NSEC_PER_SEC, 60); + + r = rate; + if (r >= 4000000) { + r /= 1000000; + r_unit = 'M'; + } else { + r /= 1000; + r_unit = 'k'; + } + + /* calculate how many ns until we wrap */ + wrap = cyc_to_ns((1ULL << clock_bits) - 1, cd->mult, cd->shift); + do_div(wrap, NSEC_PER_MSEC); + w = wrap; + + /* calculate the ns resolution of this counter */ + res = cyc_to_ns(1ULL, cd->mult, cd->shift); + pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lums\n", + clock_bits, r, r_unit, res, w); + + /* + * Start the timer to keep sched_clock() properly updated and + * sets the initial epoch. + */ + sched_clock_timer.data = msecs_to_jiffies(w - (w / 10)); + sched_clock_poll(sched_clock_timer.data); + + /* + * Ensure that sched_clock() starts off at 0ns + */ + cd->epoch_ns = 0; +} From 5b0d495c067d843c52fc1f9edbca29d5e9368b10 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:23:13 +0000 Subject: [PATCH 43/54] ARM: ixp4xx: convert sched_clock() to use new infrastructure Convert ixp4xx to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Mikael Pettersson Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-ixp4xx/common.c | 31 ++++++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ed7a0a729d9c..6f58bce687f3 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -437,6 +437,7 @@ config ARCH_IXP4XX select CPU_XSCALE select GENERIC_GPIO select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select DMABOUNCE if PCI help Support for Intel's IXP4XX (XScale) family of processors. diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index e0b91d8ef644..4dbfcbb9163c 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -398,6 +399,23 @@ void __init ixp4xx_sys_init(void) ixp4xx_exp_bus_size >> 20); } +/* + * sched_clock() + */ +static DEFINE_CLOCK_DATA(cd); + +unsigned long long notrace sched_clock(void) +{ + u32 cyc = *IXP4XX_OSTS; + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace ixp4xx_update_sched_clock(void) +{ + u32 cyc = *IXP4XX_OSTS; + update_sched_clock(&cd, cyc, (u32)~0); +} + /* * clocksource */ @@ -418,20 +436,11 @@ unsigned long ixp4xx_timer_freq = FREQ; EXPORT_SYMBOL(ixp4xx_timer_freq); static void __init ixp4xx_clocksource_init(void) { + init_sched_clock(&cd, ixp4xx_update_sched_clock, 32, ixp4xx_timer_freq); + clocksource_register_hz(&clocksource_ixp4xx, ixp4xx_timer_freq); } -/* - * sched_clock() - */ -unsigned long long notrace sched_clock(void) -{ - cycle_t cyc = ixp4xx_get_cycles(NULL); - struct clocksource *cs = &clocksource_ixp4xx; - - return clocksource_cyc2ns(cyc, cs->mult, cs->shift); -} - /* * clockevents */ From 28bb7bc61a8cd48716d38d9c153fdc524f09870a Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:46:48 +0000 Subject: [PATCH 44/54] ARM: mmp: convert sched_clock() to use new infrastructure Convert mmp to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-mmp/time.c | 32 +++++++++++--------------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 6f58bce687f3..0208e1e9ed79 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -513,6 +513,7 @@ config ARCH_MMP select ARCH_REQUIRE_GPIOLIB select COMMON_CLKDEV select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select TICK_ONESHOT select PLAT_PXA select SPARSE_IRQ diff --git a/arch/arm/mach-mmp/time.c b/arch/arm/mach-mmp/time.c index a2ea33d04e63..aeb9ae23e6ce 100644 --- a/arch/arm/mach-mmp/time.c +++ b/arch/arm/mach-mmp/time.c @@ -26,8 +26,8 @@ #include #include #include -#include +#include #include #include #include @@ -42,23 +42,7 @@ #define MAX_DELTA (0xfffffffe) #define MIN_DELTA (16) -#define TCR2NS_SCALE_FACTOR 10 - -static unsigned long tcr2ns_scale; - -static void __init set_tcr2ns_scale(unsigned long tcr_rate) -{ - unsigned long long v = 1000000000ULL << TCR2NS_SCALE_FACTOR; - do_div(v, tcr_rate); - tcr2ns_scale = v; - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (tcr2ns_scale & 1) - tcr2ns_scale++; -} +static DEFINE_CLOCK_DATA(cd); /* * FIXME: the timer needs some delay to stablize the counter capture @@ -77,8 +61,14 @@ static inline uint32_t timer_read(void) unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(timer_read()); - return (v * tcr2ns_scale) >> TCR2NS_SCALE_FACTOR; + u32 cyc = timer_read(); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace mmp_update_sched_clock(void) +{ + u32 cyc = timer_read(); + update_sched_clock(&cd, cyc, (u32)~0); } static irqreturn_t timer_interrupt(int irq, void *dev_id) @@ -185,7 +175,7 @@ void __init timer_init(int irq) { timer_config(); - set_tcr2ns_scale(CLOCK_TICK_RATE); + init_sched_clock(&cd, mmp_update_sched_clock, 32, CLOCK_TICK_RATE); ckevt.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, ckevt.shift); ckevt.max_delta_ns = clockevent_delta2ns(MAX_DELTA, &ckevt); From 7ce830188199c23aaeaf0c5ccc28b73c32b6df02 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:48:15 +0000 Subject: [PATCH 45/54] ARM: pxa: convert sched_clock() to use new infrastructure Convert pxa to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Eric Miao Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-pxa/time.c | 33 +++++++++++---------------------- 2 files changed, 12 insertions(+), 22 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0208e1e9ed79..23035d65252c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -593,6 +593,7 @@ config ARCH_PXA select COMMON_CLKDEV select ARCH_REQUIRE_GPIOLIB select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select TICK_ONESHOT select PLAT_PXA select SPARSE_IRQ diff --git a/arch/arm/mach-pxa/time.c b/arch/arm/mach-pxa/time.c index b8d9dff00ad1..e7f64d9b4f2d 100644 --- a/arch/arm/mach-pxa/time.c +++ b/arch/arm/mach-pxa/time.c @@ -17,11 +17,11 @@ #include #include #include -#include #include #include #include +#include #include /* @@ -32,29 +32,18 @@ * long as there is always less than 582 seconds between successive * calls to sched_clock() which should always be the case in practice. */ - -#define OSCR2NS_SCALE_FACTOR 10 - -static unsigned long oscr2ns_scale; - -static void __init set_oscr2ns_scale(unsigned long oscr_rate) -{ - unsigned long long v = 1000000000ULL << OSCR2NS_SCALE_FACTOR; - do_div(v, oscr_rate); - oscr2ns_scale = v; - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (oscr2ns_scale & 1) - oscr2ns_scale++; -} +static DEFINE_CLOCK_DATA(cd); unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(OSCR); - return (v * oscr2ns_scale) >> OSCR2NS_SCALE_FACTOR; + u32 cyc = OSCR; + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace pxa_update_sched_clock(void) +{ + u32 cyc = OSCR; + update_sched_clock(&cd, cyc, (u32)~0); } @@ -144,7 +133,7 @@ static void __init pxa_timer_init(void) OIER = 0; OSSR = OSSR_M0 | OSSR_M1 | OSSR_M2 | OSSR_M3; - set_oscr2ns_scale(clock_tick_rate); + init_sched_clock(&cd, pxa_update_sched_clock, 32, clock_tick_rate); ckevt_pxa_osmr0.mult = div_sc(clock_tick_rate, NSEC_PER_SEC, ckevt_pxa_osmr0.shift); From 5094b92f1c7d0f21c5d4411ba7415bac0684210f Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:49:06 +0000 Subject: [PATCH 46/54] ARM: sa1100: convert sched_clock() to use new infrastructure Convert sa1100 to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-sa1100/generic.c | 23 ----------------------- arch/arm/mach-sa1100/time.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 23035d65252c..0e1a9667a869 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -642,6 +642,7 @@ config ARCH_SA1100 select CPU_FREQ select GENERIC_CLOCKEVENTS select HAVE_CLK + select HAVE_SCHED_CLOCK select TICK_ONESHOT select ARCH_REQUIRE_GPIOLIB help diff --git a/arch/arm/mach-sa1100/generic.c b/arch/arm/mach-sa1100/generic.c index 33b4969e9d51..fbc224d88e6f 100644 --- a/arch/arm/mach-sa1100/generic.c +++ b/arch/arm/mach-sa1100/generic.c @@ -16,9 +16,7 @@ #include #include #include -#include /* just for sched_clock() - funny that */ #include -#include #include #include @@ -109,27 +107,6 @@ unsigned int sa11x0_getspeed(unsigned int cpu) return cclk_frequency_100khz[PPCR & 0xf] * 100; } -/* - * This is the SA11x0 sched_clock implementation. This has - * a resolution of 271ns, and a maximum value of 32025597s (370 days). - * - * The return value is guaranteed to be monotonic in that range as - * long as there is always less than 582 seconds between successive - * calls to this function. - * - * ( * 1E9 / 3686400 => * 78125 / 288) - */ -unsigned long long notrace sched_clock(void) -{ - unsigned long long v = cnt32_to_63(OSCR); - - /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */ - v *= 78125<<1; - do_div(v, 288<<1); - - return v; -} - /* * Default power-off for SA1100 */ diff --git a/arch/arm/mach-sa1100/time.c b/arch/arm/mach-sa1100/time.c index 96154f578cd5..ae4f3d80416f 100644 --- a/arch/arm/mach-sa1100/time.c +++ b/arch/arm/mach-sa1100/time.c @@ -12,12 +12,39 @@ #include #include #include +#include /* just for sched_clock() - funny that */ #include #include #include +#include #include +/* + * This is the SA11x0 sched_clock implementation. + */ +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 3.6864MHz, + * NSEC_PER_SEC, 60). + * This gives a resolution of about 271ns and a wrap period of about 19min. + */ +#define SC_MULT 2275555556u +#define SC_SHIFT 23 + +unsigned long long notrace sched_clock(void) +{ + u32 cyc = OSCR; + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace sa1100_update_sched_clock(void) +{ + u32 cyc = OSCR; + update_sched_clock(&cd, cyc, (u32)~0); +} + #define MIN_OSCR_DELTA 2 static irqreturn_t sa1100_ost0_interrupt(int irq, void *dev_id) @@ -96,6 +123,9 @@ static void __init sa1100_timer_init(void) OIER = 0; /* disable any timer interrupts */ OSSR = 0xf; /* clear status on all timers */ + init_fixed_sched_clock(&cd, sa1100_update_sched_clock, 32, + 3686400, SC_MULT, SC_SHIFT); + ckevt_sa1100_osmr0.mult = div_sc(3686400, NSEC_PER_SEC, ckevt_sa1100_osmr0.shift); ckevt_sa1100_osmr0.max_delta_ns = From e3f4c0ab916334e861fc49eeb2673a8c13e978e7 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:49:42 +0000 Subject: [PATCH 47/54] ARM: tegra: convert sched_clock() to use new infrastructure Convert tegra to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Olof Johansson Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-tegra/timer.c | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 0e1a9667a869..ec7b0274d46a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -570,6 +570,7 @@ config ARCH_TEGRA select GENERIC_CLOCKEVENTS select GENERIC_GPIO select HAVE_CLK + select HAVE_SCHED_CLOCK select COMMON_CLKDEV select ARCH_HAS_BARRIERS if CACHE_L2X0 select ARCH_HAS_CPUFREQ diff --git a/arch/arm/mach-tegra/timer.c b/arch/arm/mach-tegra/timer.c index c52bd84652e5..7b8ad1f98f44 100644 --- a/arch/arm/mach-tegra/timer.c +++ b/arch/arm/mach-tegra/timer.c @@ -26,10 +26,10 @@ #include #include #include -#include #include #include +#include #include #include @@ -111,9 +111,25 @@ static struct clocksource tegra_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 1MHz, NSEC_PER_SEC, 60). + * This gives a resolution of about 1us and a wrap period of about 1h11min. + */ +#define SC_MULT 4194304000u +#define SC_SHIFT 22 + unsigned long long notrace sched_clock(void) { - return cnt32_to_63(timer_readl(TIMERUS_CNTR_1US)) * 1000; + u32 cyc = timer_readl(TIMERUS_CNTR_1US); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace tegra_update_sched_clock(void) +{ + u32 cyc = timer_readl(TIMERUS_CNTR_1US); + update_sched_clock(&cd, cyc, (u32)~0); } static irqreturn_t tegra_timer_interrupt(int irq, void *dev_id) @@ -158,6 +174,9 @@ static void __init tegra_init_timer(void) WARN(1, "Unknown clock rate"); } + init_fixed_sched_clock(&cd, tegra_update_sched_clock, 32, + 1000000, SC_MULT, SC_SHIFT); + if (clocksource_register_hz(&tegra_clocksource, 1000000)) { printk(KERN_ERR "Failed to register clocksource\n"); BUG(); From 5c21b7ca2c1cca3758a1d357dd57987c96ad1aa0 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:50:14 +0000 Subject: [PATCH 48/54] ARM: u300: convert sched_clock() to use new infrastructure Convert u300 to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Acked-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-u300/timer.c | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ec7b0274d46a..d5bd266ee565 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -790,6 +790,7 @@ config ARCH_U300 bool "ST-Ericsson U300 Series" depends on MMU select CPU_ARM926T + select HAVE_SCHED_CLOCK select HAVE_TCM select ARM_AMBA select ARM_VIC diff --git a/arch/arm/mach-u300/timer.c b/arch/arm/mach-u300/timer.c index 6c68fe797903..3ec58bd2d6e4 100644 --- a/arch/arm/mach-u300/timer.c +++ b/arch/arm/mach-u300/timer.c @@ -22,6 +22,7 @@ #include /* Generic stuff */ +#include #include #include #include @@ -353,12 +354,18 @@ static struct clocksource clocksource_u300_1mhz = { * this wraps around for now, since it is just a relative time * stamp. (Inspired by OMAP implementation.) */ +static DEFINE_CLOCK_DATA(cd); + unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(clocksource_u300_1mhz.read( - &clocksource_u300_1mhz), - clocksource_u300_1mhz.mult, - clocksource_u300_1mhz.shift); + u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} + +static void notrace u300_update_sched_clock(void) +{ + u32 cyc = readl(U300_TIMER_APP_VBASE + U300_TIMER_APP_GPT2CC); + update_sched_clock(&cd, cyc, (u32)~0); } @@ -376,6 +383,8 @@ static void __init u300_timer_init(void) clk_enable(clk); rate = clk_get_rate(clk); + init_sched_clock(&cd, u300_update_sched_clock, 32, rate); + /* * Disable the "OS" and "DD" timers - these are designed for Symbian! * Example usage in cnh1601578 cpu subsystem pd_timer_app.c From 08f26b1ef25a2f7b52afcb805d260fd5a000a7f6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:52:10 +0000 Subject: [PATCH 49/54] ARM: iop: convert sched_clock() to use new infrastructure Convert iop platforms to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/plat-iop/time.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index d5bd266ee565..58e2fe30715b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -992,6 +992,7 @@ config ARCH_ACORN config PLAT_IOP bool select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK config PLAT_ORION bool diff --git a/arch/arm/plat-iop/time.c b/arch/arm/plat-iop/time.c index d03a93295e60..2db6e60d149d 100644 --- a/arch/arm/plat-iop/time.c +++ b/arch/arm/plat-iop/time.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -50,15 +51,21 @@ static struct clocksource iop_clocksource = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static DEFINE_CLOCK_DATA(cd); + /* * IOP sched_clock() implementation via its clocksource. */ unsigned long long notrace sched_clock(void) { - cycle_t cyc = iop_clocksource_read(NULL); - struct clocksource *cs = &iop_clocksource; + u32 cyc = 0xffffffffu - read_tcr1(); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); +} - return clocksource_cyc2ns(cyc, cs->mult, cs->shift); +static void notrace iop_update_sched_clock(void) +{ + u32 cyc = 0xffffffffu - read_tcr1(); + update_sched_clock(&cd, cyc, (u32)~0); } /* @@ -144,6 +151,8 @@ void __init iop_init_time(unsigned long tick_rate) { u32 timer_ctl; + init_sched_clock(&cd, iop_update_sched_clock, 32, tick_rate); + ticks_per_jiffy = DIV_ROUND_CLOSEST(tick_rate, HZ); iop_tick_rate = tick_rate; From ec05aa139939d9c7b790041a379f0e752b2f1a4d Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:53:02 +0000 Subject: [PATCH 50/54] ARM: nomadik: convert sched_clock() to use new infrastructure Convert nomadik platforms to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Acked-by: Linus Walleij Signed-off-by: Russell King --- arch/arm/plat-nomadik/Kconfig | 1 + arch/arm/plat-nomadik/timer.c | 76 +++++------------------------------ 2 files changed, 10 insertions(+), 67 deletions(-) diff --git a/arch/arm/plat-nomadik/Kconfig b/arch/arm/plat-nomadik/Kconfig index 5da3f97c537b..187f4e84bb22 100644 --- a/arch/arm/plat-nomadik/Kconfig +++ b/arch/arm/plat-nomadik/Kconfig @@ -14,6 +14,7 @@ if PLAT_NOMADIK config HAS_MTU bool + select HAVE_SCHED_CLOCK help Support for Multi Timer Unit. MTU provides access to multiple interrupt generating programmable diff --git a/arch/arm/plat-nomadik/timer.c b/arch/arm/plat-nomadik/timer.c index c3b8a2246b99..41723402006b 100644 --- a/arch/arm/plat-nomadik/timer.c +++ b/arch/arm/plat-nomadik/timer.c @@ -17,10 +17,9 @@ #include #include #include -#include -#include #include #include +#include #include @@ -53,81 +52,24 @@ static struct clocksource nmdk_clksrc = { * Override the global weak sched_clock symbol with this * local implementation which uses the clocksource to get some * better resolution when scheduling the kernel. - * - * Because the hardware timer period may be quite short - * (32.3 secs on the 133 MHz MTU timer selection on ux500) - * and because cnt32_to_63() needs to be called at least once per - * half period to work properly, a kernel keepwarm() timer is set up - * to ensure this requirement is always met. - * - * Also the sched_clock timer will wrap around at some point, - * here we set it to run continously for a year. */ -#define SCHED_CLOCK_MIN_WRAP 3600*24*365 -static struct timer_list cnt32_to_63_keepwarm_timer; -static u32 sched_mult; -static u32 sched_shift; +static DEFINE_CLOCK_DATA(cd); unsigned long long notrace sched_clock(void) { - u64 cycles; + u32 cyc; if (unlikely(!mtu_base)) return 0; - cycles = cnt32_to_63(-readl(mtu_base + MTU_VAL(0))); - /* - * sched_mult is guaranteed to be even so will - * shift out bit 63 - */ - return (cycles * sched_mult) >> sched_shift; + cyc = -readl(mtu_base + MTU_VAL(0)); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); } -/* Just kick sched_clock every so often */ -static void cnt32_to_63_keepwarm(unsigned long data) +static void notrace nomadik_update_sched_clock(void) { - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); - (void) sched_clock(); -} - -/* - * Set up a timer to keep sched_clock():s 32_to_63 algorithm warm - * once in half a 32bit timer wrap interval. - */ -static void __init nmdk_sched_clock_init(unsigned long rate) -{ - u32 v; - unsigned long delta; - u64 days; - - /* Find the apropriate mult and shift factors */ - clocks_calc_mult_shift(&sched_mult, &sched_shift, - rate, NSEC_PER_SEC, SCHED_CLOCK_MIN_WRAP); - /* We need to multiply by an even number to get rid of bit 63 */ - if (sched_mult & 1) - sched_mult++; - - /* Let's see what we get, take max counter and scale it */ - days = (0xFFFFFFFFFFFFFFFFLLU * sched_mult) >> sched_shift; - do_div(days, NSEC_PER_SEC); - do_div(days, (3600*24)); - - pr_info("sched_clock: using %d bits @ %lu Hz wrap in %lu days\n", - (64 - sched_shift), rate, (unsigned long) days); - - /* - * Program a timer to kick us at half 32bit wraparound - * Formula: seconds per wrap = (2^32) / f - */ - v = 0xFFFFFFFFUL / rate; - /* We want half of the wrap time to keep cnt32_to_63 warm */ - v /= 2; - pr_debug("sched_clock: prescaled timer rate: %lu Hz, " - "initialize keepwarm timer every %d seconds\n", rate, v); - /* Convert seconds to jiffies */ - delta = msecs_to_jiffies(v*1000); - setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, delta); - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + delta)); + u32 cyc = -readl(mtu_base + MTU_VAL(0)); + update_sched_clock(&cd, cyc, (u32)~0); } /* Clockevent device: use one-shot mode */ @@ -237,7 +179,7 @@ void __init nmdk_timer_init(void) pr_err("timer: failed to initialize clock source %s\n", nmdk_clksrc.name); - nmdk_sched_clock_init(rate); + init_sched_clock(&cd, nomadik_update_sched_clock, 32, rate); /* Timer 1 is used for events */ From dc548fbbd2ecd0fc3b02301d551e5f8e19ae58fd Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:53:51 +0000 Subject: [PATCH 51/54] ARM: omap: convert sched_clock() to use new infrastructure Convert omap to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/plat-omap/counter_32k.c | 24 ++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 58e2fe30715b..726279fe3f53 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -839,6 +839,7 @@ config ARCH_OMAP select ARCH_REQUIRE_GPIOLIB select ARCH_HAS_CPUFREQ select GENERIC_CLOCKEVENTS + select HAVE_SCHED_CLOCK select ARCH_HAS_HOLES_MEMORYMODEL help Support for TI's OMAP platform (OMAP1/2/3/4). diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c index aed301bfa2f9..1b558efbe732 100644 --- a/arch/arm/plat-omap/counter_32k.c +++ b/arch/arm/plat-omap/counter_32k.c @@ -18,6 +18,8 @@ #include #include +#include + #include #include @@ -110,10 +112,25 @@ static struct clocksource clocksource_32k = { * Returns current time from boot in nsecs. It's OK for this to wrap * around for now, as it's just a relative time stamp. */ +static DEFINE_CLOCK_DATA(cd); + +/* + * Constants generated by clocks_calc_mult_shift(m, s, 32768, NSEC_PER_SEC, 60). + * This gives a resolution of about 30us and a wrap period of about 36hrs. + */ +#define SC_MULT 4000000000u +#define SC_SHIFT 17 + unsigned long long notrace sched_clock(void) { - return clocksource_cyc2ns(clocksource_32k.read(&clocksource_32k), - clocksource_32k.mult, clocksource_32k.shift); + u32 cyc = clocksource_32k.read(&clocksource_32k); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, SC_MULT, SC_SHIFT); +} + +static void notrace omap_update_sched_clock(void) +{ + u32 cyc = clocksource_32k.read(&clocksource_32k); + update_sched_clock(&cd, cyc, (u32)~0); } /** @@ -171,6 +188,9 @@ static int __init omap_init_clocksource_32k(void) if (clocksource_register_hz(&clocksource_32k, 32768)) printk(err, clocksource_32k.name); + + init_fixed_sched_clock(&cd, omap_update_sched_clock, 32, + 32768, SC_MULT, SC_SHIFT); } return 0; } From f06a1624621527ef597ae4b3b795553fc1b2eff2 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:55:06 +0000 Subject: [PATCH 52/54] ARM: orion: convert sched_clock() to use new infrastructure Convert orion platforms to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/plat-orion/time.c | 44 ++++++++------------------------------ 2 files changed, 10 insertions(+), 35 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 726279fe3f53..92e8c0174dd0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -997,6 +997,7 @@ config PLAT_IOP config PLAT_ORION bool + select HAVE_SCHED_CLOCK config PLAT_PXA bool diff --git a/arch/arm/plat-orion/time.c b/arch/arm/plat-orion/time.c index 123f96f4194f..c3da2478b2aa 100644 --- a/arch/arm/plat-orion/time.c +++ b/arch/arm/plat-orion/time.c @@ -13,11 +13,11 @@ #include #include -#include #include #include #include #include +#include #include #include #include @@ -44,52 +44,26 @@ static u32 ticks_per_jiffy; /* * Orion's sched_clock implementation. It has a resolution of - * at least 7.5ns (133MHz TCLK) and a maximum value of 834 days. - * - * Because the hardware timer period is quite short (21 secs if - * 200MHz TCLK) and because cnt32_to_63() needs to be called at - * least once per half period to work properly, a kernel timer is - * set up to ensure this requirement is always met. + * at least 7.5ns (133MHz TCLK). */ -#define TCLK2NS_SCALE_FACTOR 8 - -static unsigned long tclk2ns_scale; +static DEFINE_CLOCK_DATA(cd); unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(0xffffffff - readl(TIMER0_VAL)); - return (v * tclk2ns_scale) >> TCLK2NS_SCALE_FACTOR; + u32 cyc = 0xffffffff - readl(TIMER0_VAL); + return cyc_to_sched_clock(&cd, cyc, (u32)~0); } -static struct timer_list cnt32_to_63_keepwarm_timer; -static void cnt32_to_63_keepwarm(unsigned long data) +static void notrace orion_update_sched_clock(void) { - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); - (void) sched_clock(); + u32 cyc = 0xffffffff - readl(TIMER0_VAL); + update_sched_clock(&cd, cyc, (u32)~0); } static void __init setup_sched_clock(unsigned long tclk) { - unsigned long long v; - unsigned long data; - - v = NSEC_PER_SEC; - v <<= TCLK2NS_SCALE_FACTOR; - v += tclk/2; - do_div(v, tclk); - /* - * We want an even value to automatically clear the top bit - * returned by cnt32_to_63() without an additional run time - * instruction. So if the LSB is 1 then round it up. - */ - if (v & 1) - v++; - tclk2ns_scale = v; - - data = (0xffffffffUL / tclk / 2 - 2) * HZ; - setup_timer(&cnt32_to_63_keepwarm_timer, cnt32_to_63_keepwarm, data); - mod_timer(&cnt32_to_63_keepwarm_timer, round_jiffies(jiffies + data)); + init_sched_clock(&cd, orion_update_sched_clock, 32, tclk); } /* From 1da0c89c66753860ccfe81eb327c25db46c2a24a Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:56:47 +0000 Subject: [PATCH 53/54] ARM: versatile: convert sched_clock() to use new infrastructure Convert versatile platforms to use the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 2 + arch/arm/mach-realview/core.c | 10 ++++ arch/arm/mach-versatile/core.c | 10 ++++ .../plat-versatile/include/plat/sched_clock.h | 6 +++ arch/arm/plat-versatile/sched-clock.c | 52 ++++++++++--------- 5 files changed, 56 insertions(+), 24 deletions(-) create mode 100644 arch/arm/plat-versatile/include/plat/sched_clock.h diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 92e8c0174dd0..34311f40d713 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -236,6 +236,7 @@ config ARCH_REALVIEW bool "ARM Ltd. RealView family" select ARM_AMBA select COMMON_CLKDEV + select HAVE_SCHED_CLOCK select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB @@ -250,6 +251,7 @@ config ARCH_VERSATILE select ARM_AMBA select ARM_VIC select COMMON_CLKDEV + select HAVE_SCHED_CLOCK select ICST select GENERIC_CLOCKEVENTS select ARCH_WANT_OPTIONAL_GPIOLIB diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c index 07c08151dfe6..3d653e05b75d 100644 --- a/arch/arm/mach-realview/core.c +++ b/arch/arm/mach-realview/core.c @@ -52,6 +52,8 @@ #include #include +#include + #include "core.h" /* used by entry-macro.S and platsmp.c */ @@ -657,6 +659,12 @@ void realview_leds_event(led_event_t ledevt) } #endif /* CONFIG_LEDS */ +/* + * The sched_clock counter + */ +#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + \ + REALVIEW_SYS_24MHz_OFFSET) + /* * Where is the timer (VA)? */ @@ -672,6 +680,8 @@ void __init realview_timer_init(unsigned int timer_irq) { u32 val; + versatile_sched_clock_init(REFCOUNTER, 24000000); + /* * set clock frequency: * REALVIEW_REFCLK is 32KHz diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index e38acb0f89c8..56cdc2257424 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -51,6 +51,8 @@ #include #include +#include + #include "core.h" /* @@ -885,6 +887,12 @@ void __init versatile_init(void) #endif } +/* + * The sched_clock counter + */ +#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + \ + VERSATILE_SYS_24MHz_OFFSET) + /* * Where is the timer (VA)? */ @@ -900,6 +908,8 @@ static void __init versatile_timer_init(void) { u32 val; + versatile_sched_clock_init(REFCOUNTER, 24000000); + /* * set clock frequency: * VERSATILE_REFCLK is 32KHz diff --git a/arch/arm/plat-versatile/include/plat/sched_clock.h b/arch/arm/plat-versatile/include/plat/sched_clock.h new file mode 100644 index 000000000000..5c3e4fc9fa0c --- /dev/null +++ b/arch/arm/plat-versatile/include/plat/sched_clock.h @@ -0,0 +1,6 @@ +#ifndef ARM_PLAT_SCHED_CLOCK_H +#define ARM_PLAT_SCHED_CLOCK_H + +void versatile_sched_clock_init(void __iomem *, unsigned long); + +#endif diff --git a/arch/arm/plat-versatile/sched-clock.c b/arch/arm/plat-versatile/sched-clock.c index 42efd14ed4b3..3d6a4c292cab 100644 --- a/arch/arm/plat-versatile/sched-clock.c +++ b/arch/arm/plat-versatile/sched-clock.c @@ -18,37 +18,41 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#include #include #include -#include -#include -#include +#include +#include -#ifdef VERSATILE_SYS_BASE -#define REFCOUNTER (__io_address(VERSATILE_SYS_BASE) + VERSATILE_SYS_24MHz_OFFSET) -#endif - -#ifdef REALVIEW_SYS_BASE -#define REFCOUNTER (__io_address(REALVIEW_SYS_BASE) + REALVIEW_SYS_24MHz_OFFSET) -#endif +static DEFINE_CLOCK_DATA(cd); +static void __iomem *ctr; /* - * This is the Realview and Versatile sched_clock implementation. This - * has a resolution of 41.7ns, and a maximum value of about 35583 days. - * - * The return value is guaranteed to be monotonic in that range as - * long as there is always less than 89 seconds between successive - * calls to this function. + * Constants generated by clocks_calc_mult_shift(m, s, 24MHz, NSEC_PER_SEC, 60). + * This gives a resolution of about 41ns and a wrap period of about 178s. */ +#define SC_MULT 2796202667u +#define SC_SHIFT 26 + unsigned long long notrace sched_clock(void) { - unsigned long long v = cnt32_to_63(readl(REFCOUNTER)); - - /* the <<1 gets rid of the cnt_32_to_63 top bit saving on a bic insn */ - v *= 125<<1; - do_div(v, 3<<1); - - return v; + if (ctr) { + u32 cyc = readl(ctr); + return cyc_to_fixed_sched_clock(&cd, cyc, (u32)~0, + SC_MULT, SC_SHIFT); + } else + return 0; +} + +static void notrace versatile_update_sched_clock(void) +{ + u32 cyc = readl(ctr); + update_sched_clock(&cd, cyc, (u32)~0); +} + +void __init versatile_sched_clock_init(void __iomem *reg, unsigned long rate) +{ + ctr = reg; + init_fixed_sched_clock(&cd, versatile_update_sched_clock, + 32, rate, SC_MULT, SC_SHIFT); } From 0af85dda39d9b673aca8c0ebae004ea70f3efc93 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 15 Dec 2010 21:58:50 +0000 Subject: [PATCH 54/54] ARM: vexpress: add sched_clock() for Versatile Express Add a sched_clock() implementation to Versatile Express using the new sched_clock() infrastructure for extending 32bit counters to full 64-bit nanoseconds. Tested-by: Will Deacon Signed-off-by: Russell King --- arch/arm/Kconfig | 1 + arch/arm/mach-vexpress/v2m.c | 4 ++++ arch/arm/plat-versatile/Makefile | 5 +++-- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 34311f40d713..17b5ff0db1dc 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -268,6 +268,7 @@ config ARCH_VEXPRESS select COMMON_CLKDEV select GENERIC_CLOCKEVENTS select HAVE_CLK + select HAVE_SCHED_CLOCK select ICST select PLAT_VERSATILE help diff --git a/arch/arm/mach-vexpress/v2m.c b/arch/arm/mach-vexpress/v2m.c index 7eaa232180a5..8c283100a9a0 100644 --- a/arch/arm/mach-vexpress/v2m.c +++ b/arch/arm/mach-vexpress/v2m.c @@ -22,6 +22,8 @@ #include #include +#include + #include #include "core.h" @@ -50,6 +52,8 @@ void __init v2m_map_io(struct map_desc *tile, size_t num) static void __init v2m_timer_init(void) { + versatile_sched_clock_init(MMIO_P2V(V2M_SYS_24MHZ), 24000000); + writel(0, MMIO_P2V(V2M_TIMER0) + TIMER_CTRL); writel(0, MMIO_P2V(V2M_TIMER1) + TIMER_CTRL); diff --git a/arch/arm/plat-versatile/Makefile b/arch/arm/plat-versatile/Makefile index 5cf88e8427b1..8bb6ba6d3572 100644 --- a/arch/arm/plat-versatile/Makefile +++ b/arch/arm/plat-versatile/Makefile @@ -1,7 +1,8 @@ obj-y := clock.o obj-$(CONFIG_ARM_TIMER_SP804) += timer-sp.o -obj-$(CONFIG_ARCH_REALVIEW) += sched-clock.o -obj-$(CONFIG_ARCH_VERSATILE) += sched-clock.o +ifneq ($(CONFIG_ARCH_INTEGRATOR),y) +obj-y += sched-clock.o +endif ifeq ($(CONFIG_LEDS_CLASS),y) obj-$(CONFIG_ARCH_REALVIEW) += leds.o obj-$(CONFIG_ARCH_VERSATILE) += leds.o