diff --git a/[refs] b/[refs] index 354b17e34a55..fac4fb2342d8 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 2f841ed13b9f10037e25ddf417d01700ecd886d0 +refs/heads/master: 1ccb53a4f3d0bfd65889fcfe988d829633641f49 diff --git a/trunk/Documentation/DocBook/sh.tmpl b/trunk/Documentation/DocBook/sh.tmpl index 4a38f604fa66..d858d92cf6d9 100644 --- a/trunk/Documentation/DocBook/sh.tmpl +++ b/trunk/Documentation/DocBook/sh.tmpl @@ -79,6 +79,10 @@ + + Clock Framework Extensions +!Iinclude/linux/sh_clk.h + Machine Specific Interfaces diff --git a/trunk/Documentation/edac.txt b/trunk/Documentation/edac.txt index 9ee774de57cd..0b875e8da969 100644 --- a/trunk/Documentation/edac.txt +++ b/trunk/Documentation/edac.txt @@ -196,7 +196,7 @@ csrow3. The representation of the above is reflected in the directory tree in EDAC's sysfs interface. Starting in directory /sys/devices/system/edac/mc each memory controller will be represented -by its own 'mcX' directory, where 'X' is the index of the MC. +by its own 'mcX' directory, where 'X' is the index of the MC. ..../edac/mc/ @@ -207,7 +207,7 @@ by its own 'mcX' directory, where 'X' is the index of the MC. .... Under each 'mcX' directory each 'csrowX' is again represented by a -'csrowX', where 'X' is the csrow index: +'csrowX', where 'X' is the csrow index: .../mc/mc0/ @@ -232,7 +232,7 @@ EDAC control and attribute files. In 'mcX' directories are EDAC control and attribute files for -this 'X' instance of the memory controllers: +this 'X' instance of the memory controllers: Counter reset control file: @@ -343,7 +343,7 @@ Sdram memory scrubbing rate: 'csrowX' DIRECTORIES In the 'csrowX' directories are EDAC control and attribute files for -this 'X' instance of csrow: +this 'X' instance of csrow: Total Uncorrectable Errors count attribute file: diff --git a/trunk/Documentation/fb/00-INDEX b/trunk/Documentation/fb/00-INDEX index 30a70542e823..a618fd99c9f0 100644 --- a/trunk/Documentation/fb/00-INDEX +++ b/trunk/Documentation/fb/00-INDEX @@ -4,41 +4,33 @@ please mail me. Geert Uytterhoeven 00-INDEX - - this file. + - this file. arkfb.txt - info on the fbdev driver for ARK Logic chips. aty128fb.txt - info on the ATI Rage128 frame buffer driver. cirrusfb.txt - info on the driver for Cirrus Logic chipsets. -cmap_xfbdev.txt - - an introduction to fbdev's cmap structures. deferred_io.txt - an introduction to deferred IO. -efifb.txt - - info on the EFI platform driver for Intel based Apple computers. -ep93xx-fb.txt - - info on the driver for EP93xx LCD controller. fbcon.txt - intro to and usage guide for the framebuffer console (fbcon). framebuffer.txt - introduction to frame buffer devices. -gxfb.txt - - info on the framebuffer driver for AMD Geode GX2 based processors. +imacfb.txt + - info on the generic EFI platform driver for Intel based Macs. intel810.txt - documentation for the Intel 810/815 framebuffer driver. intelfb.txt - docs for Intel 830M/845G/852GM/855GM/865G/915G/945G fb driver. internals.txt - quick overview of frame buffer device internals. -lxfb.txt - - info on the framebuffer driver for AMD Geode LX based processors. matroxfb.txt - info on the Matrox framebuffer driver for Alpha, Intel and PPC. -metronomefb.txt - - info on the driver for the Metronome display controller. modedb.txt - info on the video mode database. +matroxfb.txt + - info on the Matrox frame buffer driver. pvr2fb.txt - info on the PowerVR 2 frame buffer driver. pxafb.txt @@ -47,23 +39,13 @@ s3fb.txt - info on the fbdev driver for S3 Trio/Virge chips.
sa1100fb.txt - information about the driver for the SA-1100 LCD controller. -sh7760fb.txt - - info on the SH7760/SH7763 integrated LCDC Framebuffer driver. sisfb.txt - info on the framebuffer device driver for various SiS chips. sstfb.txt - info on the frame buffer driver for 3dfx' Voodoo Graphics boards. tgafb.txt - - info on the TGA (DECChip 21030) frame buffer driver. -tridentfb.txt - info on the framebuffer driver for some Trident chip based cards. -uvesafb.txt - - info on the userspace VESA (VBE2+ compliant) frame buffer device. + - info on the TGA (DECChip 21030) frame buffer driver. vesafb.txt - - info on the VESA frame buffer device. -viafb.modes - - list of modes for VIA Integration Graphic Chip. -viafb.txt - - info on the VIA Integration Graphic Chip console framebuffer driver. + - info on the VESA frame buffer device. vt8623fb.txt - info on the fb driver for the graphics core in VIA VT8623 chipsets. diff --git a/trunk/Documentation/kernel-parameters.txt b/trunk/Documentation/kernel-parameters.txt index cdd2a6e8a3b7..92e83e53148f 100644 --- a/trunk/Documentation/kernel-parameters.txt +++ b/trunk/Documentation/kernel-parameters.txt @@ -2385,11 +2385,6 @@ and is between 256 and 4096 characters. It is defined in the file improve throughput, but will also increase the amount of memory reserved for use by the client. - swapaccount[=0|1] - [KNL] Enable accounting of swap in memory resource - controller if no parameter or 1 is given or disable - it if 0 is given (See Documentation/cgroups/memory.txt) - swiotlb= [IA-64] Number of I/O TLB slabs switches= [HW,M68k] diff --git a/trunk/Documentation/networking/ip-sysctl.txt b/trunk/Documentation/networking/ip-sysctl.txt index 3c5e465296e1..fe95105992c5 100644 --- a/trunk/Documentation/networking/ip-sysctl.txt +++ b/trunk/Documentation/networking/ip-sysctl.txt @@ -144,7 +144,6 @@ tcp_adv_win_scale - INTEGER Count buffering overhead as bytes/2^tcp_adv_win_scale (if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale), if it is <= 0. - Possible values are [-31, 31], inclusive. Default: 2 tcp_allowed_congestion_control - STRING diff --git a/trunk/Documentation/sh/clk.txt b/trunk/Documentation/sh/clk.txt new file mode 100644 index 000000000000..114b595cfa97 --- /dev/null +++ b/trunk/Documentation/sh/clk.txt @@ -0,0 +1,32 @@ +Clock framework on SuperH architecture + +The framework on SH extends the existing API with the function clk_set_rate_ex, +whose prototype is as follows: + + clk_set_rate_ex (struct clk *clk, unsigned long rate, int algo_id) + +The algo_id parameter specifies the algorithm used to recalculate the clocks +adjacent to the clock passed as the first argument. It is assumed that algo_id==0 +means no changes to the adjacent clocks. + +Internally, clk_set_rate_ex forwards the request to the clk->ops->set_rate method, +if it is present in the ops structure. The method should set the clock rate and adjust +all needed clocks according to the passed algo_id. +Exact values for algo_id are machine-dependent.
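+ +A minimal usage sketch (assuming <linux/clk.h> and <linux/err.h> are available; +the device pointer dev and the consumer name "my_clk" are hypothetical, and +algo_id 0 is used per the convention above that it leaves adjacent clocks +unchanged): + + struct clk *clk = clk_get(dev, "my_clk"); /* "my_clk" is a made-up consumer name */ + + if (!IS_ERR(clk)) { + /* request 266 MHz; algo_id 0: do not touch adjacent clocks */ + if (clk_set_rate_ex(clk, 266666666, 0) != 0) + pr_warning("clk_set_rate_ex failed\n"); + clk_put(clk); + }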
For the sh7722, the following +values are defined: + + NO_CHANGE = 0, + IUS_N1_N1, /* I:U = N:1, U:Sh = N:1 */ + IUS_322, /* I:U:Sh = 3:2:2 */ + IUS_522, /* I:U:Sh = 5:2:2 */ + IUS_N11, /* I:U:Sh = N:1:1 */ + SB_N1, /* Sh:B = N:1 */ + SB3_N1, /* Sh:B3 = N:1 */ + SB3_32, /* Sh:B3 = 3:2 */ + SB3_43, /* Sh:B3 = 4:3 */ + SB3_54, /* Sh:B3 = 5:4 */ + BP_N1, /* B:P = N:1 */ + IP_N1 /* I:P = N:1 */ + +Each of these constants denotes a relation between clocks that can be set via the FRQCR +register. diff --git a/trunk/MAINTAINERS b/trunk/MAINTAINERS index b3be8b3d0437..a92c994ba935 100644 --- a/trunk/MAINTAINERS +++ b/trunk/MAINTAINERS @@ -1359,7 +1359,7 @@ F: include/net/bluetooth/ BONDING DRIVER M: Jay Vosburgh -L: netdev@vger.kernel.org +L: bonding-devel@lists.sourceforge.net W: http://sourceforge.net/projects/bonding/ S: Supported F: drivers/net/bonding/ @@ -2444,12 +2444,10 @@ F: drivers/net/wan/sdla.c FRAMEBUFFER LAYER L: linux-fbdev@vger.kernel.org W: http://linux-fbdev.sourceforge.net/ -Q: http://patchwork.kernel.org/project/linux-fbdev/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/lethal/fbdev-2.6.git S: Orphan F: Documentation/fb/ -F: drivers/video/ -F: include/video/ +F: drivers/video/fb* F: include/linux/fb.h FREESCALE DMA DRIVER @@ -5839,8 +5837,6 @@ M: Chris Metcalf W: http://www.tilera.com/scm/ S: Supported F: arch/tile/ -F: drivers/char/hvc_tile.c -F: drivers/net/tile/ TLAN NETWORK DRIVER M: Samuel Chessman diff --git a/trunk/Makefile b/trunk/Makefile index 9e3c89030f5c..b31d21377e4c 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 6 SUBLEVEL = 37 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc3 NAME = Flesh-Eating Bats with Fangs # *DOCUMENTATION* diff --git a/trunk/arch/arm/Kconfig b/trunk/arch/arm/Kconfig index 49778bb43782..db524e75c4a2 100644 --- a/trunk/arch/arm/Kconfig +++ b/trunk/arch/arm/Kconfig @@ -14,7 +14,6 @@ config ARM select HAVE_FUNCTION_TRACER if (!XIP_KERNEL) select HAVE_FTRACE_MCOUNT_RECORD if (!XIP_KERNEL) select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) - select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL) select HAVE_GENERIC_DMA_COHERENT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO @@ -1206,11 +1205,10 @@ config SMP depends on EXPERIMENTAL depends on GENERIC_CLOCKEVENTS depends on REALVIEW_EB_ARM11MP || REALVIEW_EB_A9MP || \ - MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 || \ - ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 || \ - ARCH_MSM_SCORPIONMP + MACH_REALVIEW_PB11MP || MACH_REALVIEW_PBX || ARCH_OMAP4 ||\ + ARCH_S5PV310 || ARCH_TEGRA || ARCH_U8500 || ARCH_VEXPRESS_CA9X4 select USE_GENERIC_SMP_HELPERS - select HAVE_ARM_SCU if !ARCH_MSM_SCORPIONMP + select HAVE_ARM_SCU help This enables support for systems with more than one CPU. If you have a system with only one CPU, like most personal computers, say N. If @@ -1285,7 +1283,6 @@ config NR_CPUS config HOTPLUG_CPU bool "Support for hot-pluggable CPUs (EXPERIMENTAL)" depends on SMP && HOTPLUG && EXPERIMENTAL - depends on !ARCH_MSM help Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. @@ -1294,7 +1291,7 @@ config LOCAL_TIMERS bool "Use local timer interrupts" depends on SMP default y - select HAVE_ARM_TWD if !ARCH_MSM_SCORPIONMP + select HAVE_ARM_TWD help Enable support for local timers on SMP platforms, rather than the legacy IPI broadcast method.
Local timers allow the system diff --git a/trunk/arch/arm/Kconfig.debug b/trunk/arch/arm/Kconfig.debug index eac62085f5b2..2fd0b99afc4b 100644 --- a/trunk/arch/arm/Kconfig.debug +++ b/trunk/arch/arm/Kconfig.debug @@ -23,7 +23,7 @@ config STRICT_DEVMEM config FRAME_POINTER bool depends on !THUMB2_KERNEL - default y if !ARM_UNWIND || FUNCTION_GRAPH_TRACER + default y if !ARM_UNWIND help If you say N here, the resulting kernel will be slightly smaller and faster. However, if neither FRAME_POINTER nor ARM_UNWIND are enabled, diff --git a/trunk/arch/arm/boot/compressed/head.S b/trunk/arch/arm/boot/compressed/head.S index 9be21ba648cd..6825c34646d4 100644 --- a/trunk/arch/arm/boot/compressed/head.S +++ b/trunk/arch/arm/boot/compressed/head.S @@ -1084,6 +1084,6 @@ memdump: mov r12, r0 reloc_end: .align - .section ".stack", "aw", %nobits + .section ".stack", "w" user_stack: .space 4096 user_stack_end: diff --git a/trunk/arch/arm/boot/compressed/vmlinux.lds.in b/trunk/arch/arm/boot/compressed/vmlinux.lds.in index 366a924019ac..d08168941bd6 100644 --- a/trunk/arch/arm/boot/compressed/vmlinux.lds.in +++ b/trunk/arch/arm/boot/compressed/vmlinux.lds.in @@ -57,7 +57,7 @@ SECTIONS .bss : { *(.bss) } _end = .; - .stack : { *(.stack) } + .stack (NOLOAD) : { *(.stack) } .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } diff --git a/trunk/arch/arm/include/asm/assembler.h b/trunk/arch/arm/include/asm/assembler.h index 749bb6622404..062b58c029ab 100644 --- a/trunk/arch/arm/include/asm/assembler.h +++ b/trunk/arch/arm/include/asm/assembler.h @@ -238,7 +238,7 @@ @ Slightly optimised to avoid incrementing the pointer twice usraccoff \instr, \reg, \ptr, \inc, 0, \cond, \abort .if \rept == 2 - usraccoff \instr, \reg, \ptr, \inc, \inc, \cond, \abort + usraccoff \instr, \reg, \ptr, \inc, 4, \cond, \abort .endif add\cond \ptr, #\rept * \inc diff --git a/trunk/arch/arm/include/asm/hw_breakpoint.h b/trunk/arch/arm/include/asm/hw_breakpoint.h index f389b2704d82..4d8ae9d67abe 100644 --- a/trunk/arch/arm/include/asm/hw_breakpoint.h +++ b/trunk/arch/arm/include/asm/hw_breakpoint.h @@ -20,8 +20,8 @@ struct arch_hw_breakpoint_ctrl { struct arch_hw_breakpoint { u32 address; u32 trigger; - struct arch_hw_breakpoint_ctrl step_ctrl; - struct arch_hw_breakpoint_ctrl ctrl; + struct perf_event *suspended_wp; + struct arch_hw_breakpoint_ctrl ctrl; }; static inline u32 encode_ctrl_reg(struct arch_hw_breakpoint_ctrl ctrl) diff --git a/trunk/arch/arm/include/asm/mmu.h b/trunk/arch/arm/include/asm/mmu.h index b4ffe9d5b526..68870c776671 100644 --- a/trunk/arch/arm/include/asm/mmu.h +++ b/trunk/arch/arm/include/asm/mmu.h @@ -13,10 +13,6 @@ typedef struct { #ifdef CONFIG_CPU_HAS_ASID #define ASID(mm) ((mm)->context.id & 255) - -/* init_mm.context.id_lock should be initialized. */ -#define INIT_MM_CONTEXT(name) \ - .context.id_lock = __SPIN_LOCK_UNLOCKED(name.context.id_lock), #else #define ASID(mm) (0) #endif diff --git a/trunk/arch/arm/include/asm/pgtable.h b/trunk/arch/arm/include/asm/pgtable.h index 53d1d5deb111..b155414192da 100644 --- a/trunk/arch/arm/include/asm/pgtable.h +++ b/trunk/arch/arm/include/asm/pgtable.h @@ -374,9 +374,6 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) #define pmd_page(pmd) pfn_to_page(__phys_to_pfn(pmd_val(pmd))) -/* we don't need complex calculations here as the pmd is folded into the pgd */ -#define pmd_addr_end(addr,end) (end) - /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to.
diff --git a/trunk/arch/arm/include/asm/system.h b/trunk/arch/arm/include/asm/system.h index ec4327a4653d..1120f18a6b17 100644 --- a/trunk/arch/arm/include/asm/system.h +++ b/trunk/arch/arm/include/asm/system.h @@ -63,11 +63,6 @@ #include #define __exception __attribute__((section(".exception.text"))) -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -#define __exception_irq_entry __irq_entry -#else -#define __exception_irq_entry __exception -#endif struct thread_info; struct task_struct; diff --git a/trunk/arch/arm/include/asm/traps.h b/trunk/arch/arm/include/asm/traps.h index 124475afb007..491960bf4260 100644 --- a/trunk/arch/arm/include/asm/traps.h +++ b/trunk/arch/arm/include/asm/traps.h @@ -15,32 +15,13 @@ struct undef_hook { void register_undef_hook(struct undef_hook *hook); void unregister_undef_hook(struct undef_hook *hook); -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -static inline int __in_irqentry_text(unsigned long ptr) -{ - extern char __irqentry_text_start[]; - extern char __irqentry_text_end[]; - - return ptr >= (unsigned long)&__irqentry_text_start && - ptr < (unsigned long)&__irqentry_text_end; -} -#else -static inline int __in_irqentry_text(unsigned long ptr) -{ - return 0; -} -#endif - static inline int in_exception_text(unsigned long ptr) { extern char __exception_text_start[]; extern char __exception_text_end[]; - int in; - - in = ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; - return in ? : __in_irqentry_text(ptr); + return ptr >= (unsigned long)&__exception_text_start && + ptr < (unsigned long)&__exception_text_end; } extern void __init early_trap_init(void); diff --git a/trunk/arch/arm/kernel/Makefile b/trunk/arch/arm/kernel/Makefile index 679851a9f589..5b9b268f4fbb 100644 --- a/trunk/arch/arm/kernel/Makefile +++ b/trunk/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_FUNCTION_TRACER +ifdef CONFIG_DYNAMIC_FTRACE CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,7 +33,6 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o -obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/trunk/arch/arm/kernel/entry-armv.S b/trunk/arch/arm/kernel/entry-armv.S index 34bbef0d2e7e..c09e3573c5de 100644 --- a/trunk/arch/arm/kernel/entry-armv.S +++ b/trunk/arch/arm/kernel/entry-armv.S @@ -198,7 +198,6 @@ __dabt_svc: @ @ set desired IRQ state, then call main handler @ - debug_entry r1 msr cpsr_c, r9 mov r2, sp bl do_DataAbort @@ -325,7 +324,6 @@ __pabt_svc: #else bl CPU_PABORT_HANDLER #endif - debug_entry r1 msr cpsr_c, r9 @ Maybe enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler @@ -441,7 +439,6 @@ __dabt_usr: @ @ IRQs on, then call the main handler @ - debug_entry r1 enable_irq mov r2, sp adr lr, BSYM(ret_from_exception) @@ -706,7 +703,6 @@ __pabt_usr: #else bl CPU_PABORT_HANDLER #endif - debug_entry r1 enable_irq @ Enable interrupts mov r2, sp @ regs bl do_PrefetchAbort @ call abort handler diff --git a/trunk/arch/arm/kernel/entry-common.S b/trunk/arch/arm/kernel/entry-common.S index aae802ee12f8..8bfa98757cd2 100644 --- a/trunk/arch/arm/kernel/entry-common.S +++ b/trunk/arch/arm/kernel/entry-common.S @@ -141,170 +141,98 @@ ENDPROC(ret_from_fork) #endif #endif -.macro __mcount suffix - 
mcount_enter - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne 1f - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - ldr r1, =ftrace_graph_return - ldr r2, [r1] - cmp r0, r2 - bne ftrace_graph_caller\suffix - - ldr r1, =ftrace_graph_entry - ldr r2, [r1] - ldr r0, =ftrace_graph_entry_stub - cmp r0, r2 - bne ftrace_graph_caller\suffix -#endif - - mcount_exit - -1: mcount_get_lr r1 @ lr of instrumented func - mov r0, lr @ instrumented function - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(2f) - mov pc, r2 -2: mcount_exit -.endm - -.macro __ftrace_caller suffix - mcount_enter +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(__gnu_mcount_nc) + mov ip, lr + ldmia sp!, {lr} + mov pc, ip +ENDPROC(__gnu_mcount_nc) - mcount_get_lr r1 @ lr of instrumented func - mov r0, lr @ instrumented function +ENTRY(ftrace_caller) + stmdb sp!, {r0-r3, lr} + mov r0, lr sub r0, r0, #MCOUNT_INSN_SIZE + ldr r1, [sp, #20] - .globl ftrace_call\suffix -ftrace_call\suffix: + .global ftrace_call +ftrace_call: bl ftrace_stub - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl ftrace_graph_call\suffix -ftrace_graph_call\suffix: - mov r0, r0 -#endif - - mcount_exit -.endm - -.macro __ftrace_graph_caller - sub r0, fp, #4 @ &lr of instrumented routine (&parent) -#ifdef CONFIG_DYNAMIC_FTRACE - @ called from __ftrace_caller, saved in mcount_enter - ldr r1, [sp, #16] @ instrumented routine (func) -#else - @ called from __mcount, untouched in lr - mov r1, lr @ instrumented routine (func) -#endif - sub r1, r1, #MCOUNT_INSN_SIZE - mov r2, fp @ frame pointer - bl prepare_ftrace_return - mcount_exit -.endm + ldmia sp!, {r0-r3, ip, lr} + mov pc, ip +ENDPROC(ftrace_caller) #ifdef CONFIG_OLD_MCOUNT -/* - * mcount - */ - -.macro mcount_enter - stmdb sp!, {r0-r3, lr} -.endm - -.macro mcount_get_lr reg - ldr \reg, [fp, #-4] -.endm - -.macro mcount_exit - ldr lr, [fp, #-4] - ldmia sp!, {r0-r3, pc} -.endm - ENTRY(mcount) -#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} -#else - __mcount _old -#endif ENDPROC(mcount) -#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - __ftrace_caller _old -ENDPROC(ftrace_caller_old) -#endif - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller_old) - __ftrace_graph_caller -ENDPROC(ftrace_graph_caller_old) -#endif + stmdb sp!, {r0-r3, lr} + ldr r1, [fp, #-4] + mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE -.purgem mcount_enter -.purgem mcount_get_lr -.purgem mcount_exit + .globl ftrace_call_old +ftrace_call_old: + bl ftrace_stub + ldr lr, [fp, #-4] @ restore lr + ldmia sp!, {r0-r3, pc} +ENDPROC(ftrace_caller_old) #endif -/* - * __gnu_mcount_nc - */ +#else -.macro mcount_enter +ENTRY(__gnu_mcount_nc) stmdb sp!, {r0-r3, lr} -.endm - -.macro mcount_get_lr reg - ldr \reg, [sp, #20] -.endm - -.macro mcount_exit + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne gnu_trace ldmia sp!, {r0-r3, ip, lr} mov pc, ip -.endm -ENTRY(__gnu_mcount_nc) -#ifdef CONFIG_DYNAMIC_FTRACE - mov ip, lr - ldmia sp!, {lr} +gnu_trace: + ldr r1, [sp, #20] @ lr of instrumented routine + mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(1f) + mov pc, r2 +1: + ldmia sp!, {r0-r3, ip, lr} mov pc, ip -#else - __mcount -#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(ftrace_caller) - __ftrace_caller -ENDPROC(ftrace_caller) -#endif +#ifdef CONFIG_OLD_MCOUNT +/* + * This is under an ifdef in order to force link-time errors for people trying + * to build with !FRAME_POINTER with a GCC which doesn't use the new-style + * 
mcount. + */ +ENTRY(mcount) + stmdb sp!, {r0-r3, lr} + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, ftrace_stub + cmp r0, r2 + bne trace + ldr lr, [fp, #-4] @ restore lr + ldmia sp!, {r0-r3, pc} -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -ENTRY(ftrace_graph_caller) - __ftrace_graph_caller -ENDPROC(ftrace_graph_caller) +trace: + ldr r1, [fp, #-4] @ lr of instrumented routine + mov r0, lr + sub r0, r0, #MCOUNT_INSN_SIZE + mov lr, pc + mov pc, r2 + ldr lr, [fp, #-4] @ restore lr + ldmia sp!, {r0-r3, pc} +ENDPROC(mcount) #endif -.purgem mcount_enter -.purgem mcount_get_lr -.purgem mcount_exit - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER - .globl return_to_handler -return_to_handler: - stmdb sp!, {r0-r3} - mov r0, fp @ frame pointer - bl ftrace_return_to_handler - mov lr, r0 @ r0 has real ret addr - ldmia sp!, {r0-r3} - mov pc, lr -#endif +#endif /* CONFIG_DYNAMIC_FTRACE */ ENTRY(ftrace_stub) .Lftrace_stub: diff --git a/trunk/arch/arm/kernel/entry-header.S b/trunk/arch/arm/kernel/entry-header.S index ae9464900168..d93f976fb389 100644 --- a/trunk/arch/arm/kernel/entry-header.S +++ b/trunk/arch/arm/kernel/entry-header.S @@ -165,25 +165,6 @@ .endm #endif /* !CONFIG_THUMB2_KERNEL */ - @ - @ Debug exceptions are taken as prefetch or data aborts. - @ We must disable preemption during the handler so that - @ we can access the debug registers safely. - @ - .macro debug_entry, fsr -#if defined(CONFIG_HAVE_HW_BREAKPOINT) && defined(CONFIG_PREEMPT) - ldr r4, =0x40f @ mask out fsr.fs - and r5, r4, \fsr - cmp r5, #2 @ debug exception - bne 1f - get_thread_info r10 - ldr r6, [r10, #TI_PREEMPT] @ get preempt count - add r11, r6, #1 @ increment it - str r11, [r10, #TI_PREEMPT] -1: -#endif - .endm - /* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. diff --git a/trunk/arch/arm/kernel/ftrace.c b/trunk/arch/arm/kernel/ftrace.c index c0062ad1e847..971ac8c36ea7 100644 --- a/trunk/arch/arm/kernel/ftrace.c +++ b/trunk/arch/arm/kernel/ftrace.c @@ -24,7 +24,6 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif -#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -60,9 +59,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif +/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, - bool link) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -84,22 +83,15 @@ static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; - if (link) - second |= 1 << 14; + second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; return (second << 16) | first; } #else -static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, - bool link) +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { - unsigned long opcode = 0xea000000; long offset; - if (link) - opcode |= 1 << 24; - offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). 
Ftrace @@ -111,15 +103,10 @@ static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, offset = (offset >> 2) & 0x00ffffff; - return opcode | offset; + return 0xeb000000 | offset; } #endif -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) -{ - return ftrace_gen_branch(pc, addr, true); -} - static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -206,83 +193,3 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } -#endif /* CONFIG_DYNAMIC_FTRACE */ - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, - unsigned long frame_pointer) -{ - unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; - unsigned long old; - int err; - - if (unlikely(atomic_read(¤t->tracing_graph_pause))) - return; - - old = *parent; - *parent = return_hooker; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer); - if (err == -EBUSY) { - *parent = old; - return; - } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } -} - -#ifdef CONFIG_DYNAMIC_FTRACE -extern unsigned long ftrace_graph_call; -extern unsigned long ftrace_graph_call_old; -extern void ftrace_graph_caller_old(void); - -static int __ftrace_modify_caller(unsigned long *callsite, - void (*func) (void), bool enable) -{ - unsigned long caller_fn = (unsigned long) func; - unsigned long pc = (unsigned long) callsite; - unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); - unsigned long nop = 0xe1a00000; /* mov r0, r0 */ - unsigned long old = enable ? nop : branch; - unsigned long new = enable ? branch : nop; - - return ftrace_modify_code(pc, old, new); -} - -static int ftrace_modify_graph_caller(bool enable) -{ - int ret; - - ret = __ftrace_modify_caller(&ftrace_graph_call, - ftrace_graph_caller, - enable); - -#ifdef CONFIG_OLD_MCOUNT - if (!ret) - ret = __ftrace_modify_caller(&ftrace_graph_call_old, - ftrace_graph_caller_old, - enable); -#endif - - return ret; -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - return ftrace_modify_graph_caller(true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return ftrace_modify_graph_caller(false); -} -#endif /* CONFIG_DYNAMIC_FTRACE */ -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/trunk/arch/arm/kernel/hw_breakpoint.c b/trunk/arch/arm/kernel/hw_breakpoint.c index c9f3f0467570..21e3a4ab3b8c 100644 --- a/trunk/arch/arm/kernel/hw_breakpoint.c +++ b/trunk/arch/arm/kernel/hw_breakpoint.c @@ -24,7 +24,6 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt #include -#include #include #include #include @@ -45,7 +44,6 @@ static DEFINE_PER_CPU(struct perf_event *, wp_on_reg[ARM_MAX_WRP]); /* Number of BRP/WRP registers on this CPU. */ static int core_num_brps; -static int core_num_reserved_brps; static int core_num_wrps; /* Debug architecture version. */ @@ -54,6 +52,87 @@ static u8 debug_arch; /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; +/* Determine number of BRP registers available. */ +static int get_num_brps(void) +{ + u32 didr; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Determine number of WRP registers available. 
*/ +static int get_num_wrps(void) +{ + /* + * FIXME: When a watchpoint fires, the only way to work out which + * watchpoint it was is by disassembling the faulting instruction + * and working out the address of the memory access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * For the time being, we only report 1 watchpoint register so we + * always know which watchpoint fired. In the future we can either + * add a disassembler and address generation emulator, or we can + * insert a check to see if the DFAR is set on watchpoint exception + * entry [the ARM ARM states that the DFAR is UNKNOWN, but + * experience shows that it is set on some implementations]. + */ + +#if 0 + u32 didr, wrps; + ARM_DBG_READ(c0, 0, didr); + return ((didr >> 28) & 0xf) + 1; +#endif + + return 1; +} + +int hw_breakpoint_slots(int type) +{ + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } +} + +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warning("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, 0, didr); + return (didr >> 16) & 0xf; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_bps(void) +{ + return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + #define READ_WB_REG_CASE(OP2, M, VAL) \ case ((OP2 << 4) + M): \ ARM_DBG_READ(c ## M, OP2, VAL); \ @@ -131,94 +210,6 @@ static void write_wb_reg(int n, u32 val) isb(); } -/* Determine debug architecture. */ -static u8 get_debug_arch(void) -{ - u32 didr; - - /* Do we implement the extended CPUID interface? */ - if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); - return ARM_DEBUG_ARCH_V6; - } - - ARM_DBG_READ(c0, 0, didr); - return (didr >> 16) & 0xf; -} - -u8 arch_get_debug_arch(void) -{ - return debug_arch; -} - -/* Determine number of BRP register available. */ -static int get_num_brp_resources(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 24) & 0xf) + 1; -} - -/* Does this core support mismatch breakpoints? */ -static int core_has_mismatch_brps(void) -{ - return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 && - get_num_brp_resources() > 1); -} - -/* Determine number of usable WRPs available. */ -static int get_num_wrps(void) -{ - /* - * FIXME: When a watchpoint fires, the only way to work out which - * watchpoint it was is by disassembling the faulting instruction - * and working out the address of the memory access. - * - * Furthermore, we can only do this if the watchpoint was precise - * since imprecise watchpoints prevent us from calculating register - * based addresses. - * - * Providing we have more than 1 breakpoint register, we only report - * a single watchpoint register for the time being. This way, we always - * know which watchpoint fired. 
In the future we can either add a - * disassembler and address generation emulator, or we can insert a - * check to see if the DFAR is set on watchpoint exception entry - * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows - * that it is set on some implementations]. - */ - -#if 0 - int wrps; - u32 didr; - ARM_DBG_READ(c0, 0, didr); - wrps = ((didr >> 28) & 0xf) + 1; -#endif - int wrps = 1; - - if (core_has_mismatch_brps() && wrps >= get_num_brp_resources()) - wrps = get_num_brp_resources() - 1; - - return wrps; -} - -/* We reserve one breakpoint for each watchpoint. */ -static int get_num_reserved_brps(void) -{ - if (core_has_mismatch_brps()) - return get_num_wrps(); - return 0; -} - -/* Determine number of usable BRPs available. */ -static int get_num_brps(void) -{ - int brps = get_num_brp_resources(); - if (core_has_mismatch_brps()) - brps -= get_num_reserved_brps(); - return brps; -} - /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can @@ -239,12 +230,8 @@ static int enable_monitor_mode(void) goto out; } - /* If monitor mode is already enabled, just return. */ - if (dscr & ARM_DSCR_MDBGEN) - goto out; - /* Write to the corresponding DSCR. */ - switch (get_debug_arch()) { + switch (debug_arch) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); @@ -259,30 +246,15 @@ static int enable_monitor_mode(void) /* Check that the write made it through. */ ARM_DBG_READ(c1, 0, dscr); - if (!(dscr & ARM_DSCR_MDBGEN)) + if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), + "failed to enable monitor mode.")) { ret = -EPERM; + } out: return ret; } -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - /* * Check if 8-bit byte-address select is available. * This clobbers WRP 0. @@ -296,6 +268,9 @@ static u8 get_max_wp_len(void) if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) goto out; + if (enable_monitor_mode()) + goto out; + memset(&ctrl, 0, sizeof(ctrl)); ctrl.len = ARM_BREAKPOINT_LEN_8; ctrl_reg = encode_ctrl_reg(ctrl); @@ -314,6 +289,23 @@ u8 arch_get_max_wp_len(void) return max_watchpoint_len; } +/* + * Handler for reactivating a suspended watchpoint when the single + * step `mismatch' breakpoint is triggered. + */ +static void wp_single_step_handler(struct perf_event *bp, int unused, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + perf_event_enable(counter_arch_bp(bp)->suspended_wp); + unregister_hw_breakpoint(bp); +} + +static int bp_is_single_step(struct perf_event *bp) +{ + return bp->overflow_handler == wp_single_step_handler; +} + /* * Install a perf counter breakpoint. */ @@ -322,41 +314,30 @@ int arch_install_hw_breakpoint(struct perf_event *bp) struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; int i, max_slots, ctrl_base, val_base, ret = 0; - u32 addr, ctrl; /* Ensure that we are in monitor mode and halting mode is disabled. 
*/ ret = enable_monitor_mode(); if (ret) goto out; - addr = info->address; - ctrl = encode_ctrl_reg(info->ctrl) | 0x1; - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); - max_slots = core_num_brps; - if (info->step_ctrl.enabled) { - /* Override the breakpoint data with the step data. */ - addr = info->trigger & ~0x3; - ctrl = encode_ctrl_reg(info->step_ctrl); + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps - 1; + + if (bp_is_single_step(bp)) { + info->ctrl.mismatch = 1; + i = max_slots; + slots[i] = bp; + goto setup; } } else { /* Watchpoint */ - if (info->step_ctrl.enabled) { - /* Install into the reserved breakpoint region. */ - ctrl_base = ARM_BASE_BCR + core_num_brps; - val_base = ARM_BASE_BVR + core_num_brps; - /* Override the watchpoint data with the step data. */ - addr = info->trigger & ~0x3; - ctrl = encode_ctrl_reg(info->step_ctrl); - } else { - ctrl_base = ARM_BASE_WCR; - val_base = ARM_BASE_WVR; - } - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + ctrl_base = ARM_BASE_WCR; + val_base = ARM_BASE_WVR; + slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -374,11 +355,12 @@ int arch_install_hw_breakpoint(struct perf_event *bp) goto out; } +setup: /* Setup the address register. */ - write_wb_reg(val_base + i, addr); + write_wb_reg(val_base + i, info->address); /* Setup the control register. */ - write_wb_reg(ctrl_base + i, ctrl); + write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); out: return ret; @@ -393,15 +375,18 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); - max_slots = core_num_brps; + slots = __get_cpu_var(bp_on_reg); + max_slots = core_num_brps - 1; + + if (bp_is_single_step(bp)) { + i = max_slots; + slots[i] = NULL; + goto reset; + } } else { /* Watchpoint */ - if (info->step_ctrl.enabled) - base = ARM_BASE_BCR + core_num_brps; - else - base = ARM_BASE_WCR; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + base = ARM_BASE_WCR; + slots = __get_cpu_var(wp_on_reg); max_slots = core_num_wrps; } @@ -418,6 +403,7 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) return; +reset: /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -551,23 +537,12 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } - /* - * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes. - * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported - * by the hardware and must be aligned to the appropriate number of - * bytes. - */ - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && - info->ctrl.len != ARM_BREAKPOINT_LEN_2 && - info->ctrl.len != ARM_BREAKPOINT_LEN_4) - return -EINVAL; - /* Address */ info->address = bp->attr.bp_addr; /* Privilege */ info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp)) + if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ @@ -586,7 +561,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; - u32 offset, alignment_mask = 0x3; + u32 bytelen, max_len, offset, alignment_mask = 0x3; /* Build the arch_hw_breakpoint. 
*/ ret = arch_build_bp_info(bp); @@ -596,85 +571,84 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Check address alignment. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - offset = info->address & alignment_mask; - switch (offset) { - case 0: - /* Aligned */ - break; - case 1: - /* Allow single byte watchpoint. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) - break; - case 2: - /* Allow halfword watchpoints and breakpoints. */ - if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) - break; - default: - ret = -EINVAL; - goto out; - } + if (info->address & alignment_mask) { + /* + * Try to fix the alignment. This may result in a length + * that is too large, so we must check for that. + */ + bytelen = get_hbp_len(info->ctrl.len); + max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : + max_watchpoint_len; + + if (max_len >= 8) + offset = info->address & 0x7; + else + offset = info->address & 0x3; + + if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { + ret = -EFBIG; + goto out; + } - info->address &= ~alignment_mask; - info->ctrl.len <<= offset; + info->ctrl.len <<= offset; + info->address &= ~offset; + + pr_debug("breakpoint alignment fixup: length = 0x%x, " + "address = 0x%x\n", info->ctrl.len, info->address); + } /* * Currently we rely on an overflow handler to take * care of single-stepping the breakpoint when it fires. * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware - * single-step, but this only works for per-task breakpoints. + * we can use the mismatch feature as a poor-man's hardware single-step. */ if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_brps() - || !bp->hw.bp_target), + (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), "overflow handler required but none found")) { ret = -EINVAL; + goto out; } out: return ret; } -/* - * Enable/disable single-stepping over the breakpoint bp at address addr. - */ -static void enable_single_step(struct perf_event *bp, u32 addr) +static void update_mismatch_flag(int idx, int flag) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); + struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); + struct arch_hw_breakpoint *info; - arch_uninstall_hw_breakpoint(bp); - info->step_ctrl.mismatch = 1; - info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; - info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; - info->step_ctrl.privilege = info->ctrl.privilege; - info->step_ctrl.enabled = 1; - info->trigger = addr; - arch_install_hw_breakpoint(bp); -} + if (bp == NULL) + return; -static void disable_single_step(struct perf_event *bp) -{ - arch_uninstall_hw_breakpoint(bp); - counter_arch_bp(bp)->step_ctrl.enabled = 0; - arch_install_hw_breakpoint(bp); + info = counter_arch_bp(bp); + + /* Update the mismatch field to enter/exit `single-step' mode */ + if (!bp->overflow_handler && info->ctrl.mismatch != flag) { + info->ctrl.mismatch = flag; + write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); + } } static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - struct perf_event *wp, **slots; + struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); struct arch_hw_breakpoint *info; - - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + struct perf_event_attr attr; /* Without a disassembler, we can only handle 1 watchpoint. 
*/ BUG_ON(core_num_wrps > 1); + hw_breakpoint_init(&attr); + attr.bp_addr = regs->ARM_pc & ~0x3; + attr.bp_len = HW_BREAKPOINT_LEN_4; + attr.bp_type = HW_BREAKPOINT_X; + for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); - wp = slots[i]; - - if (wp == NULL) { + if (slots[i] == NULL) { rcu_read_unlock(); continue; } @@ -684,51 +658,24 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) * single watchpoint, we can set the trigger to the lowest * possible faulting address. */ - info = counter_arch_bp(wp); - info->trigger = wp->attr.bp_addr; + info = counter_arch_bp(slots[i]); + info->trigger = slots[i]->attr.bp_addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); - perf_bp_event(wp, regs); + perf_bp_event(slots[i], regs); /* * If no overflow handler is present, insert a temporary * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!wp->overflow_handler) - enable_single_step(wp, instruction_pointer(regs)); - - rcu_read_unlock(); - } -} - -static void watchpoint_single_step_handler(unsigned long pc) -{ - int i; - struct perf_event *wp, **slots; - struct arch_hw_breakpoint *info; - - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); - - for (i = 0; i < core_num_reserved_brps; ++i) { - rcu_read_lock(); - - wp = slots[i]; - - if (wp == NULL) - goto unlock; - - info = counter_arch_bp(wp); - if (!info->step_ctrl.enabled) - goto unlock; - - /* - * Restore the original watchpoint if we've completed the - * single-step. - */ - if (info->trigger != pc) - disable_single_step(wp); + if (!slots[i]->overflow_handler) { + bp = register_user_hw_breakpoint(&attr, + wp_single_step_handler, + current); + counter_arch_bp(bp)->suspended_wp = slots[i]; + perf_event_disable(slots[i]); + } -unlock: rcu_read_unlock(); } } @@ -736,69 +683,62 @@ static void watchpoint_single_step_handler(unsigned long pc) static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; + int mismatch; u32 ctrl_reg, val, addr; - struct perf_event *bp, **slots; + struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); - /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; - /* Check the currently installed breakpoints first. */ for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; - if (bp == NULL) - goto unlock; + if (bp == NULL) { + rcu_read_unlock(); + continue; + } - info = counter_arch_bp(bp); + mismatch = 0; /* Check if the breakpoint value matches. */ val = read_wb_reg(ARM_BASE_BVR + i); if (val != (addr & ~0x3)) - goto mismatch; + goto unlock; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); decode_ctrl_reg(ctrl_reg, &ctrl); if ((1 << (addr & 0x3)) & ctrl.len) { + mismatch = 1; + info = counter_arch_bp(bp); info->trigger = addr; + } + +unlock: + if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (!bp->overflow_handler) - enable_single_step(bp, addr); - goto unlock; } -mismatch: - /* If we're stepping a breakpoint, it can now be restored. */ - if (info->step_ctrl.enabled) - disable_single_step(bp); -unlock: + update_mismatch_flag(i, mismatch); rcu_read_unlock(); } - - /* Handle any pending watchpoint single-step breakpoints. 
*/ - watchpoint_single_step_handler(addr); } /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint] with preemption disabled. + * Prefetch Abort Handler [breakpoint]. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int ret = 0; + int ret = 1; /* Unhandled fault. */ u32 dscr; - /* We must be called with preemption disabled. */ - WARN_ON(preemptible()); - /* We only handle watchpoints and hardware breakpoints. */ ARM_DBG_READ(c1, 0, dscr); @@ -813,47 +753,25 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, watchpoint_handler(addr, regs); break; default: - ret = 1; /* Unhandled fault. */ + goto out; } - /* - * Re-enable preemption after it was disabled in the - * low-level exception handling code. - */ - preempt_enable(); - + ret = 0; +out: return ret; } /* * One-time initialisation. */ -static void reset_ctrl_regs(void *unused) +static void __init reset_ctrl_regs(void *unused) { int i; - /* - * v7 debug contains save and restore registers so that debug state - * can be maintained across low-power modes without leaving - * the debug logic powered up. It is IMPLEMENTATION DEFINED whether - * we can write to the debug registers out of reset, so we must - * unlock the OS Lock Access Register to avoid taking undefined - * instruction exceptions later on. - */ - if (debug_arch >= ARM_DEBUG_ARCH_V7_ECP14) { - /* - * Unconditionally clear the lock by writing a value - * other than 0xC5ACCE55 to the access register. - */ - asm volatile("mcr p14, 0, %0, c1, c0, 4" : : "r" (0)); - isb(); - } - if (enable_monitor_mode()) return; - /* We must also reset any reserved registers. */ - for (i = 0; i < core_num_brps + core_num_reserved_brps; ++i) { + for (i = 0; i < core_num_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -864,57 +782,45 @@ static void reset_ctrl_regs(void *unused) } } -static int __cpuinit dbg_reset_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - if (action == CPU_ONLINE) - smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); - return NOTIFY_OK; -} - -static struct notifier_block __cpuinitdata dbg_reset_nb = { - .notifier_call = dbg_reset_notify, -}; - static int __init arch_hw_breakpoint_init(void) { + int ret = 0; u32 dscr; debug_arch = get_debug_arch(); if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { pr_info("debug architecture 0x%x unsupported.\n", debug_arch); - return 0; + ret = -ENODEV; + goto out; } /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); - core_num_reserved_brps = get_num_reserved_brps(); core_num_wrps = get_num_wrps(); pr_info("found %d breakpoint and %d watchpoint registers.\n", - core_num_brps + core_num_reserved_brps, core_num_wrps); + core_num_brps, core_num_wrps); - if (core_num_reserved_brps) - pr_info("%d breakpoint(s) reserved for watchpoint " - "single-step.\n", core_num_reserved_brps); + if (core_has_mismatch_bps()) + pr_info("1 breakpoint reserved for watchpoint single-step.\n"); ARM_DBG_READ(c1, 0, dscr); if (dscr & ARM_DSCR_HDBGEN) { pr_warning("halting debug mode enabled. Assuming maximum " "watchpoint size of 4 bytes."); } else { + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); + /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. 
*/ smp_call_function(reset_ctrl_regs, NULL, 1); reset_ctrl_regs(NULL); - - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); } /* Register debug fault handler. */ @@ -923,9 +829,8 @@ static int __init arch_hw_breakpoint_init(void) hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, "breakpoint debug exception"); - /* Register hotplug notifier. */ - register_cpu_notifier(&dbg_reset_nb); - return 0; +out: + return ret; } arch_initcall(arch_hw_breakpoint_init); diff --git a/trunk/arch/arm/kernel/irq.c b/trunk/arch/arm/kernel/irq.c index 6d616333340f..36ad3be4692a 100644 --- a/trunk/arch/arm/kernel/irq.c +++ b/trunk/arch/arm/kernel/irq.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -106,8 +105,7 @@ int show_interrupts(struct seq_file *p, void *v) * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception_irq_entry -asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/trunk/arch/arm/kernel/perf_event.c b/trunk/arch/arm/kernel/perf_event.c index 624e2a5de2b3..07a50357492a 100644 --- a/trunk/arch/arm/kernel/perf_event.c +++ b/trunk/arch/arm/kernel/perf_event.c @@ -4,7 +4,9 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * Copyright (C) 2010 ARM Ltd., Will Deacon + * + * ARMv7 support: Jean Pihet + * 2010 (c) MontaVista Software, LLC. * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -32,7 +34,7 @@ static struct platform_device *pmu_device; * Hardware lock to serialize accesses to PMU registers. Needed for the * read/modify/write sequences. */ -static DEFINE_RAW_SPINLOCK(pmu_lock); +DEFINE_SPINLOCK(pmu_lock); /* * ARMv6 supports a maximum of 3 events, starting from index 1. If we add @@ -65,25 +67,31 @@ struct cpu_hw_events { */ unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; }; -static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); +DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); + +/* PMU names. 
*/ +static const char *arm_pmu_names[] = { + [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", + [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", + [ARM_PERF_PMU_ID_V6] = "v6", + [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", + [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", + [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", +}; struct arm_pmu { enum arm_perf_pmu_ids id; - const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); + int (*event_map)(int evt); + u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); - const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - const unsigned (*event_map)[PERF_COUNT_HW_MAX]; - u32 raw_event_mask; int num_events; u64 max_period; }; @@ -128,6 +136,10 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF +static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + static int armpmu_map_cache_event(u64 config) { @@ -145,7 +157,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; + ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -153,19 +165,6 @@ armpmu_map_cache_event(u64 config) return ret; } -static int -armpmu_map_event(u64 config) -{ - int mapping = (*armpmu->event_map)[config]; - return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; -} - -static int -armpmu_map_raw_event(u64 config) -{ - return (int)(config & armpmu->raw_event_mask); -} - static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, @@ -459,11 +458,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu_map_event(event->attr.config); + mapping = armpmu->event_map(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu_map_raw_event(event->attr.config); + mapping = armpmu->raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -604,10 +603,2366 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* Include the PMU-specific implementations. */ -#include "perf_event_xscale.c" -#include "perf_event_v6.c" -#include "perf_event_v7.c" +/* + * ARMv6 Performance counter handling code. + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. 
The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has its + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 1, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined.
+ */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. 
+ */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. 
+ */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 +armv6pmu_read_counter(int counter) +{ + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void +armv6pmu_write_counter(int counter, + u32 value) +{ + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, 
c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +void +armv6pmu_enable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void +armv6pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +void +armv6pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline int +armv6pmu_event_map(int config) +{ + int mapping = armv6_perf_map[config]; + if (HW_OP_UNSUPPORTED == mapping) + mapping = -EOPNOTSUPP; + return mapping; +} + +static inline int +armv6mpcore_pmu_event_map(int config) +{ + int mapping = armv6mpcore_perf_map[config]; + if (HW_OP_UNSUPPORTED == mapping) + mapping = -EOPNOTSUPP; + return mapping; +} + +static u64 +armv6pmu_raw_event(u64 config) +{ + return config & 0xff; +} + +static int +armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + /* Always place a cycle counter into the cycle counter. 
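
armv6pmu_enable_event() is at heart a read-modify-write of two PMCR fields under pmu_lock: clear the old event number, then install the new one together with its interrupt-enable bit. Stripped of the mrc/mcr access, the bit manipulation reduces to a pure function; the sketch below re-declares the relevant constants locally so it stands alone:

#include <stdio.h>
#include <stdint.h>

#define EX_PMCR_EVT_COUNT0_SHIFT	20
#define EX_PMCR_EVT_COUNT0_MASK		(0xFFu << EX_PMCR_EVT_COUNT0_SHIFT)
#define EX_PMCR_COUNT0_IEN		(1u << 4)

/* Model of the locked read-modify-write: mask out the old event,
 * install the new event number and enable the counter's interrupt. */
static uint32_t ex_program_count0(uint32_t pmcr, unsigned int evt)
{
	pmcr &= ~EX_PMCR_EVT_COUNT0_MASK;
	pmcr |= ((uint32_t)evt << EX_PMCR_EVT_COUNT0_SHIFT) | EX_PMCR_COUNT0_IEN;
	return pmcr;
}

int main(void)
{
	/* 0x7 = ARMV6_PERFCTR_INSTR_EXEC, starting from just the enable bit. */
	printf("PMCR = 0x%08x\n", ex_program_count0(0x1, 0x7));
	return 0;
}

Expected output is PMCR = 0x00700011: event 0x7 in bits [27:20], interrupt enable in bit 4, global enable in bit 0.
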
*/ + if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { + return ARMV6_COUNTER1; + } + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { + return ARMV6_COUNTER0; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void +armv6pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, evt, flags; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, + int idx) +{ + unsigned long val, mask, flags, evt = 0; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + spin_lock_irqsave(&pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static const struct arm_pmu armv6pmu = { + .id = ARM_PERF_PMU_ID_V6, + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6pmu_disable_event, + .event_map = armv6pmu_event_map, + .raw_event = armv6pmu_raw_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. 
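
The asymmetry between the two disable paths deserves a concrete look: UP ARMv6 retargets the counter at the never-asserting ETMEXTOUT[0] event (0x20) and masks its interrupt, while 11MPCore can only mask the interrupt and let the counter keep running. A pure model of both, with constants re-declared locally for illustration:

#include <stdio.h>
#include <stdint.h>

#define EX_EVT0_SHIFT	20
#define EX_EVT0_MASK	(0xFFu << EX_EVT0_SHIFT)
#define EX_COUNT0_IEN	(1u << 4)
#define EX_EVT_NOP	0x20	/* ETMEXTOUT[0]: never asserts, never counts */

/* UP ARMv6: point the counter at a dead event and mask its IRQ. */
static uint32_t ex_disable_up(uint32_t pmcr)
{
	pmcr &= ~(EX_COUNT0_IEN | EX_EVT0_MASK);
	return pmcr | (EX_EVT_NOP << EX_EVT0_SHIFT);
}

/* 11MPCore: no dead-event trick, so only the IRQ is masked. */
static uint32_t ex_disable_mpcore(uint32_t pmcr)
{
	return pmcr & ~EX_COUNT0_IEN;
}

int main(void)
{
	printf("up:     0x%08x\n", ex_disable_up(0x00700011));
	printf("mpcore: 0x%08x\n", ex_disable_mpcore(0x00700011));
	return 0;
}
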
+ */ +static const struct arm_pmu armv6mpcore_pmu = { + .id = ARM_PERF_PMU_ID_V6MP, + .handle_irq = armv6pmu_handle_irq, + .enable = armv6pmu_enable_event, + .disable = armv6mpcore_pmu_disable_event, + .event_map = armv6mpcore_pmu_event_map, + .raw_event = armv6pmu_raw_event, + .read_counter = armv6pmu_read_counter, + .write_counter = armv6pmu_write_counter, + .get_event_idx = armv6pmu_get_event_idx, + .start = armv6pmu_start, + .stop = armv6pmu_stop, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +/* Common ARMv7 event types */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_IFETCH_MISS = 0x01, + ARMV7_PERFCTR_ITLB_MISS = 0x02, + ARMV7_PERFCTR_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_DREAD = 0x06, + ARMV7_PERFCTR_DWRITE = 0x07, + + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + + ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + + ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, + ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, + ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, + ARMV7_PERFCTR_L2_ACCESS = 0x43, + ARMV7_PERFCTR_L2_CACH_MISS = 0x44, + ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, + ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, + ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, + ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, + ARMV7_PERFCTR_L1_DATA_MISS = 0x49, + ARMV7_PERFCTR_L1_INST_MISS = 0x4A, + ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, + ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, + ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, + ARMV7_PERFCTR_L2_NEON = 0x4E, + ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, + ARMV7_PERFCTR_L1_INST = 0x50, + ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, + ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, + ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, + ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, + ARMV7_PERFCTR_OP_EXECUTED = 0x55, + ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, + ARMV7_PERFCTR_CYCLES_INST = 0x57, + ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, + ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, + ARMV7_PERFCTR_NEON_CYCLES = 0x5A, + + ARMV7_PERFCTR_PMU0_EVENTS = 0x70, + ARMV7_PERFCTR_PMU1_EVENTS = 0x71, + ARMV7_PERFCTR_PMU_EVENTS = 0x72, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, + ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, + ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, + + ARMV7_PERFCTR_COHERENT_LINE_MISS = 
0x50, + ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, + + ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, + ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, + ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, + ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, + ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, + ARMV7_PERFCTR_DATA_EVICTION = 0x65, + ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, + ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, + + ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, + + ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, + ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, + ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, + ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, + ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, + + ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, + ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, + ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, + ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, + ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, + ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, + ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, + + ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, + ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, + + ARMV7_PERFCTR_ISB_INST = 0x90, + ARMV7_PERFCTR_DSB_INST = 0x91, + ARMV7_PERFCTR_DMB_INST = 0x92, + ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, + + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, + ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, + ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, + ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, + ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, + ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. 
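
Events with no generic perf equivalent, such as the Cortex-A9's 0x68 (instructions out of the rename stage) above or the main TLB misses mentioned in the earlier map comments, stay reachable from user space as raw events, since the ARMv7 raw_event hook further down keeps only the low byte of the config. A hedged user-space sketch; the helper name is the example's own:

#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

/* Ask for a machine-specific event by number rather than through the
 * generic PERF_COUNT_HW_* namespace. */
static int open_raw_event(unsigned long long evt)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = evt;		/* only bits [7:0] survive the hook */

	return perf_event_open(&attr, 0, -1, -1, 0);
}

int main(void)
{
	return open_raw_event(0x68) < 0;	/* A9 rename-stage instructions */
}
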
+ */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = + ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. 
+ */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * Only ITLB misses and DTLB refills are supported. + * If users want the DTLB refills misses a raw counter + * must be used. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, + [C(RESULT_MISS)] + = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events counters + */ +enum armv7_counters { + ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ + ARMV7_COUNTER0 = 2, /* First event counter */ +}; + +/* + * The cycle counter is ARMV7_CYCLE_COUNTER. + * The first event counter is ARMV7_COUNTER0. + * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). 
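
The perf-level indices and hardware counter numbers are deliberately offset: index 1 is the cycle counter (hardware counter 31), index 2 is hardware counter 0, and so on. The ARMV7_* macros that follow encode exactly this arithmetic; here it is as a standalone model with local names, not the driver's:

#include <stdio.h>

#define EX_CYCLE_COUNTER	1	/* perf index of the cycle counter */
#define EX_COUNTER0		2	/* perf index of event counter 0 */
#define EX_CNT0			0	/* hardware number of event counter 0 */
#define EX_CCNT			31	/* hardware number of the cycle counter */
#define EX_IDX_TO_CNT		(EX_COUNTER0 - EX_CNT0)

/* Which CNTENS/CNTENC/INTENS/INTENC bit controls a given perf index? */
static unsigned int ex_counter_bit(int idx)
{
	if (idx == EX_CYCLE_COUNTER)
		return 1u << EX_CCNT;
	return 1u << (idx - EX_IDX_TO_CNT);
}

int main(void)
{
	printf("cycle counter -> 0x%08x\n", ex_counter_bit(EX_CYCLE_COUNTER));
	printf("counter 0     -> 0x%08x\n", ex_counter_bit(EX_COUNTER0));
	printf("counter 3     -> 0x%08x\n", ex_counter_bit(EX_COUNTER0 + 3));
	return 0;
}
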
+ */ +#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * Available counters + */ +#define ARMV7_CNT0 0 /* First event counter */ +#define ARMV7_CCNT 31 /* Cycle counter */ + +/* Perf Event to low level counters mapping */ +#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) + +/* + * CNTENS: counters enable reg + */ +#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) + +/* + * CNTENC: counters disable reg + */ +#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) + +/* + * INTENS: counters overflow interrupt enable reg + */ +#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENS_C (1 << ARMV7_CCNT) + +/* + * INTENC: counters overflow interrupt disable reg + */ +#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_INTENC_C (1 << ARMV7_CCNT) + +/* + * EVTSEL: Event selection reg + */ +#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ + +/* + * SELECT: Counter selection reg + */ +#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) +#define ARMV7_FLAG_C (1 << ARMV7_CCNT) +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +static inline unsigned long armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(unsigned long val) +{ + val &= ARMV7_PMNC_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, + enum armv7_counters counter) +{ + int ret = 0; + + if (counter == ARMV7_CYCLE_COUNTER) + ret = pmnc & ARMV7_FLAG_C; + else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) + ret = pmnc & ARMV7_FLAG_P(counter); + else + pr_err("CPU%u checking wrong counter %d overflow status\n", + smp_processor_id(), counter); + + return ret; +} + +static inline int armv7_pmnc_select_counter(unsigned int idx) +{ + u32 val; + + if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { + pr_err("CPU%u selecting wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); + + return idx; +} + +static inline u32 armv7pmu_read_counter(int idx) +{ + unsigned long value = 0; + + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == 
idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" + : "=r" (value)); + } else + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + + return value; +} + +static inline void armv7pmu_write_counter(int idx, u32 value) +{ + if (idx == ARMV7_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { + if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" + : : "r" (value)); + } else + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); +} + +static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTSEL_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline u32 armv7_pmnc_enable_counter(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENS_C; + else + val = ARMV7_CNTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_counter(unsigned int idx) +{ + u32 val; + + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_CNTENC_C; + else + val = ARMV7_CNTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_enable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u enabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENS_C; + else + val = ARMV7_INTENS_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_disable_intens(unsigned int idx) +{ + u32 val; + + if ((idx != ARMV7_CYCLE_COUNTER) && + ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { + pr_err("CPU%u disabling wrong PMNC counter" + " interrupt enable %d\n", smp_processor_id(), idx); + return -1; + } + + if (idx == ARMV7_CYCLE_COUNTER) + val = ARMV7_INTENC_C; + else + val = ARMV7_INTENC_P(idx); + + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(void) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" 
: "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + cnt-ARMV7_EVENT_CNT_TO_CNTx, val); + } +} +#endif + +void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We don't need to set the event if it's a cycle count + */ + if (idx != ARMV7_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long flags; + + /* + * Disable counter and interrupt + */ + spin_lock_irqsave(&pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + perf_sample_data_init(&data, 0); + + cpuc = &__get_cpu_var(cpu_hw_events); + for (idx = 0; idx <= armpmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event, hwc, idx); + data.period = event->hw.last_period; + if (!armpmu_event_set_period(event, hwc, idx)) + continue; + + if (perf_event_overflow(event, 0, &data, regs)) + armpmu->disable(hwc, idx); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. 
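
armpmu_event_set_period(), called from the overflow loop above, is defined earlier in this file and not visible in this hunk. On a 32-bit up-counter its arithmetic typically comes down to priming the counter with 2^32 - left, so the next overflow interrupt arrives after 'left' more events. The sketch below models that idea under those assumptions; it is not the file's actual implementation:

#include <stdio.h>
#include <stdint.h>

/* Prime an up-counter so it overflows after 'left' events, clamped
 * to the PMU's maximum period (2^32 - 1 here). */
static uint32_t ex_reload_for(int64_t left, uint64_t max_period)
{
	if (left > (int64_t)max_period)
		left = (int64_t)max_period;
	return (uint32_t)((uint64_t)-left & max_period);
}

int main(void)
{
	uint64_t max_period = (1ULL << 32) - 1;

	printf("left=1000 -> 0x%08x\n", (unsigned)ex_reload_for(1000, max_period));
	printf("left=2^31 -> 0x%08x\n",
	       (unsigned)ex_reload_for(1LL << 31, max_period));
	return 0;
}
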
+ */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void armv7pmu_stop(void) +{ + unsigned long flags; + + spin_lock_irqsave(&pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline int armv7_a8_pmu_event_map(int config) +{ + int mapping = armv7_a8_perf_map[config]; + if (HW_OP_UNSUPPORTED == mapping) + mapping = -EOPNOTSUPP; + return mapping; +} + +static inline int armv7_a9_pmu_event_map(int config) +{ + int mapping = armv7_a9_perf_map[config]; + if (HW_OP_UNSUPPORTED == mapping) + mapping = -EOPNOTSUPP; + return mapping; +} + +static u64 armv7pmu_raw_event(u64 config) +{ + return config & 0xff; +} + +static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + int idx; + + /* Always place a cycle counter into the cycle counter. */ + if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static struct arm_pmu armv7pmu = { + .handle_irq = armv7pmu_handle_irq, + .enable = armv7pmu_enable_event, + .disable = armv7pmu_disable_event, + .raw_event = armv7pmu_raw_event, + .read_counter = armv7pmu_read_counter, + .write_counter = armv7pmu_write_counter, + .get_event_idx = armv7pmu_get_event_idx, + .start = armv7pmu_start, + .stop = armv7pmu_stop, + .max_period = (1LLU << 32) - 1, +}; + +static u32 __init armv7_reset_read_pmnc(void) +{ + u32 nb_cnt; + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Based on xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. 
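
One more note on the ARMv7 probe above before the xscale types: armv7_reset_read_pmnc() reads the event-counter population out of PMNC[15:11] and adds one for the cycle counter. As a standalone model with the constants copied locally:

#include <stdio.h>
#include <stdint.h>

#define EX_PMNC_N_SHIFT	11
#define EX_PMNC_N_MASK	0x1f

/* PMNC[15:11] holds the number of event counters; the cycle counter
 * sits outside that field and is counted separately. */
static unsigned int ex_num_counters(uint32_t pmnc)
{
	return ((pmnc >> EX_PMNC_N_SHIFT) & EX_PMNC_N_MASK) + 1;
}

int main(void)
{
	/* A Cortex-A8 style PMNC reporting 4 event counters. */
	printf("%u counters including the cycle counter\n",
	       ex_num_counters(4u << EX_PMNC_N_SHIFT));
	return 0;
}
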
+ */ + +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 1, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + 
[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)]	= CACHE_OP_UNSUPPORTED,
+			[C(RESULT_MISS)]	= CACHE_OP_UNSUPPORTED,
+		},
+	},
+};
+
+#define XSCALE_PMU_ENABLE	0x001
+#define XSCALE_PMN_RESET	0x002
+#define XSCALE_CCNT_RESET	0x004
+#define XSCALE_PMU_RESET	(XSCALE_CCNT_RESET | XSCALE_PMN_RESET)
+#define XSCALE_PMU_CNT64	0x008
+
+static inline int
+xscalepmu_event_map(int config)
+{
+	int mapping = xscale_perf_map[config];
+	if (HW_OP_UNSUPPORTED == mapping)
+		mapping = -EOPNOTSUPP;
+	return mapping;
+}
+
+static u64
+xscalepmu_raw_event(u64 config)
+{
+	return config & 0xff;
+}
+
+#define XSCALE1_OVERFLOWED_MASK	0x700
+#define XSCALE1_CCOUNT_OVERFLOW	0x400
+#define XSCALE1_COUNT0_OVERFLOW	0x100
+#define XSCALE1_COUNT1_OVERFLOW	0x200
+#define XSCALE1_CCOUNT_INT_EN	0x040
+#define XSCALE1_COUNT0_INT_EN	0x010
+#define XSCALE1_COUNT1_INT_EN	0x020
+#define XSCALE1_COUNT0_EVT_SHFT	12
+#define XSCALE1_COUNT0_EVT_MASK	(0xff << XSCALE1_COUNT0_EVT_SHFT)
+#define XSCALE1_COUNT1_EVT_SHFT	20
+#define XSCALE1_COUNT1_EVT_MASK	(0xff << XSCALE1_COUNT1_EVT_SHFT)
+
+static inline u32
+xscale1pmu_read_pmnc(void)
+{
+	u32 val;
+	asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val));
+	return val;
+}
+
+static inline void
+xscale1pmu_write_pmnc(u32 val)
+{
+	/* upper 4 bits and 7, 11 are write-as-0 */
+	val &= 0xffff77f;
+	asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val));
+}
+
+static inline int
+xscale1_pmnc_counter_has_overflowed(unsigned long pmnc,
+					enum xscale_counters counter)
+{
+	int ret = 0;
+
+	switch (counter) {
+	case XSCALE_CYCLE_COUNTER:
+		ret = pmnc & XSCALE1_CCOUNT_OVERFLOW;
+		break;
+	case XSCALE_COUNTER0:
+		ret = pmnc & XSCALE1_COUNT0_OVERFLOW;
+		break;
+	case XSCALE_COUNTER1:
+		ret = pmnc & XSCALE1_COUNT1_OVERFLOW;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", counter);
+	}
+
+	return ret;
+}
+
+static irqreturn_t
+xscale1pmu_handle_irq(int irq_num, void *dev)
+{
+	unsigned long pmnc;
+	struct perf_sample_data data;
+	struct cpu_hw_events *cpuc;
+	struct pt_regs *regs;
+	int idx;
+
+	/*
+	 * NOTE: there's an A stepping erratum that states if an overflow
+	 *       bit already exists and another occurs, the previous
+	 *       overflow bit gets cleared. There's no workaround.
+	 *       Fixed in B stepping or later.
+	 */
+	pmnc = xscale1pmu_read_pmnc();
+
+	/*
+	 * Write the value back to clear the overflow flags. Overflow
+	 * flags remain in pmnc for use below. We also disable the PMU
+	 * while we process the interrupt.
+	 */
+	xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE);
+
+	if (!(pmnc & XSCALE1_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
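
Everything xscale1 needs lives in a single register: global enable, per-counter interrupt enables, the two event fields and the overflow flags all share the PMNC, which is why the handler above gets by on one read. A small decoding model, constants copied locally for the example:

#include <stdio.h>
#include <stdint.h>

#define EX_CCOUNT_OVERFLOW	0x400
#define EX_COUNT0_OVERFLOW	0x100
#define EX_COUNT1_OVERFLOW	0x200

/* One PMNC read tells the IRQ handler exactly which counters wrapped. */
static void ex_report_overflows(uint32_t pmnc)
{
	if (pmnc & EX_CCOUNT_OVERFLOW)
		printf("cycle counter overflowed\n");
	if (pmnc & EX_COUNT0_OVERFLOW)
		printf("counter 0 overflowed\n");
	if (pmnc & EX_COUNT1_OVERFLOW)
		printf("counter 1 overflowed\n");
}

int main(void)
{
	ex_report_overflows(0x500);	/* cycle counter and counter 0 */
	return 0;
}
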
+ */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void +xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + unsigned long val, mask, evt, flags; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, + struct hw_perf_event *event) +{ + if (XSCALE_PERFCTR_CCNT == event->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { + return XSCALE_COUNTER1; + } + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { + return XSCALE_COUNTER0; + } + + return -EAGAIN; + } +} + +static void +xscale1pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale1pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale1pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale1pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale1pmu = { + .id = ARM_PERF_PMU_ID_XSCALE1, + .handle_irq = xscale1pmu_handle_irq, + 
.enable = xscale1pmu_enable_event, + .disable = xscale1pmu_disable_event, + .event_map = xscalepmu_event_map, + .raw_event = xscalepmu_raw_event, + .read_counter = xscale1pmu_read_counter, + .write_counter = xscale1pmu_write_counter, + .get_event_idx = xscale1pmu_get_event_idx, + .start = xscale1pmu_start, + .stop = xscale1pmu_stop, + .num_events = 3, + .max_period = (1LLU << 32) - 1, +}; + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. 
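
xscale2 unpacks what xscale1 crammed into the PMNC: control bits stay there, while overflow flags, event selection and interrupt enables each get a register of their own. A pure model of the enable path's read-modify-write on two of them, with local constant copies for illustration:

#include <stdio.h>
#include <stdint.h>

#define EX_COUNT2_EVT_SHFT	16
#define EX_COUNT2_EVT_MASK	(0xFFu << EX_COUNT2_EVT_SHFT)
#define EX_COUNT2_INT_EN	0x008u

/* Install an event on counter 2: the event number goes into the
 * event-select register, the IRQ enable into the int-enable register. */
static void ex_program_count2(uint32_t *evtsel, uint32_t *ien, unsigned int evt)
{
	*evtsel &= ~EX_COUNT2_EVT_MASK;
	*evtsel |= (uint32_t)evt << EX_COUNT2_EVT_SHFT;
	*ien |= EX_COUNT2_INT_EN;
}

int main(void)
{
	uint32_t evtsel = 0xFFFFFFFFu, ien = 0;

	ex_program_count2(&evtsel, &ien, 0x07);	/* 0x07 = instructions */
	printf("evtsel = 0x%08x, ien = 0x%08x\n", evtsel, ien);
	return 0;
}
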
*/
+	of_flags = xscale2pmu_read_overflow_flags();
+	if (!(of_flags & XSCALE2_OVERFLOWED_MASK))
+		return IRQ_NONE;
+
+	/* Clear the overflow bits. */
+	xscale2pmu_write_overflow_flags(of_flags);
+
+	regs = get_irq_regs();
+
+	perf_sample_data_init(&data, 0);
+
+	cpuc = &__get_cpu_var(cpu_hw_events);
+	for (idx = 0; idx <= armpmu->num_events; ++idx) {
+		struct perf_event *event = cpuc->events[idx];
+		struct hw_perf_event *hwc;
+
+		if (!test_bit(idx, cpuc->active_mask))
+			continue;
+
+		if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx))
+			continue;
+
+		hwc = &event->hw;
+		armpmu_event_update(event, hwc, idx);
+		data.period = event->hw.last_period;
+		if (!armpmu_event_set_period(event, hwc, idx))
+			continue;
+
+		if (perf_event_overflow(event, 0, &data, regs))
+			armpmu->disable(hwc, idx);
+	}
+
+	irq_work_run();
+
+	/*
+	 * Re-enable the PMU.
+	 */
+	pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE;
+	xscale2pmu_write_pmnc(pmnc);
+
+	return IRQ_HANDLED;
+}
+
+static void
+xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien |= XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien |= XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien |= XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien |= XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien |= XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static void
+xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx)
+{
+	unsigned long flags, ien, evtsel;
+
+	ien = xscale2pmu_read_int_enable();
+	evtsel = xscale2pmu_read_event_select();
+
+	switch (idx) {
+	case XSCALE_CYCLE_COUNTER:
+		ien &= ~XSCALE2_CCOUNT_INT_EN;
+		break;
+	case XSCALE_COUNTER0:
+		ien &= ~XSCALE2_COUNT0_INT_EN;
+		evtsel &= ~XSCALE2_COUNT0_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER1:
+		ien &= ~XSCALE2_COUNT1_INT_EN;
+		evtsel &= ~XSCALE2_COUNT1_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER2:
+		ien &= ~XSCALE2_COUNT2_INT_EN;
+		evtsel &= ~XSCALE2_COUNT2_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT;
+		break;
+	case XSCALE_COUNTER3:
+		ien &= ~XSCALE2_COUNT3_INT_EN;
+		evtsel &= ~XSCALE2_COUNT3_EVT_MASK;
+		evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT;
+		break;
+	default:
+		WARN_ONCE(1, "invalid counter number (%d)\n", idx);
+		return;
+	}
+
+	spin_lock_irqsave(&pmu_lock, flags);
+	xscale2pmu_write_event_select(evtsel);
+	xscale2pmu_write_int_enable(ien);
+	spin_unlock_irqrestore(&pmu_lock, flags);
+}
+
+static int
+xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc,
+			struct hw_perf_event *event)
+{
+	int idx = xscale1pmu_get_event_idx(cpuc, event);
+	if (idx >= 0)
+		goto out;
+
+	if
(!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void +xscale2pmu_start(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static void +xscale2pmu_stop(void) +{ + unsigned long flags, val; + + spin_lock_irqsave(&pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + spin_unlock_irqrestore(&pmu_lock, flags); +} + +static inline u32 +xscale2pmu_read_counter(int counter) +{ + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void +xscale2pmu_write_counter(int counter, u32 val) +{ + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static const struct arm_pmu xscale2pmu = { + .id = ARM_PERF_PMU_ID_XSCALE2, + .handle_irq = xscale2pmu_handle_irq, + .enable = xscale2pmu_enable_event, + .disable = xscale2pmu_disable_event, + .event_map = xscalepmu_event_map, + .raw_event = xscalepmu_raw_event, + .read_counter = xscale2pmu_read_counter, + .write_counter = xscale2pmu_write_counter, + .get_event_idx = xscale2pmu_get_event_idx, + .start = xscale2pmu_start, + .stop = xscale2pmu_stop, + .num_events = 5, + .max_period = (1LLU << 32) - 1, +}; static int __init init_hw_perf_events(void) @@ -622,16 +2977,37 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = armv6pmu_init(); + armpmu = &armv6pmu; + memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, + sizeof(armv6_perf_cache_map)); break; case 0xB020: /* ARM11mpcore */ - armpmu = armv6mpcore_pmu_init(); + armpmu = &armv6mpcore_pmu; + memcpy(armpmu_perf_cache_map, + armv6mpcore_perf_cache_map, + sizeof(armv6mpcore_perf_cache_map)); break; case 0xC080: /* Cortex-A8 */ - armpmu = armv7_a8_pmu_init(); + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, + sizeof(armv7_a8_perf_cache_map)); + armv7pmu.event_map = armv7_a8_pmu_event_map; + armpmu = &armv7pmu; + + /* Reset PMNC and read the nb of CNTx counters + supported */ + armv7pmu.num_events = armv7_reset_read_pmnc(); break; case 0xC090: /* Cortex-A9 */ - armpmu = armv7_a9_pmu_init(); + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, + sizeof(armv7_a9_perf_cache_map)); + armv7pmu.event_map = armv7_a9_pmu_event_map; + armpmu = &armv7pmu; + + /* Reset PMNC and 
read the nb of CNTx counters + supported */ + armv7pmu.num_events = armv7_reset_read_pmnc(); break; } /* Intel CPUs [xscale]. */ @@ -639,17 +3015,21 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = xscale1pmu_init(); + armpmu = &xscale1pmu; + memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, + sizeof(xscale_perf_cache_map)); break; case 2: - armpmu = xscale2pmu_init(); + armpmu = &xscale2pmu; + memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, + sizeof(xscale_perf_cache_map)); break; } } if (armpmu) { pr_info("enabled with %s PMU driver, %d counters available\n", - armpmu->name, armpmu->num_events); + arm_pmu_names[armpmu->id], armpmu->num_events); } else { pr_info("no hardware support available\n"); } @@ -673,17 +3053,17 @@ arch_initcall(init_hw_perf_events); * This code has been adapted from the ARM OProfile support. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long sp; - unsigned long lr; + struct frame_tail *fp; + unsigned long sp; + unsigned long lr; } __attribute__((packed)); /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. */ -static struct frame_tail __user * -user_backtrace(struct frame_tail __user *tail, +static struct frame_tail * +user_backtrace(struct frame_tail *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; @@ -709,10 +3089,10 @@ user_backtrace(struct frame_tail __user *tail, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; + struct frame_tail *tail; - tail = (struct frame_tail __user *)regs->ARM_fp - 1; + tail = (struct frame_tail *)regs->ARM_fp - 1; while (tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); diff --git a/trunk/arch/arm/kernel/perf_event_v6.c b/trunk/arch/arm/kernel/perf_event_v6.c deleted file mode 100644 index c058bfc8532b..000000000000 --- a/trunk/arch/arm/kernel/perf_event_v6.c +++ /dev/null @@ -1,672 +0,0 @@ -/* - * ARMv6 Performance counter handling code. - * - * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. 
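- *
- * As a concrete sketch of the disable procedure above (this mirrors
- * armv6pmu_disable_event() below), parking counter 0 is one PMCR
- * read-modify-write:
- *
- *	val = armv6_pmcr_read();
- *	val &= ~(ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK);
- *	val |= ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT;
- *	armv6_pmcr_write(val);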
- */ - -#ifdef CONFIG_CPU_V6 -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. 
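- * (For instance, the main TLB miss event (0x0F above) can still be
- * counted via perf's raw event syntax, e.g. 'perf stat -e r0f',
- * which the 0xFF raw event mask of this PMU allows.)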
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. 
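- * (Unlike the plain ARMv6 PMU, the 11MPCore counters do distinguish
- * data cache read and write accesses/misses, and the cache map below
- * takes advantage of that.)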
- */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. 
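- * (Here the main TLB miss event is 0x10 above, so 'perf stat -e r10'
- * would be the raw-event spelling if it is needed.)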
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, 
c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -static void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - raw_spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. 
- */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) - return ARMV6_COUNTER1; - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) - return ARMV6_COUNTER0; - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - raw_spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - raw_spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .name = "v6", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6_perf_cache_map, - .event_map = &armv6_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static const struct arm_pmu *__init armv6pmu_init(void) -{ - return &armv6pmu; -} - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. 
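- * (This is visible in armv6mpcore_pmu_disable_event() below, which
- * clears only the relevant interrupt-enable bit and leaves the event
- * selection untouched.)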
- */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .name = "v6mpcore", - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .cache_map = &armv6mpcore_perf_cache_map, - .event_map = &armv6mpcore_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static const struct arm_pmu *__init armv6mpcore_pmu_init(void) -{ - return &armv6mpcore_pmu; -} -#else -static const struct arm_pmu *__init armv6pmu_init(void) -{ - return NULL; -} - -static const struct arm_pmu *__init armv6mpcore_pmu_init(void) -{ - return NULL; -} -#endif /* CONFIG_CPU_V6 */ diff --git a/trunk/arch/arm/kernel/perf_event_v7.c b/trunk/arch/arm/kernel/perf_event_v7.c deleted file mode 100644 index 2e1402556fa0..000000000000 --- a/trunk/arch/arm/kernel/perf_event_v7.c +++ /dev/null @@ -1,906 +0,0 @@ -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -#ifdef CONFIG_CPU_V7 -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. 
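- * As a consequence, a branch count read from this event may come out
- * slightly higher than the number of branch instructions that were
- * architecturally executed.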
- */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events 
that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. 
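- * (DTLB refills are event 0x05 above; any other TLB event would
- * have to be requested as a raw event.)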
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. 
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long 
armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; -} - -static inline void armv7_pmnc_write(unsigned long val) -{ - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} - -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} - -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; 
- else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; -} - -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) -{ - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } -} -#endif - -static void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - raw_spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - raw_spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? 
- */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - raw_spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. 
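- * (Every index from ARMV7_COUNTER0 up to armpmu->num_events is
- * already claimed in used_mask, so report -EAGAIN to the core.)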
*/ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .raw_event_mask = 0xFF, - .max_period = (1LLU << 32) - 1, -}; - -static u32 __init armv7_reset_read_pmnc(void) -{ - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); - - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; -} - -static const struct arm_pmu *__init armv7_a8_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.name = "ARMv7 Cortex-A8"; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} - -static const struct arm_pmu *__init armv7_a9_pmu_init(void) -{ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.name = "ARMv7 Cortex-A9"; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armv7pmu.num_events = armv7_reset_read_pmnc(); - return &armv7pmu; -} -#else -static const struct arm_pmu *__init armv7_a8_pmu_init(void) -{ - return NULL; -} - -static const struct arm_pmu *__init armv7_a9_pmu_init(void) -{ - return NULL; -} -#endif /* CONFIG_CPU_V7 */ diff --git a/trunk/arch/arm/kernel/perf_event_xscale.c b/trunk/arch/arm/kernel/perf_event_xscale.c deleted file mode 100644 index 28cd3b025bc3..000000000000 --- a/trunk/arch/arm/kernel/perf_event_xscale.c +++ /dev/null @@ -1,807 +0,0 @@ -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Copyright (C) 2010, ARM Ltd., Will Deacon - * - * Based on the previous xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. 
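- * The difference also shows in the register interface: xscale1 packs
- * enable, interrupt-enable, overflow and event-select fields into a
- * single PMNC word, whereas xscale2 splits interrupt enable, overflow
- * flags and event selection into separate coprocessor registers (see
- * the accessors further down).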
- */ - -#ifdef CONFIG_CPU_XSCALE -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - 
[C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. 
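- * (It was turned off at the top of the handler so that no counter
- * could advance while the overflow flags were being processed.)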
- */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) - return XSCALE_COUNTER1; - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) - return XSCALE_COUNTER0; - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .name = "xscale1", - 
.handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -static const struct arm_pmu *__init xscale1pmu_init(void) -{ - return &xscale1pmu; -} - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. 
*/ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - raw_spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - raw_spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - 
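		/*
		 * (Allocation-order sketch, inferred from the body below:
		 * the xscale1 helper is tried first, covering the cycle
		 * counter plus counters 1 and 0; only then do the two
		 * extra XScale2 counters, 3 and 2, come into play.)
		 */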
struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - raw_spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - raw_spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .name = "xscale2", - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .cache_map = &xscale_perf_cache_map, - .event_map = &xscale_perf_map, - .raw_event_mask = 0xFF, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -static const struct arm_pmu *__init xscale2pmu_init(void) -{ - return &xscale2pmu; -} -#else -static const struct arm_pmu *__init xscale1pmu_init(void) -{ - return NULL; -} - -static const struct arm_pmu *__init xscale2pmu_init(void) -{ - return NULL; -} -#endif /* CONFIG_CPU_XSCALE */ diff --git a/trunk/arch/arm/kernel/ptrace.c b/trunk/arch/arm/kernel/ptrace.c index 19c6816db61e..3e97483abcf0 100644 --- a/trunk/arch/arm/kernel/ptrace.c +++ b/trunk/arch/arm/kernel/ptrace.c @@ -1060,8 +1060,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num, goto out; if ((gen_type & implied_type) != gen_type) { - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } attr.bp_len = gen_len; diff --git a/trunk/arch/arm/kernel/smp.c b/trunk/arch/arm/kernel/smp.c index bbca89872c18..8c1959590252 100644 --- a/trunk/arch/arm/kernel/smp.c +++ b/trunk/arch/arm/kernel/smp.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -458,7 +457,7 @@ static void 
ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) +asmlinkage void __exception do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -545,7 +544,7 @@ static void ipi_cpu_stop(unsigned int cpu) * * Bit 0 - Inter-processor function call */ -asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs) +asmlinkage void __exception do_IPI(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); diff --git a/trunk/arch/arm/kernel/vmlinux.lds.S b/trunk/arch/arm/kernel/vmlinux.lds.S index 897c1a8f1694..cead8893b46b 100644 --- a/trunk/arch/arm/kernel/vmlinux.lds.S +++ b/trunk/arch/arm/kernel/vmlinux.lds.S @@ -101,7 +101,6 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; - IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT diff --git a/trunk/arch/arm/lib/findbit.S b/trunk/arch/arm/lib/findbit.S index 64f6bc1a9132..1e4cbd4e7be9 100644 --- a/trunk/arch/arm/lib/findbit.S +++ b/trunk/arch/arm/lib/findbit.S @@ -174,8 +174,8 @@ ENDPROC(_find_next_bit_be) */ .L_found: #if __LINUX_ARM_ARCH__ >= 5 - rsb r0, r3, #0 - and r3, r3, r0 + rsb r1, r3, #0 + and r3, r3, r1 clz r3, r3 rsb r3, r3, #31 add r0, r2, r3 @@ -190,7 +190,5 @@ ENDPROC(_find_next_bit_be) addeq r2, r2, #1 mov r0, r2 #endif - cmp r1, r0 @ Clamp to maxbit - movlo r0, r1 mov pc, lr diff --git a/trunk/arch/arm/mach-aaec2000/include/mach/vmalloc.h b/trunk/arch/arm/mach-aaec2000/include/mach/vmalloc.h index a6299e8321bd..cff4e0a996ce 100644 --- a/trunk/arch/arm/mach-aaec2000/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-aaec2000/include/mach/vmalloc.h @@ -11,6 +11,6 @@ #ifndef __ASM_ARCH_VMALLOC_H #define __ASM_ARCH_VMALLOC_H -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 #endif /* __ASM_ARCH_VMALLOC_H */ diff --git a/trunk/arch/arm/mach-bcmring/include/mach/vmalloc.h b/trunk/arch/arm/mach-bcmring/include/mach/vmalloc.h index 7397bd7817d9..3db3a09fd398 100644 --- a/trunk/arch/arm/mach-bcmring/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-bcmring/include/mach/vmalloc.h @@ -22,4 +22,4 @@ * 0xe0000000 to 0xefffffff. This gives us 256 MB of vm space and handles * larger physical memory designs better. 
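 *
 * (A side note on the constant below, offered as an inference rather
 * than something this tree documents: the 'UL' suffix on the variant
 * being removed keeps VMALLOC_END's type unsigned long, so comparisons
 * and %lx printk formats against it do not provoke compiler warnings.)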
*/ -#define VMALLOC_END 0xf0000000UL +#define VMALLOC_END 0xf0000000 diff --git a/trunk/arch/arm/mach-clps711x/include/mach/vmalloc.h b/trunk/arch/arm/mach-clps711x/include/mach/vmalloc.h index 467b96137e47..30b3a287ed88 100644 --- a/trunk/arch/arm/mach-clps711x/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-clps711x/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 diff --git a/trunk/arch/arm/mach-davinci/dm355.c b/trunk/arch/arm/mach-davinci/dm355.c index 2652af124acd..9be261beae7d 100644 --- a/trunk/arch/arm/mach-davinci/dm355.c +++ b/trunk/arch/arm/mach-davinci/dm355.c @@ -359,8 +359,8 @@ static struct clk_lookup dm355_clks[] = { CLK(NULL, "uart1", &uart1_clk), CLK(NULL, "uart2", &uart2_clk), CLK("i2c_davinci.1", NULL, &i2c_clk), - CLK("davinci-mcbsp.0", NULL, &asp0_clk), - CLK("davinci-mcbsp.1", NULL, &asp1_clk), + CLK("davinci-asp.0", NULL, &asp0_clk), + CLK("davinci-asp.1", NULL, &asp1_clk), CLK("davinci_mmc.0", NULL, &mmcsd0_clk), CLK("davinci_mmc.1", NULL, &mmcsd1_clk), CLK("spi_davinci.0", NULL, &spi0_clk), @@ -664,7 +664,7 @@ static struct resource dm355_asp1_resources[] = { }; static struct platform_device dm355_asp1_device = { - .name = "davinci-mcbsp", + .name = "davinci-asp", .id = 1, .num_resources = ARRAY_SIZE(dm355_asp1_resources), .resource = dm355_asp1_resources, diff --git a/trunk/arch/arm/mach-davinci/dm365.c b/trunk/arch/arm/mach-davinci/dm365.c index c466d710d3c1..a12065e87266 100644 --- a/trunk/arch/arm/mach-davinci/dm365.c +++ b/trunk/arch/arm/mach-davinci/dm365.c @@ -459,7 +459,7 @@ static struct clk_lookup dm365_clks[] = { CLK(NULL, "usb", &usb_clk), CLK("davinci_emac.1", NULL, &emac_clk), CLK("davinci_voicecodec", NULL, &voicecodec_clk), - CLK("davinci-mcbsp", NULL, &asp0_clk), + CLK("davinci-asp.0", NULL, &asp0_clk), CLK(NULL, "rto", &rto_clk), CLK(NULL, "mjcp", &mjcp_clk), CLK(NULL, NULL, NULL), @@ -922,8 +922,8 @@ static struct resource dm365_asp_resources[] = { }; static struct platform_device dm365_asp_device = { - .name = "davinci-mcbsp", - .id = -1, + .name = "davinci-asp", + .id = 0, .num_resources = ARRAY_SIZE(dm365_asp_resources), .resource = dm365_asp_resources, }; diff --git a/trunk/arch/arm/mach-davinci/dm644x.c b/trunk/arch/arm/mach-davinci/dm644x.c index 9a2376b3137c..0608dd776a16 100644 --- a/trunk/arch/arm/mach-davinci/dm644x.c +++ b/trunk/arch/arm/mach-davinci/dm644x.c @@ -302,7 +302,7 @@ static struct clk_lookup dm644x_clks[] = { CLK("davinci_emac.1", NULL, &emac_clk), CLK("i2c_davinci.1", NULL, &i2c_clk), CLK("palm_bk3710", NULL, &ide_clk), - CLK("davinci-mcbsp", NULL, &asp_clk), + CLK("davinci-asp", NULL, &asp_clk), CLK("davinci_mmc.0", NULL, &mmcsd_clk), CLK(NULL, "spi", &spi_clk), CLK(NULL, "gpio", &gpio_clk), @@ -580,7 +580,7 @@ static struct resource dm644x_asp_resources[] = { }; static struct platform_device dm644x_asp_device = { - .name = "davinci-mcbsp", + .name = "davinci-asp", .id = -1, .num_resources = ARRAY_SIZE(dm644x_asp_resources), .resource = dm644x_asp_resources, diff --git a/trunk/arch/arm/mach-ebsa110/include/mach/vmalloc.h b/trunk/arch/arm/mach-ebsa110/include/mach/vmalloc.h index ea141b7a3e03..60bde56fba4c 100644 --- a/trunk/arch/arm/mach-ebsa110/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-ebsa110/include/mach/vmalloc.h @@ -7,4 +7,4 @@ * it under the terms of the GNU General Public License version 2 as * published by the 
Free Software Foundation. */ -#define VMALLOC_END 0xdf000000UL +#define VMALLOC_END 0xdf000000 diff --git a/trunk/arch/arm/mach-footbridge/include/mach/vmalloc.h b/trunk/arch/arm/mach-footbridge/include/mach/vmalloc.h index 40ba78e5782b..0ffbb7c85e59 100644 --- a/trunk/arch/arm/mach-footbridge/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-footbridge/include/mach/vmalloc.h @@ -7,4 +7,4 @@ */ -#define VMALLOC_END 0xf0000000UL +#define VMALLOC_END 0xf0000000 diff --git a/trunk/arch/arm/mach-h720x/include/mach/vmalloc.h b/trunk/arch/arm/mach-h720x/include/mach/vmalloc.h index 8520b4a4d4e6..a45915b88756 100644 --- a/trunk/arch/arm/mach-h720x/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-h720x/include/mach/vmalloc.h @@ -5,6 +5,6 @@ #ifndef __ARCH_ARM_VMALLOC_H #define __ARCH_ARM_VMALLOC_H -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 #endif diff --git a/trunk/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c b/trunk/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c index 7e1e9dc2c8fc..026263c665ca 100644 --- a/trunk/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c +++ b/trunk/arch/arm/mach-imx/eukrea_mbimx27-baseboard.c @@ -250,6 +250,9 @@ static const struct imxuart_platform_data uart_pdata __initconst = { .flags = IMXUART_HAVE_RTSCTS, }; +#if defined(CONFIG_TOUCHSCREEN_ADS7846) \ + || defined(CONFIG_TOUCHSCREEN_ADS7846_MODULE) + #define ADS7846_PENDOWN (GPIO_PORTD | 25) static void ads7846_dev_init(void) @@ -270,7 +273,9 @@ static struct ads7846_platform_data ads7846_config __initdata = { .get_pendown_state = ads7846_get_pendown_state, .keep_vref_on = 1, }; +#endif +#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static struct spi_board_info eukrea_mbimx27_spi_board_info[] __initdata = { [0] = { .modalias = "ads7846", @@ -289,6 +294,7 @@ static const struct spi_imx_master eukrea_mbimx27_spi0_data __initconst = { .chipselect = eukrea_mbimx27_spi_cs, .num_chipselect = ARRAY_SIZE(eukrea_mbimx27_spi_cs), }; +#endif static struct i2c_board_info eukrea_mbimx27_i2c_devices[] = { { diff --git a/trunk/arch/arm/mach-integrator/include/mach/vmalloc.h b/trunk/arch/arm/mach-integrator/include/mach/vmalloc.h index 2f5a2bafb11f..e056e7cf5645 100644 --- a/trunk/arch/arm/mach-integrator/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-integrator/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 diff --git a/trunk/arch/arm/mach-msm/Kconfig b/trunk/arch/arm/mach-msm/Kconfig index 31e5fd63ec9a..dbbcfeb919db 100644 --- a/trunk/arch/arm/mach-msm/Kconfig +++ b/trunk/arch/arm/mach-msm/Kconfig @@ -49,8 +49,6 @@ endchoice config MSM_SOC_REV_A bool -config ARCH_MSM_SCORPIONMP - bool config ARCH_MSM_ARM11 bool diff --git a/trunk/arch/arm/mach-msm/include/mach/vmalloc.h b/trunk/arch/arm/mach-msm/include/mach/vmalloc.h index d138448eff16..31a32ad062dc 100644 --- a/trunk/arch/arm/mach-msm/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-msm/include/mach/vmalloc.h @@ -16,7 +16,7 @@ #ifndef __ASM_ARCH_MSM_VMALLOC_H #define __ASM_ARCH_MSM_VMALLOC_H -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 #endif diff --git a/trunk/arch/arm/mach-mv78xx0/include/mach/mv78xx0.h b/trunk/arch/arm/mach-mv78xx0/include/mach/mv78xx0.h index 788bdace1304..3eff39921d4d 100644 --- a/trunk/arch/arm/mach-mv78xx0/include/mach/mv78xx0.h +++ b/trunk/arch/arm/mach-mv78xx0/include/mach/mv78xx0.h @@ -65,7 +65,7 @@ */ 
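/*
 * (Context for the hunk below, inferred from the register layout rather
 * than stated in this header: these are the per-CPU DDR address-map
 * window registers. DDR_WINDOW_CPU1_BASE is the offset the second core
 * uses to program its DRAM windows, so the 0x1700 vs 0x1570 choice
 * decides which registers CPU1 actually writes.)
 */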
#define DDR_VIRT_BASE (MV78XX0_REGS_VIRT_BASE | 0x00000) #define DDR_WINDOW_CPU0_BASE (DDR_VIRT_BASE | 0x1500) -#define DDR_WINDOW_CPU1_BASE (DDR_VIRT_BASE | 0x1700) +#define DDR_WINDOW_CPU1_BASE (DDR_VIRT_BASE | 0x1570) #define DEV_BUS_PHYS_BASE (MV78XX0_REGS_PHYS_BASE | 0x10000) #define DEV_BUS_VIRT_BASE (MV78XX0_REGS_VIRT_BASE | 0x10000) diff --git a/trunk/arch/arm/mach-mx25/devices-imx25.h b/trunk/arch/arm/mach-mx25/devices-imx25.h index d94d282fa676..93afa10b13cf 100644 --- a/trunk/arch/arm/mach-mx25/devices-imx25.h +++ b/trunk/arch/arm/mach-mx25/devices-imx25.h @@ -42,9 +42,9 @@ extern const struct imx_mxc_nand_data imx25_mxc_nand_data __initconst; #define imx25_add_mxc_nand(pdata) \ imx_add_mxc_nand(&imx25_mxc_nand_data, pdata) -extern const struct imx_spi_imx_data imx25_cspi_data[] __initconst; +extern const struct imx_spi_imx_data imx25_spi_imx_data[] __initconst; #define imx25_add_spi_imx(id, pdata) \ - imx_add_spi_imx(&imx25_cspi_data[id], pdata) + imx_add_spi_imx(&imx25_spi_imx_data[id], pdata) #define imx25_add_spi_imx0(pdata) imx25_add_spi_imx(0, pdata) #define imx25_add_spi_imx1(pdata) imx25_add_spi_imx(1, pdata) #define imx25_add_spi_imx2(pdata) imx25_add_spi_imx(2, pdata) diff --git a/trunk/arch/arm/mach-mx3/mach-pcm037_eet.c b/trunk/arch/arm/mach-mx3/mach-pcm037_eet.c index fda56545d2fd..99e0894e07db 100644 --- a/trunk/arch/arm/mach-mx3/mach-pcm037_eet.c +++ b/trunk/arch/arm/mach-mx3/mach-pcm037_eet.c @@ -14,7 +14,6 @@ #include #include -#include #include @@ -60,12 +59,14 @@ static struct spi_board_info pcm037_spi_dev[] = { }; /* Platform Data for MXC CSPI */ +#if defined(CONFIG_SPI_IMX) || defined(CONFIG_SPI_IMX_MODULE) static int pcm037_spi1_cs[] = {MXC_SPI_CS(1), IOMUX_TO_GPIO(MX31_PIN_KEY_COL7)}; static const struct spi_imx_master pcm037_spi1_pdata __initconst = { .chipselect = pcm037_spi1_cs, .num_chipselect = ARRAY_SIZE(pcm037_spi1_cs), }; +#endif /* GPIO-keys input device */ static struct gpio_keys_button pcm037_gpio_keys[] = { @@ -170,7 +171,7 @@ static struct platform_device pcm037_gpio_keys_device = { }, }; -static int __init eet_init_devices(void) +static int eet_init_devices(void) { if (!machine_is_pcm037() || pcm037_variant() != PCM037_EET) return 0; diff --git a/trunk/arch/arm/mach-netx/include/mach/vmalloc.h b/trunk/arch/arm/mach-netx/include/mach/vmalloc.h index 871f1ef7bff5..7cca3574308f 100644 --- a/trunk/arch/arm/mach-netx/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-netx/include/mach/vmalloc.h @@ -16,4 +16,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 diff --git a/trunk/arch/arm/mach-omap1/include/mach/vmalloc.h b/trunk/arch/arm/mach-omap1/include/mach/vmalloc.h index 22ec4a479577..b001f67d695b 100644 --- a/trunk/arch/arm/mach-omap1/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-omap1/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xd8000000UL +#define VMALLOC_END 0xd8000000 diff --git a/trunk/arch/arm/mach-omap2/include/mach/vmalloc.h b/trunk/arch/arm/mach-omap2/include/mach/vmalloc.h index 866319947760..4da31e997efe 100644 --- a/trunk/arch/arm/mach-omap2/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-omap2/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 
Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define VMALLOC_END 0xf8000000UL +#define VMALLOC_END 0xf8000000 diff --git a/trunk/arch/arm/mach-omap2/pm24xx.c b/trunk/arch/arm/mach-omap2/pm24xx.c index c85923e56b85..a40457d81927 100644 --- a/trunk/arch/arm/mach-omap2/pm24xx.c +++ b/trunk/arch/arm/mach-omap2/pm24xx.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include @@ -119,10 +118,6 @@ static void omap2_enter_full_retention(void) if (omap_irq_pending()) goto no_sleep; - /* Block console output in case it is on one of the OMAP UARTs */ - if (try_acquire_console_sem()) - goto no_sleep; - omap_uart_prepare_idle(0); omap_uart_prepare_idle(1); omap_uart_prepare_idle(2); @@ -136,8 +131,6 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(1); omap_uart_resume_idle(0); - release_console_sem(); - no_sleep: if (omap2_pm_debug) { unsigned long long tmp; diff --git a/trunk/arch/arm/mach-omap2/pm34xx.c b/trunk/arch/arm/mach-omap2/pm34xx.c index 0ec8a04b7473..75c0cd13ad8e 100644 --- a/trunk/arch/arm/mach-omap2/pm34xx.c +++ b/trunk/arch/arm/mach-omap2/pm34xx.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -386,12 +385,6 @@ void omap_sram_idle(void) omap3_enable_io_chain(); } - /* Block console output in case it is on one of the OMAP UARTs */ - if (per_next_state < PWRDM_POWER_ON || - core_next_state < PWRDM_POWER_ON) - if (try_acquire_console_sem()) - goto console_still_active; - /* PER */ if (per_next_state < PWRDM_POWER_ON) { omap_uart_prepare_idle(2); @@ -470,9 +463,6 @@ void omap_sram_idle(void) omap_uart_resume_idle(3); } - release_console_sem(); - -console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ if (omap3_has_io_wakeup() && (per_next_state < PWRDM_POWER_ON || diff --git a/trunk/arch/arm/mach-omap2/serial.c b/trunk/arch/arm/mach-omap2/serial.c index d17960a1be25..becf0e38ef7e 100644 --- a/trunk/arch/arm/mach-omap2/serial.c +++ b/trunk/arch/arm/mach-omap2/serial.c @@ -27,7 +27,6 @@ #include #include #include -#include #ifdef CONFIG_SERIAL_OMAP #include @@ -407,7 +406,7 @@ void omap_uart_resume_idle(int num) struct omap_uart_state *uart; list_for_each_entry(uart, &uart_list, node) { - if (num == uart->num && uart->can_sleep) { + if (num == uart->num) { omap_uart_enable_clocks(uart); /* Check for IO pad wakeup */ @@ -808,8 +807,6 @@ void __init omap_serial_init_port(int port) oh->dev_attr = uart; - acquire_console_sem(); /* in case the earlycon is on the UART */ - /* * Because of early UART probing, UART did not get idled * on init. Now that omap_device is ready, ensure full idle @@ -834,8 +831,6 @@ void __init omap_serial_init_port(int port) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; - release_console_sem(); - if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { device_init_wakeup(&od->pdev.dev, true); diff --git a/trunk/arch/arm/mach-pnx4008/include/mach/vmalloc.h b/trunk/arch/arm/mach-pnx4008/include/mach/vmalloc.h index 184913c71141..31b65ee07b0b 100644 --- a/trunk/arch/arm/mach-pnx4008/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-pnx4008/include/mach/vmalloc.h @@ -17,4 +17,4 @@ * The vmalloc() routines leaves a hole of 4kB between each vmalloced * area for the same reason. 
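 * (Put concretely, as an aside not present in the original comment:
 * vmalloc() inserts one unmapped guard page after each allocation, so
 * a buffer overrun faults immediately instead of silently corrupting
 * whichever mapping happens to sit next door.)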
;) */ -#define VMALLOC_END 0xd0000000UL +#define VMALLOC_END 0xd0000000 diff --git a/trunk/arch/arm/mach-rpc/include/mach/vmalloc.h b/trunk/arch/arm/mach-rpc/include/mach/vmalloc.h index fb700228637a..3bcd86fadb81 100644 --- a/trunk/arch/arm/mach-rpc/include/mach/vmalloc.h +++ b/trunk/arch/arm/mach-rpc/include/mach/vmalloc.h @@ -7,4 +7,4 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#define VMALLOC_END 0xdc000000UL +#define VMALLOC_END 0xdc000000 diff --git a/trunk/arch/arm/mach-s3c2410/h1940-bluetooth.c b/trunk/arch/arm/mach-s3c2410/h1940-bluetooth.c index 6b86a722a7db..8aa2f1902a94 100644 --- a/trunk/arch/arm/mach-s3c2410/h1940-bluetooth.c +++ b/trunk/arch/arm/mach-s3c2410/h1940-bluetooth.c @@ -77,13 +77,13 @@ static int __devinit h1940bt_probe(struct platform_device *pdev) /* Configures BT serial port GPIOs */ s3c_gpio_cfgpin(S3C2410_GPH(0), S3C2410_GPH0_nCTS0); - s3c_gpio_setpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); + s3c_gpio_cfgpull(S3C2410_GPH(0), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(1), S3C2410_GPIO_OUTPUT); - s3c_gpio_setpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); + s3c_gpio_cfgpull(S3C2410_GPH(1), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(2), S3C2410_GPH2_TXD0); - s3c_gpio_setpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); + s3c_gpio_cfgpull(S3C2410_GPH(2), S3C_GPIO_PULL_NONE); s3c_gpio_cfgpin(S3C2410_GPH(3), S3C2410_GPH3_RXD0); - s3c_gpio_setpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); + s3c_gpio_cfgpull(S3C2410_GPH(3), S3C_GPIO_PULL_NONE); rfk = rfkill_alloc(DRV_NAME, &pdev->dev, RFKILL_TYPE_BLUETOOTH, diff --git a/trunk/arch/arm/mach-s3c2416/irq.c b/trunk/arch/arm/mach-s3c2416/irq.c index 00174daf1526..084d121f368c 100644 --- a/trunk/arch/arm/mach-s3c2416/irq.c +++ b/trunk/arch/arm/mach-s3c2416/irq.c @@ -168,11 +168,12 @@ static struct irq_chip s3c2416_irq_dma = { static void s3c2416_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) { - s3c2416_irq_demux(IRQ_S3C2443_RX3, 3); + s3c2416_irq_demux(IRQ_S3C2443_UART3, 3); } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) -#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) +#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) + static void s3c2416_irq_uart3_mask(unsigned int irqno) { diff --git a/trunk/arch/arm/mach-s3c2443/irq.c b/trunk/arch/arm/mach-s3c2443/irq.c index 893424767ce1..0e0d693f3974 100644 --- a/trunk/arch/arm/mach-s3c2443/irq.c +++ b/trunk/arch/arm/mach-s3c2443/irq.c @@ -166,11 +166,12 @@ static struct irq_chip s3c2443_irq_dma = { static void s3c2443_irq_demux_uart3(unsigned int irq, struct irq_desc *desc) { - s3c2443_irq_demux(IRQ_S3C2443_RX3, 3); + s3c2443_irq_demux(IRQ_S3C2443_UART3, 3); } #define INTMSK_UART3 (1UL << (IRQ_S3C2443_UART3 - IRQ_EINT0)) -#define SUBMSK_UART3 (0x7 << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) +#define SUBMSK_UART3 (0xf << (IRQ_S3C2443_RX3 - S3C2410_IRQSUB(0))) + static void s3c2443_irq_uart3_mask(unsigned int irqno) { diff --git a/trunk/arch/arm/mach-s3c64xx/mach-mini6410.c b/trunk/arch/arm/mach-s3c64xx/mach-mini6410.c index 89f35e02e883..249c62956471 100644 --- a/trunk/arch/arm/mach-s3c64xx/mach-mini6410.c +++ b/trunk/arch/arm/mach-s3c64xx/mach-mini6410.c @@ -45,7 +45,7 @@ #include
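
The largest hunks in this section rework the XScale backend that sits
underneath the perf_events syscall. For orientation, here is a minimal
user-space sketch of what all of that enable/read/disable plumbing
ultimately serves. It is not part of this tree; error handling is
trimmed, and it assumes a kernel with perf_events enabled and an ARM PMU
backend registered. It counts CPU cycles across a busy loop:

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		struct perf_event_attr attr;
		long long count;
		int fd;
		volatile unsigned int i, sink = 0;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;		/* resolved via the PMU's event_map */
		attr.config = PERF_COUNT_HW_CPU_CYCLES;	/* the cycle counter */
		attr.disabled = 1;
		attr.exclude_kernel = 1;

		/* No glibc wrapper on older systems; invoke the syscall directly. */
		fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0) {
			perror("perf_event_open");
			return 1;
		}

		ioctl(fd, PERF_EVENT_IOC_RESET, 0);
		ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
		for (i = 0; i < 1000000; i++)
			sink += i;
		ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

		if (read(fd, &count, sizeof(count)) == sizeof(count))
			printf("cycles: %lld\n", count);
		close(fd);
		return 0;
	}

The two ioctls bracket the measured region; in-kernel they end up,
roughly speaking, in the start/stop and enable/disable callbacks of ops
tables like the xscale1pmu and xscale2pmu structures shown above.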