From 90c47cf2e9005282b8e4ae83d446befa6eba1ce9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 29 Apr 2011 15:12:32 -0400 Subject: [PATCH] --- yaml --- r: 245528 b: refs/heads/master c: b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/Makefile | 1 - trunk/arch/s390/include/asm/ftrace.h | 4 +- trunk/arch/x86/include/asm/alternative-asm.h | 9 - trunk/arch/x86/include/asm/cpufeature.h | 1 - trunk/arch/x86/include/asm/ftrace.h | 7 +- trunk/arch/x86/include/asm/setup.h | 2 +- trunk/arch/x86/include/asm/uaccess.h | 2 +- trunk/arch/x86/kernel/alternative.c | 9 - trunk/arch/x86/kernel/cpu/common.c | 3 +- trunk/arch/x86/kernel/cpu/intel.c | 19 +- trunk/arch/x86/lib/clear_page_64.S | 33 +- trunk/arch/x86/lib/copy_user_64.S | 69 +--- trunk/arch/x86/lib/memcpy_64.S | 45 +-- trunk/arch/x86/lib/memmove_64.S | 29 +- trunk/arch/x86/lib/memset_64.S | 54 +-- trunk/include/linux/ftrace.h | 1 - trunk/include/linux/init.h | 14 +- trunk/kernel/trace/ftrace.c | 176 +++++++-- trunk/scripts/Makefile.build | 12 +- trunk/scripts/recordmcount.c | 168 +++------ trunk/scripts/recordmcount.h | 174 ++------- trunk/scripts/recordmcount.pl | 5 - trunk/tools/perf/builtin-stat.c | 347 +++--------------- .../perf/util/include/asm/alternative-asm.h | 8 - 25 files changed, 370 insertions(+), 824 deletions(-) delete mode 100644 trunk/tools/perf/util/include/asm/alternative-asm.h diff --git a/[refs] b/[refs] index 81f7e843088e..094f2d17f0fe 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 398995ce7980b03b5803f8f31073b45d87746bc1 +refs/heads/master: b448c4e3ae6d20108dba1d7833f2c0d3dbad87ce diff --git a/trunk/Makefile b/trunk/Makefile index e7d01adaf692..41ea6fbec55a 100644 --- a/trunk/Makefile +++ b/trunk/Makefile @@ -1268,7 +1268,6 @@ help: @echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)' @echo ' make C=2 [targets] Force check of all c source with $$CHECK' @echo ' make W=1 [targets] Enable extra gcc checks' - @echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections' @echo '' @echo 'Execute "make" or "make all" to build all targets marked with [*] ' @echo 'For further info see the ./README file' diff --git a/trunk/arch/s390/include/asm/ftrace.h b/trunk/arch/s390/include/asm/ftrace.h index b7931faaef6d..3c29be4836ed 100644 --- a/trunk/arch/s390/include/asm/ftrace.h +++ b/trunk/arch/s390/include/asm/ftrace.h @@ -11,13 +11,15 @@ struct dyn_arch_ftrace { }; #ifdef CONFIG_64BIT #define MCOUNT_INSN_SIZE 12 +#define MCOUNT_OFFSET 8 #else #define MCOUNT_INSN_SIZE 20 +#define MCOUNT_OFFSET 4 #endif static inline unsigned long ftrace_call_adjust(unsigned long addr) { - return addr; + return addr - MCOUNT_OFFSET; } #endif /* __ASSEMBLY__ */ diff --git a/trunk/arch/x86/include/asm/alternative-asm.h b/trunk/arch/x86/include/asm/alternative-asm.h index 94d420b360d1..a63a68be1cce 100644 --- a/trunk/arch/x86/include/asm/alternative-asm.h +++ b/trunk/arch/x86/include/asm/alternative-asm.h @@ -15,13 +15,4 @@ .endm #endif -.macro altinstruction_entry orig alt feature orig_len alt_len - .align 8 - .quad \orig - .quad \alt - .word \feature - .byte \orig_len - .byte \alt_len -.endm - #endif /* __ASSEMBLY__ */ diff --git a/trunk/arch/x86/include/asm/cpufeature.h b/trunk/arch/x86/include/asm/cpufeature.h index 7f2f7b123293..91f3e087cf21 100644 --- a/trunk/arch/x86/include/asm/cpufeature.h +++ b/trunk/arch/x86/include/asm/cpufeature.h @@ -195,7 +195,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */ 
#define X86_FEATURE_FSGSBASE (9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/ -#define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ #if defined(__KERNEL__) && !defined(__ASSEMBLY__) diff --git a/trunk/arch/x86/include/asm/ftrace.h b/trunk/arch/x86/include/asm/ftrace.h index 268c783ab1c0..db24c2278be0 100644 --- a/trunk/arch/x86/include/asm/ftrace.h +++ b/trunk/arch/x86/include/asm/ftrace.h @@ -38,10 +38,11 @@ extern void mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { /* - * addr is the address of the mcount call instruction. - * recordmcount does the necessary offset calculation. + * call mcount is "e8 <4 byte offset>" + * The addr points to the 4 byte offset and the caller of this + * function wants the pointer to e8. Simply subtract one. */ - return addr; + return addr - 1; } #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/trunk/arch/x86/include/asm/setup.h b/trunk/arch/x86/include/asm/setup.h index 647d8a06ce4f..db8aa19a08a2 100644 --- a/trunk/arch/x86/include/asm/setup.h +++ b/trunk/arch/x86/include/asm/setup.h @@ -88,7 +88,7 @@ void *extend_brk(size_t size, size_t align); * executable.) */ #define RESERVE_BRK(name,sz) \ - static void __section(.discard.text) __used notrace \ + static void __section(.discard.text) __used \ __brk_reservation_fn_##name##__(void) { \ asm volatile ( \ ".pushsection .brk_reservation,\"aw\",@nobits;" \ diff --git a/trunk/arch/x86/include/asm/uaccess.h b/trunk/arch/x86/include/asm/uaccess.h index 99f0ad753f32..abd3e0ea762a 100644 --- a/trunk/arch/x86/include/asm/uaccess.h +++ b/trunk/arch/x86/include/asm/uaccess.h @@ -42,7 +42,7 @@ * Returns 0 if the range is valid, nonzero otherwise. * * This is equivalent to the following test: - * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) + * (u33)addr + (u33)size >= (u33)current->addr_limit.seg (u65 for x86_64) * * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... */ diff --git a/trunk/arch/x86/kernel/alternative.c b/trunk/arch/x86/kernel/alternative.c index 1eeeafcb4410..651454b0c811 100644 --- a/trunk/arch/x86/kernel/alternative.c +++ b/trunk/arch/x86/kernel/alternative.c @@ -210,15 +210,6 @@ void __init_or_module apply_alternatives(struct alt_instr *start, u8 insnbuf[MAX_PATCH_LEN]; DPRINTK("%s: alt table %p -> %p\n", __func__, start, end); - /* - * The scan order should be from start to end. A later scanned - * alternative code can overwrite a previous scanned alternative code. - * Some kernel functions (e.g. memcpy, memset, etc) use this order to - * patch code. - * - * So be careful if you want to change the scan order to any other - * order. 
- */ for (a = start; a < end; a++) { u8 *instr = a->instr; BUG_ON(a->replacementlen > a->instrlen); diff --git a/trunk/arch/x86/kernel/cpu/common.c b/trunk/arch/x86/kernel/cpu/common.c index 173f3a3fa1a6..e2ced0074a45 100644 --- a/trunk/arch/x86/kernel/cpu/common.c +++ b/trunk/arch/x86/kernel/cpu/common.c @@ -565,7 +565,8 @@ void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c) cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); - c->x86_capability[9] = ebx; + if (eax > 0) + c->x86_capability[9] = ebx; } /* AMD-defined flags: level 0x80000001 */ diff --git a/trunk/arch/x86/kernel/cpu/intel.c b/trunk/arch/x86/kernel/cpu/intel.c index fc73a34ba8c9..df86bc8c859d 100644 --- a/trunk/arch/x86/kernel/cpu/intel.c +++ b/trunk/arch/x86/kernel/cpu/intel.c @@ -29,10 +29,10 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) { - u64 misc_enable; - /* Unmask CPUID levels if masked: */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { + u64 misc_enable; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { @@ -118,6 +118,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) * (model 2) with the same problem. */ if (c->x86 == 15) { + u64 misc_enable; + rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { @@ -128,19 +130,6 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) } } #endif - - /* - * If fast string is not enabled in IA32_MISC_ENABLE for any reason, - * clear the fast string and enhanced fast string CPU capabilities. - */ - if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { - printk(KERN_INFO "Disabled fast string operations\n"); - setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); - setup_clear_cpu_cap(X86_FEATURE_ERMS); - } - } } #ifdef CONFIG_X86_32 diff --git a/trunk/arch/x86/lib/clear_page_64.S b/trunk/arch/x86/lib/clear_page_64.S index f2145cfa12a6..aa4326bfb24a 100644 --- a/trunk/arch/x86/lib/clear_page_64.S +++ b/trunk/arch/x86/lib/clear_page_64.S @@ -1,6 +1,5 @@ #include #include -#include /* * Zero a page. @@ -15,15 +14,6 @@ ENTRY(clear_page_c) CFI_ENDPROC ENDPROC(clear_page_c) -ENTRY(clear_page_c_e) - CFI_STARTPROC - movl $4096,%ecx - xorl %eax,%eax - rep stosb - ret - CFI_ENDPROC -ENDPROC(clear_page_c_e) - ENTRY(clear_page) CFI_STARTPROC xorl %eax,%eax @@ -48,26 +38,21 @@ ENTRY(clear_page) .Lclear_page_end: ENDPROC(clear_page) - /* - * Some CPUs support enhanced REP MOVSB/STOSB instructions. - * It is recommended to use this when possible. - * If enhanced REP MOVSB/STOSB is not available, try to use fast string. - * Otherwise, use original function. - * - */ + /* Some CPUs run faster using the string instructions. + It is also a lot simpler. 
Use this when possible */ #include .section .altinstr_replacement,"ax" 1: .byte 0xeb /* jmp */ .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */ -2: .byte 0xeb /* jmp */ - .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */ -3: +2: .previous .section .altinstructions,"a" - altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\ - .Lclear_page_end-clear_page, 2b-1b - altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \ - .Lclear_page_end-clear_page,3b-2b + .align 8 + .quad clear_page + .quad 1b + .word X86_FEATURE_REP_GOOD + .byte .Lclear_page_end - clear_page + .byte 2b - 1b .previous diff --git a/trunk/arch/x86/lib/copy_user_64.S b/trunk/arch/x86/lib/copy_user_64.S index 024840266ba0..99e482615195 100644 --- a/trunk/arch/x86/lib/copy_user_64.S +++ b/trunk/arch/x86/lib/copy_user_64.S @@ -15,30 +15,23 @@ #include #include #include -#include -/* - * By placing feature2 after feature1 in altinstructions section, we logically - * implement: - * If CPU has feature2, jmp to alt2 is used - * else if CPU has feature1, jmp to alt1 is used - * else jmp to orig is used. - */ - .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2 + .macro ALTERNATIVE_JUMP feature,orig,alt 0: .byte 0xe9 /* 32bit jump */ .long \orig-1f /* by default jump to orig */ 1: .section .altinstr_replacement,"ax" 2: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt1-1b /* offset */ /* or alternatively to alt1 */ -3: .byte 0xe9 /* near jump with 32bit immediate */ - .long \alt2-1b /* offset */ /* or alternatively to alt2 */ + .long \alt-1b /* offset */ /* or alternatively to alt */ .previous - .section .altinstructions,"a" - altinstruction_entry 0b,2b,\feature1,5,5 - altinstruction_entry 0b,3b,\feature2,5,5 + .align 8 + .quad 0b + .quad 2b + .word \feature /* when feature is set */ + .byte 5 + .byte 5 .previous .endm @@ -79,10 +72,8 @@ ENTRY(_copy_to_user) addq %rdx,%rcx jc bad_to_user cmpq TI_addr_limit(%rax),%rcx - ja bad_to_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string + jae bad_to_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC ENDPROC(_copy_to_user) @@ -94,10 +85,8 @@ ENTRY(_copy_from_user) addq %rdx,%rcx jc bad_from_user cmpq TI_addr_limit(%rax),%rcx - ja bad_from_user - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \ - copy_user_generic_unrolled,copy_user_generic_string, \ - copy_user_enhanced_fast_string + jae bad_from_user + ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string CFI_ENDPROC ENDPROC(_copy_from_user) @@ -266,37 +255,3 @@ ENTRY(copy_user_generic_string) .previous CFI_ENDPROC ENDPROC(copy_user_generic_string) - -/* - * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. - * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. - * - * Input: - * rdi destination - * rsi source - * rdx count - * - * Output: - * eax uncopied bytes or 0 if successful. 
- */ -ENTRY(copy_user_enhanced_fast_string) - CFI_STARTPROC - andl %edx,%edx - jz 2f - movl %edx,%ecx -1: rep - movsb -2: xorl %eax,%eax - ret - - .section .fixup,"ax" -12: movl %ecx,%edx /* ecx is zerorest also */ - jmp copy_user_handle_tail - .previous - - .section __ex_table,"a" - .align 8 - .quad 1b,12b - .previous - CFI_ENDPROC -ENDPROC(copy_user_enhanced_fast_string) diff --git a/trunk/arch/x86/lib/memcpy_64.S b/trunk/arch/x86/lib/memcpy_64.S index daab21dae2d1..75ef61e35e38 100644 --- a/trunk/arch/x86/lib/memcpy_64.S +++ b/trunk/arch/x86/lib/memcpy_64.S @@ -4,7 +4,6 @@ #include #include -#include /* * memcpy - Copy a memory block. @@ -38,23 +37,6 @@ .Lmemcpy_e: .previous -/* - * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than - * memcpy_c. Use memcpy_c_e when possible. - * - * This gets patched over the unrolled variant (below) via the - * alternative instructions framework: - */ - .section .altinstr_replacement, "ax", @progbits -.Lmemcpy_c_e: - movq %rdi, %rax - - movl %edx, %ecx - rep movsb - ret -.Lmemcpy_e_e: - .previous - ENTRY(__memcpy) ENTRY(memcpy) CFI_STARTPROC @@ -189,22 +171,21 @@ ENDPROC(memcpy) ENDPROC(__memcpy) /* - * Some CPUs are adding enhanced REP MOVSB/STOSB feature - * If the feature is supported, memcpy_c_e() is the first choice. - * If enhanced rep movsb copy is not available, use fast string copy - * memcpy_c() when possible. This is faster and code is simpler than - * original memcpy(). - * Otherwise, original memcpy() is used. - * In .altinstructions section, ERMS feature is placed after REG_GOOD - * feature to implement the right patch order. - * + * Some CPUs run faster using the string copy instructions. + * It is also a lot simpler. Use this when possible: + */ + + .section .altinstructions, "a" + .align 8 + .quad memcpy + .quad .Lmemcpy_c + .word X86_FEATURE_REP_GOOD + + /* * Replace only beginning, memcpy is used to apply alternatives, * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .section .altinstructions, "a" - altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\ - .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c - altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \ - .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e + .byte .Lmemcpy_e - .Lmemcpy_c + .byte .Lmemcpy_e - .Lmemcpy_c .previous diff --git a/trunk/arch/x86/lib/memmove_64.S b/trunk/arch/x86/lib/memmove_64.S index d0ec9c2936d7..0ecb8433e5a8 100644 --- a/trunk/arch/x86/lib/memmove_64.S +++ b/trunk/arch/x86/lib/memmove_64.S @@ -8,7 +8,6 @@ #define _STRING_C #include #include -#include #undef memmove @@ -25,7 +24,6 @@ */ ENTRY(memmove) CFI_STARTPROC - /* Handle more 32bytes in loop */ mov %rdi, %rax cmp $0x20, %rdx @@ -33,13 +31,8 @@ ENTRY(memmove) /* Decide forward/backward copy mode */ cmp %rdi, %rsi - jge .Lmemmove_begin_forward - mov %rsi, %r8 - add %rdx, %r8 - cmp %rdi, %r8 - jg 2f + jb 2f -.Lmemmove_begin_forward: /* * movsq instruction have many startup latency * so we handle small size by general register. @@ -85,8 +78,6 @@ ENTRY(memmove) rep movsq movq %r11, (%r10) jmp 13f -.Lmemmove_end_forward: - /* * Handle data backward by movsq. */ @@ -203,22 +194,4 @@ ENTRY(memmove) 13: retq CFI_ENDPROC - - .section .altinstr_replacement,"ax" -.Lmemmove_begin_forward_efs: - /* Forward moving data. 
*/ - movq %rdx, %rcx - rep movsb - retq -.Lmemmove_end_forward_efs: - .previous - - .section .altinstructions,"a" - .align 8 - .quad .Lmemmove_begin_forward - .quad .Lmemmove_begin_forward_efs - .word X86_FEATURE_ERMS - .byte .Lmemmove_end_forward-.Lmemmove_begin_forward - .byte .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs - .previous ENDPROC(memmove) diff --git a/trunk/arch/x86/lib/memset_64.S b/trunk/arch/x86/lib/memset_64.S index 79bd454b78a3..09d344269652 100644 --- a/trunk/arch/x86/lib/memset_64.S +++ b/trunk/arch/x86/lib/memset_64.S @@ -2,13 +2,9 @@ #include #include -#include -#include /* - * ISO C memset - set a memory block to a byte value. This function uses fast - * string to get better performance than the original function. The code is - * simpler and shorter than the orignal function as well. + * ISO C memset - set a memory block to a byte value. * * rdi destination * rsi value (char) @@ -35,28 +31,6 @@ .Lmemset_e: .previous -/* - * ISO C memset - set a memory block to a byte value. This function uses - * enhanced rep stosb to override the fast string function. - * The code is simpler and shorter than the fast string function as well. - * - * rdi destination - * rsi value (char) - * rdx count (bytes) - * - * rax original destination - */ - .section .altinstr_replacement, "ax", @progbits -.Lmemset_c_e: - movq %rdi,%r9 - movb %sil,%al - movl %edx,%ecx - rep stosb - movq %r9,%rax - ret -.Lmemset_e_e: - .previous - ENTRY(memset) ENTRY(__memset) CFI_STARTPROC @@ -138,20 +112,16 @@ ENTRY(__memset) ENDPROC(memset) ENDPROC(__memset) - /* Some CPUs support enhanced REP MOVSB/STOSB feature. - * It is recommended to use this when possible. - * - * If enhanced REP MOVSB/STOSB feature is not available, use fast string - * instructions. - * - * Otherwise, use original memset function. - * - * In .altinstructions section, ERMS feature is placed after REG_GOOD - * feature to implement the right patch order. - */ + /* Some CPUs run faster using the string instructions. + It is also a lot simpler. 
Use this when possible */ + +#include + .section .altinstructions,"a" - altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\ - .Lfinal-memset,.Lmemset_e-.Lmemset_c - altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \ - .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e + .align 8 + .quad memset + .quad .Lmemset_c + .word X86_FEATURE_REP_GOOD + .byte .Lfinal - memset + .byte .Lmemset_e - .Lmemset_c .previous diff --git a/trunk/include/linux/ftrace.h b/trunk/include/linux/ftrace.h index 32047449b309..fe0a90a09243 100644 --- a/trunk/include/linux/ftrace.h +++ b/trunk/include/linux/ftrace.h @@ -149,7 +149,6 @@ enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FILTER = (1 << 1), FTRACE_FL_ENABLED = (1 << 2), - FTRACE_FL_NOTRACE = (1 << 3), }; struct dyn_ftrace { diff --git a/trunk/include/linux/init.h b/trunk/include/linux/init.h index 9146f39cdddf..577671c55153 100644 --- a/trunk/include/linux/init.h +++ b/trunk/include/linux/init.h @@ -79,29 +79,29 @@ #define __exitused __used #endif -#define __exit __section(.exit.text) __exitused __cold notrace +#define __exit __section(.exit.text) __exitused __cold /* Used for HOTPLUG */ -#define __devinit __section(.devinit.text) __cold notrace +#define __devinit __section(.devinit.text) __cold #define __devinitdata __section(.devinit.data) #define __devinitconst __section(.devinit.rodata) -#define __devexit __section(.devexit.text) __exitused __cold notrace +#define __devexit __section(.devexit.text) __exitused __cold #define __devexitdata __section(.devexit.data) #define __devexitconst __section(.devexit.rodata) /* Used for HOTPLUG_CPU */ -#define __cpuinit __section(.cpuinit.text) __cold notrace +#define __cpuinit __section(.cpuinit.text) __cold #define __cpuinitdata __section(.cpuinit.data) #define __cpuinitconst __section(.cpuinit.rodata) -#define __cpuexit __section(.cpuexit.text) __exitused __cold notrace +#define __cpuexit __section(.cpuexit.text) __exitused __cold #define __cpuexitdata __section(.cpuexit.data) #define __cpuexitconst __section(.cpuexit.rodata) /* Used for MEMORY_HOTPLUG */ -#define __meminit __section(.meminit.text) __cold notrace +#define __meminit __section(.meminit.text) __cold #define __meminitdata __section(.meminit.data) #define __meminitconst __section(.meminit.rodata) -#define __memexit __section(.memexit.text) __exitused __cold notrace +#define __memexit __section(.memexit.text) __exitused __cold #define __memexitdata __section(.memexit.data) #define __memexitconst __section(.memexit.rodata) diff --git a/trunk/kernel/trace/ftrace.c b/trunk/kernel/trace/ftrace.c index d3406346ced6..04c002a491fb 100644 --- a/trunk/kernel/trace/ftrace.c +++ b/trunk/kernel/trace/ftrace.c @@ -57,6 +57,7 @@ /* hash bits for specific function selection */ #define FTRACE_HASH_BITS 7 #define FTRACE_FUNC_HASHSIZE (1 << FTRACE_HASH_BITS) +#define FTRACE_HASH_MAX_BITS 10 /* ftrace_enabled is a method to turn ftrace on or off */ int ftrace_enabled __read_mostly; @@ -865,6 +866,22 @@ enum { FTRACE_START_FUNC_RET = (1 << 3), FTRACE_STOP_FUNC_RET = (1 << 4), }; +struct ftrace_func_entry { + struct hlist_node hlist; + unsigned long ip; +}; + +struct ftrace_hash { + unsigned long size_bits; + struct hlist_head *buckets; + unsigned long count; +}; + +static struct hlist_head notrace_buckets[1 << FTRACE_HASH_MAX_BITS]; +static struct ftrace_hash notrace_hash = { + .size_bits = FTRACE_HASH_MAX_BITS, + .buckets = notrace_buckets, +}; static int ftrace_filtered; @@ -889,6 +906,79 @@ static struct ftrace_page *ftrace_pages; static struct dyn_ftrace 
*ftrace_free_records; +static struct ftrace_func_entry * +ftrace_lookup_ip(struct ftrace_hash *hash, unsigned long ip) +{ + unsigned long key; + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + struct hlist_node *n; + + if (!hash->count) + return NULL; + + if (hash->size_bits > 0) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + hhd = &hash->buckets[key]; + + hlist_for_each_entry_rcu(entry, n, hhd, hlist) { + if (entry->ip == ip) + return entry; + } + return NULL; +} + +static int add_hash_entry(struct ftrace_hash *hash, unsigned long ip) +{ + struct ftrace_func_entry *entry; + struct hlist_head *hhd; + unsigned long key; + + entry = kmalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + + if (hash->size_bits) + key = hash_long(ip, hash->size_bits); + else + key = 0; + + entry->ip = ip; + hhd = &hash->buckets[key]; + hlist_add_head(&entry->hlist, hhd); + hash->count++; + + return 0; +} + +static void +remove_hash_entry(struct ftrace_hash *hash, + struct ftrace_func_entry *entry) +{ + hlist_del(&entry->hlist); + kfree(entry); + hash->count--; +} + +static void ftrace_hash_clear(struct ftrace_hash *hash) +{ + struct hlist_head *hhd; + struct hlist_node *tp, *tn; + struct ftrace_func_entry *entry; + int size = 1 << hash->size_bits; + int i; + + for (i = 0; i < size; i++) { + hhd = &hash->buckets[i]; + hlist_for_each_entry_safe(entry, tp, tn, hhd, hlist) + remove_hash_entry(hash, entry); + } + FTRACE_WARN_ON(hash->count); +} + /* * This is a double for. Do not use 'break' to break out of the loop, * you must use a goto. @@ -1032,7 +1122,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) * If we want to enable it and filtering is on, enable it only if * it's filtered */ - if (enable && !(rec->flags & FTRACE_FL_NOTRACE)) { + if (enable && !ftrace_lookup_ip(¬race_hash, rec->ip)) { if (!ftrace_filtered || (rec->flags & FTRACE_FL_FILTER)) flag = FTRACE_FL_ENABLED; } @@ -1465,7 +1555,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos) !(rec->flags & FTRACE_FL_FILTER)) || ((iter->flags & FTRACE_ITER_NOTRACE) && - !(rec->flags & FTRACE_FL_NOTRACE))) { + !ftrace_lookup_ip(¬race_hash, rec->ip))) { rec = NULL; goto retry; } @@ -1609,14 +1699,15 @@ static void ftrace_filter_reset(int enable) { struct ftrace_page *pg; struct dyn_ftrace *rec; - unsigned long type = enable ? 
FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; mutex_lock(&ftrace_lock); - if (enable) + if (enable) { ftrace_filtered = 0; - do_for_each_ftrace_rec(pg, rec) { - rec->flags &= ~type; - } while_for_each_ftrace_rec(); + do_for_each_ftrace_rec(pg, rec) { + rec->flags &= ~FTRACE_FL_FILTER; + } while_for_each_ftrace_rec(); + } else + ftrace_hash_clear(¬race_hash); mutex_unlock(&ftrace_lock); } @@ -1716,13 +1807,36 @@ static int ftrace_match(char *str, char *regex, int len, int type) return matched; } -static void -update_record(struct dyn_ftrace *rec, unsigned long flag, int not) +static int +update_record(struct dyn_ftrace *rec, int enable, int not) { - if (not) - rec->flags &= ~flag; - else - rec->flags |= flag; + struct ftrace_func_entry *entry; + struct ftrace_hash *hash = ¬race_hash; + int ret = 0; + + if (enable) { + if (not) + rec->flags &= ~FTRACE_FL_FILTER; + else + rec->flags |= FTRACE_FL_FILTER; + } else { + if (not) { + /* Do nothing if it doesn't exist */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (!entry) + return 0; + + remove_hash_entry(hash, entry); + } else { + /* Do nothing if it exists */ + entry = ftrace_lookup_ip(hash, rec->ip); + if (entry) + return 0; + + ret = add_hash_entry(hash, rec->ip); + } + } + return ret; } static int @@ -1754,16 +1868,14 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) struct dyn_ftrace *rec; int type = MATCH_FULL; char *search = buff; - unsigned long flag; int found = 0; + int ret; if (len) { type = filter_parse_regex(buff, len, &search, ¬); search_len = strlen(search); } - flag = enable ? FTRACE_FL_FILTER : FTRACE_FL_NOTRACE; - mutex_lock(&ftrace_lock); if (unlikely(ftrace_disabled)) @@ -1772,7 +1884,11 @@ static int match_records(char *buff, int len, char *mod, int enable, int not) do_for_each_ftrace_rec(pg, rec) { if (ftrace_match_record(rec, mod, search, search_len, type)) { - update_record(rec, flag, not); + ret = update_record(rec, enable, not); + if (ret < 0) { + found = ret; + goto out_unlock; + } found = 1; } /* @@ -1821,6 +1937,7 @@ static int ftrace_mod_callback(char *func, char *cmd, char *param, int enable) { char *mod; + int ret = -EINVAL; /* * cmd == 'mod' because we only registered this func @@ -1832,15 +1949,19 @@ ftrace_mod_callback(char *func, char *cmd, char *param, int enable) /* we must have a module name */ if (!param) - return -EINVAL; + return ret; mod = strsep(¶m, ":"); if (!strlen(mod)) - return -EINVAL; + return ret; - if (ftrace_match_module_records(func, mod, enable)) - return 0; - return -EINVAL; + ret = ftrace_match_module_records(func, mod, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + + return 0; } static struct ftrace_func_command ftrace_mod_cmd = { @@ -2132,14 +2253,17 @@ static int ftrace_process_regex(char *buff, int len, int enable) { char *func, *command, *next = buff; struct ftrace_func_command *p; - int ret = -EINVAL; + int ret; func = strsep(&next, ":"); if (!next) { - if (ftrace_match_records(func, len, enable)) - return 0; - return ret; + ret = ftrace_match_records(func, len, enable); + if (!ret) + ret = -EINVAL; + if (ret < 0) + return ret; + return 0; } /* command found */ diff --git a/trunk/scripts/Makefile.build b/trunk/scripts/Makefile.build index 6165622c3e29..d5f925abe4d2 100644 --- a/trunk/scripts/Makefile.build +++ b/trunk/scripts/Makefile.build @@ -244,19 +244,14 @@ endif ifdef CONFIG_FTRACE_MCOUNT_RECORD ifdef BUILD_C_RECORDMCOUNT -ifeq ("$(origin RECORDMCOUNT_WARN)", "command line") - RECORDMCOUNT_FLAGS = -w -endif # Due to recursion, we 
must skip empty.o. # The empty.o file is created in the make process in order to determine # the target endianness and word size. It is made before all other C # files, including recordmcount. sub_cmd_record_mcount = \ if [ $(@) != "scripts/mod/empty.o" ]; then \ - $(objtree)/scripts/recordmcount $(RECORDMCOUNT_FLAGS) "$(@)"; \ + $(objtree)/scripts/recordmcount "$(@)"; \ fi; -recordmcount_source := $(srctree)/scripts/recordmcount.c \ - $(srctree)/scripts/recordmcount.h else sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(CONFIG_CPU_BIG_ENDIAN),big,little)" \ @@ -264,7 +259,6 @@ sub_cmd_record_mcount = set -e ; perl $(srctree)/scripts/recordmcount.pl "$(ARCH "$(OBJDUMP)" "$(OBJCOPY)" "$(CC) $(KBUILD_CFLAGS)" \ "$(LD)" "$(NM)" "$(RM)" "$(MV)" \ "$(if $(part-of-module),1,0)" "$(@)"; -recordmcount_source := $(srctree)/scripts/recordmcount.pl endif cmd_record_mcount = \ if [ "$(findstring -pg,$(_c_flags))" = "-pg" ]; then \ @@ -285,13 +279,13 @@ define rule_cc_o_c endef # Built-in and composite module parts -$(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE +$(obj)/%.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) # Single-part modules are special since we need to mark them in $(MODVERDIR) -$(single-used-m): $(obj)/%.o: $(src)/%.c $(recordmcount_source) FORCE +$(single-used-m): $(obj)/%.o: $(src)/%.c FORCE $(call cmd,force_checksrc) $(call if_changed_rule,cc_o_c) @{ echo $(@:.o=.ko); echo $@; } > $(MODVERDIR)/$(@F:.o=.mod) diff --git a/trunk/scripts/recordmcount.c b/trunk/scripts/recordmcount.c index ee52cb8e17ad..f9f6f52db772 100644 --- a/trunk/scripts/recordmcount.c +++ b/trunk/scripts/recordmcount.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -40,7 +39,6 @@ static char gpfx; /* prefix for global symbol name (sometimes '_') */ static struct stat sb; /* Remember .st_size, etc. */ static jmp_buf jmpenv; /* setjmp/longjmp per-file error escape */ static const char *altmcount; /* alternate mcount symbol name */ -static int warn_on_notrace_sect; /* warn when section has mcount not being recorded */ /* setjmp() return values */ enum { @@ -80,7 +78,7 @@ static off_t ulseek(int const fd, off_t const offset, int const whence) { off_t const w = lseek(fd, offset, whence); - if (w == (off_t)-1) { + if ((off_t)-1 == w) { perror("lseek"); fail_file(); } @@ -113,41 +111,13 @@ static void * umalloc(size_t size) { void *const addr = malloc(size); - if (addr == 0) { + if (0 == addr) { fprintf(stderr, "malloc failed: %zu bytes\n", size); fail_file(); } return addr; } -static unsigned char ideal_nop5_x86_64[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 }; -static unsigned char ideal_nop5_x86_32[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 }; -static unsigned char *ideal_nop; - -static char rel_type_nop; - -static int (*make_nop)(void *map, size_t const offset); - -static int make_nop_x86(void *map, size_t const offset) -{ - uint32_t *ptr; - unsigned char *op; - - /* Confirm we have 0xe8 0x0 0x0 0x0 0x0 */ - ptr = map + offset; - if (*ptr != 0) - return -1; - - op = map + offset - 1; - if (*op != 0xe8) - return -1; - - /* convert to nop */ - ulseek(fd_map, offset - 1, SEEK_SET); - uwrite(fd_map, ideal_nop, 5); - return 0; -} - /* * Get the whole file as a programming convenience in order to avoid * malloc+lseek+read+free of many pieces. 
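 * (The mapping below is created MAP_PRIVATE, so stores through it
 * never reach the file; actual modifications are written back
 * through ulseek()/uwrite() on fd_map instead.)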
If successful, then mmap @@ -166,7 +136,7 @@ static void *mmap_file(char const *fname) void *addr; fd_map = open(fname, O_RDWR); - if (fd_map < 0 || fstat(fd_map, &sb) < 0) { + if (0 > fd_map || 0 > fstat(fd_map, &sb)) { perror(fname); fail_file(); } @@ -177,7 +147,7 @@ static void *mmap_file(char const *fname) addr = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd_map, 0); mmap_failed = 0; - if (addr == MAP_FAILED) { + if (MAP_FAILED == addr) { mmap_failed = 1; addr = umalloc(sb.st_size); uread(fd_map, addr, sb.st_size); @@ -236,13 +206,12 @@ static uint32_t (*w2)(uint16_t); static int is_mcounted_section_name(char const *const txtname) { - return strcmp(".text", txtname) == 0 || - strcmp(".ref.text", txtname) == 0 || - strcmp(".sched.text", txtname) == 0 || - strcmp(".spinlock.text", txtname) == 0 || - strcmp(".irqentry.text", txtname) == 0 || - strcmp(".kprobes.text", txtname) == 0 || - strcmp(".text.unlikely", txtname) == 0; + return 0 == strcmp(".text", txtname) || + 0 == strcmp(".ref.text", txtname) || + 0 == strcmp(".sched.text", txtname) || + 0 == strcmp(".spinlock.text", txtname) || + 0 == strcmp(".irqentry.text", txtname) || + 0 == strcmp(".text.unlikely", txtname); } /* 32 bit and 64 bit are very similar */ @@ -295,48 +264,43 @@ do_file(char const *const fname) w8 = w8nat; switch (ehdr->e_ident[EI_DATA]) { static unsigned int const endian = 1; - default: + default: { fprintf(stderr, "unrecognized ELF data encoding %d: %s\n", ehdr->e_ident[EI_DATA], fname); fail_file(); - break; - case ELFDATA2LSB: - if (*(unsigned char const *)&endian != 1) { + } break; + case ELFDATA2LSB: { + if (1 != *(unsigned char const *)&endian) { /* main() is big endian, file.o is little endian. */ w = w4rev; w2 = w2rev; w8 = w8rev; } - break; - case ELFDATA2MSB: - if (*(unsigned char const *)&endian != 0) { + } break; + case ELFDATA2MSB: { + if (0 != *(unsigned char const *)&endian) { /* main() is little endian, file.o is big endian. 
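 * (The probe works because "endian" is the 32-bit constant 1:
 * its first byte reads as 1 on a little-endian host and 0 on a
 * big-endian one, so a mismatch with the file's EI_DATA
 * installs the byte-reversing accessors w4rev/w2rev/w8rev.)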
*/ w = w4rev; w2 = w2rev; w8 = w8rev; } - break; + } break; } /* end switch */ - if (memcmp(ELFMAG, ehdr->e_ident, SELFMAG) != 0 - || w2(ehdr->e_type) != ET_REL - || ehdr->e_ident[EI_VERSION] != EV_CURRENT) { + if (0 != memcmp(ELFMAG, ehdr->e_ident, SELFMAG) + || ET_REL != w2(ehdr->e_type) + || EV_CURRENT != ehdr->e_ident[EI_VERSION]) { fprintf(stderr, "unrecognized ET_REL file %s\n", fname); fail_file(); } gpfx = 0; switch (w2(ehdr->e_machine)) { - default: + default: { fprintf(stderr, "unrecognized e_machine %d %s\n", w2(ehdr->e_machine), fname); fail_file(); - break; - case EM_386: - reltype = R_386_32; - make_nop = make_nop_x86; - ideal_nop = ideal_nop5_x86_32; - mcount_adjust_32 = -1; - break; + } break; + case EM_386: reltype = R_386_32; break; case EM_ARM: reltype = R_ARM_ABS32; altmcount = "__gnu_mcount_nc"; break; @@ -347,91 +311,67 @@ do_file(char const *const fname) case EM_S390: /* reltype: e_class */ gpfx = '_'; break; case EM_SH: reltype = R_SH_DIR32; break; case EM_SPARCV9: reltype = R_SPARC_64; gpfx = '_'; break; - case EM_X86_64: - make_nop = make_nop_x86; - ideal_nop = ideal_nop5_x86_64; - reltype = R_X86_64_64; - mcount_adjust_64 = -1; - break; + case EM_X86_64: reltype = R_X86_64_64; break; } /* end switch */ switch (ehdr->e_ident[EI_CLASS]) { - default: + default: { fprintf(stderr, "unrecognized ELF class %d %s\n", ehdr->e_ident[EI_CLASS], fname); fail_file(); - break; - case ELFCLASS32: - if (w2(ehdr->e_ehsize) != sizeof(Elf32_Ehdr) - || w2(ehdr->e_shentsize) != sizeof(Elf32_Shdr)) { + } break; + case ELFCLASS32: { + if (sizeof(Elf32_Ehdr) != w2(ehdr->e_ehsize) + || sizeof(Elf32_Shdr) != w2(ehdr->e_shentsize)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (w2(ehdr->e_machine) == EM_S390) { + if (EM_S390 == w2(ehdr->e_machine)) reltype = R_390_32; - mcount_adjust_32 = -4; - } - if (w2(ehdr->e_machine) == EM_MIPS) { + if (EM_MIPS == w2(ehdr->e_machine)) { reltype = R_MIPS_32; is_fake_mcount32 = MIPS32_is_fake_mcount; } do32(ehdr, fname, reltype); - break; + } break; case ELFCLASS64: { Elf64_Ehdr *const ghdr = (Elf64_Ehdr *)ehdr; - if (w2(ghdr->e_ehsize) != sizeof(Elf64_Ehdr) - || w2(ghdr->e_shentsize) != sizeof(Elf64_Shdr)) { + if (sizeof(Elf64_Ehdr) != w2(ghdr->e_ehsize) + || sizeof(Elf64_Shdr) != w2(ghdr->e_shentsize)) { fprintf(stderr, "unrecognized ET_REL file: %s\n", fname); fail_file(); } - if (w2(ghdr->e_machine) == EM_S390) { + if (EM_S390 == w2(ghdr->e_machine)) reltype = R_390_64; - mcount_adjust_64 = -8; - } - if (w2(ghdr->e_machine) == EM_MIPS) { + if (EM_MIPS == w2(ghdr->e_machine)) { reltype = R_MIPS_64; Elf64_r_sym = MIPS64_r_sym; Elf64_r_info = MIPS64_r_info; is_fake_mcount64 = MIPS64_is_fake_mcount; } do64(ghdr, fname, reltype); - break; - } + } break; } /* end switch */ cleanup(); } int -main(int argc, char *argv[]) +main(int argc, char const *argv[]) { const char ftrace[] = "/ftrace.o"; int ftrace_size = sizeof(ftrace) - 1; int n_error = 0; /* gcc-4.3.0 false positive complaint */ - int c; - int i; - - while ((c = getopt(argc, argv, "w")) >= 0) { - switch (c) { - case 'w': - warn_on_notrace_sect = 1; - break; - default: - fprintf(stderr, "usage: recordmcount [-w] file.o...\n"); - return 0; - } - } - if ((argc - optind) < 1) { - fprintf(stderr, "usage: recordmcount [-w] file.o...\n"); + if (argc <= 1) { + fprintf(stderr, "usage: recordmcount file.o...\n"); return 0; } /* Process each file in turn, allowing deep failure. 
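 * "Deep failure" means fail_file() and succeed_file() may
 * longjmp() out of any depth of do_file() back to the setjmp()
 * below; the per-file pattern is roughly:
 *
 *	switch (setjmp(jmpenv)) {
 *	case SJ_SETJMP:  do_file(argv[0]); break;  direct return, do the work
 *	case SJ_FAIL:    ++n_error;        break;  got here via longjmp on error
 *	case SJ_SUCCEED:                   break;  premature success, not an error
 *	}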
*/ - for (i = optind; i < argc; i++) { - char *file = argv[i]; + for (--argc, ++argv; 0 < argc; --argc, ++argv) { int const sjval = setjmp(jmpenv); int len; @@ -440,29 +380,29 @@ main(int argc, char *argv[]) * function but does not call it. Since ftrace.o should * not be traced anyway, we just skip it. */ - len = strlen(file); + len = strlen(argv[0]); if (len >= ftrace_size && - strcmp(file + (len - ftrace_size), ftrace) == 0) + strcmp(argv[0] + (len - ftrace_size), ftrace) == 0) continue; switch (sjval) { - default: - fprintf(stderr, "internal error: %s\n", file); + default: { + fprintf(stderr, "internal error: %s\n", argv[0]); exit(1); - break; - case SJ_SETJMP: /* normal sequence */ + } break; + case SJ_SETJMP: { /* normal sequence */ /* Avoid problems if early cleanup() */ fd_map = -1; ehdr_curr = NULL; mmap_failed = 1; - do_file(file); - break; - case SJ_FAIL: /* error in do_file or below */ + do_file(argv[0]); + } break; + case SJ_FAIL: { /* error in do_file or below */ ++n_error; - break; - case SJ_SUCCEED: /* premature success */ + } break; + case SJ_SUCCEED: { /* premature success */ /* do nothing */ - break; + } break; } /* end switch */ } return !!n_error; diff --git a/trunk/scripts/recordmcount.h b/trunk/scripts/recordmcount.h index 4be60364a405..baf187bee983 100644 --- a/trunk/scripts/recordmcount.h +++ b/trunk/scripts/recordmcount.h @@ -22,15 +22,11 @@ #undef is_fake_mcount #undef fn_is_fake_mcount #undef MIPS_is_fake_mcount -#undef mcount_adjust #undef sift_rel_mcount -#undef nop_mcount #undef find_secsym_ndx #undef __has_rel_mcount #undef has_rel_mcount #undef tot_relsize -#undef get_mcountsym -#undef get_sym_str_and_relp #undef do_func #undef Elf_Addr #undef Elf_Ehdr @@ -53,18 +49,14 @@ #ifdef RECORD_MCOUNT_64 # define append_func append64 # define sift_rel_mcount sift64_rel_mcount -# define nop_mcount nop_mcount_64 # define find_secsym_ndx find64_secsym_ndx # define __has_rel_mcount __has64_rel_mcount # define has_rel_mcount has64_rel_mcount # define tot_relsize tot64_relsize -# define get_sym_str_and_relp get_sym_str_and_relp_64 # define do_func do64 -# define get_mcountsym get_mcountsym_64 # define is_fake_mcount is_fake_mcount64 # define fn_is_fake_mcount fn_is_fake_mcount64 # define MIPS_is_fake_mcount MIPS64_is_fake_mcount -# define mcount_adjust mcount_adjust_64 # define Elf_Addr Elf64_Addr # define Elf_Ehdr Elf64_Ehdr # define Elf_Shdr Elf64_Shdr @@ -85,18 +77,14 @@ #else # define append_func append32 # define sift_rel_mcount sift32_rel_mcount -# define nop_mcount nop_mcount_32 # define find_secsym_ndx find32_secsym_ndx # define __has_rel_mcount __has32_rel_mcount # define has_rel_mcount has32_rel_mcount # define tot_relsize tot32_relsize -# define get_sym_str_and_relp get_sym_str_and_relp_32 # define do_func do32 -# define get_mcountsym get_mcountsym_32 # define is_fake_mcount is_fake_mcount32 # define fn_is_fake_mcount fn_is_fake_mcount32 # define MIPS_is_fake_mcount MIPS32_is_fake_mcount -# define mcount_adjust mcount_adjust_32 # define Elf_Addr Elf32_Addr # define Elf_Ehdr Elf32_Ehdr # define Elf_Shdr Elf32_Shdr @@ -135,8 +123,6 @@ static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type) } static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO; -static int mcount_adjust = 0; - /* * MIPS mcount long call has 2 _mcount symbols, only the position of the 1st * _mcount symbol is needed for dynamic function tracer, with it, to disable @@ -248,49 +234,6 @@ static void append_func(Elf_Ehdr *const ehdr, uwrite(fd_map, 
ehdr, sizeof(*ehdr)); } -static unsigned get_mcountsym(Elf_Sym const *const sym0, - Elf_Rel const *relp, - char const *const str0) -{ - unsigned mcountsym = 0; - - Elf_Sym const *const symp = - &sym0[Elf_r_sym(relp)]; - char const *symname = &str0[w(symp->st_name)]; - char const *mcount = gpfx == '_' ? "_mcount" : "mcount"; - - if (symname[0] == '.') - ++symname; /* ppc64 hack */ - if (strcmp(mcount, symname) == 0 || - (altmcount && strcmp(altmcount, symname) == 0)) - mcountsym = Elf_r_sym(relp); - - return mcountsym; -} - -static void get_sym_str_and_relp(Elf_Shdr const *const relhdr, - Elf_Ehdr const *const ehdr, - Elf_Sym const **sym0, - char const **str0, - Elf_Rel const **relp) -{ - Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) - + (void *)ehdr); - unsigned const symsec_sh_link = w(relhdr->sh_link); - Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; - Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; - Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) - + (void *)ehdr); - - *sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) - + (void *)ehdr); - - *str0 = (char const *)(_w(strsec->sh_offset) - + (void *)ehdr); - - *relp = rel0; -} - /* * Look at the relocations in order to find the calls to mcount. * Accumulate the section offsets that are found, and their relocation info, @@ -307,27 +250,47 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, { uint_t *const mloc0 = mlocp; Elf_Rel *mrelp = *mrelpp; - Elf_Sym const *sym0; - char const *str0; - Elf_Rel const *relp; + Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) + + (void *)ehdr); + unsigned const symsec_sh_link = w(relhdr->sh_link); + Elf_Shdr const *const symsec = &shdr0[symsec_sh_link]; + Elf_Sym const *const sym0 = (Elf_Sym const *)(_w(symsec->sh_offset) + + (void *)ehdr); + + Elf_Shdr const *const strsec = &shdr0[w(symsec->sh_link)]; + char const *const str0 = (char const *)(_w(strsec->sh_offset) + + (void *)ehdr); + + Elf_Rel const *const rel0 = (Elf_Rel const *)(_w(relhdr->sh_offset) + + (void *)ehdr); unsigned rel_entsize = _w(relhdr->sh_entsize); unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; + Elf_Rel const *relp = rel0; + unsigned mcountsym = 0; unsigned t; - get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp); - for (t = nrel; t; --t) { - if (!mcountsym) - mcountsym = get_mcountsym(sym0, relp, str0); + if (!mcountsym) { + Elf_Sym const *const symp = + &sym0[Elf_r_sym(relp)]; + char const *symname = &str0[w(symp->st_name)]; + char const *mcount = '_' == gpfx ? "_mcount" : "mcount"; + + if ('.' == symname[0]) + ++symname; /* ppc64 hack */ + if (0 == strcmp(mcount, symname) || + (altmcount && 0 == strcmp(altmcount, symname))) + mcountsym = Elf_r_sym(relp); + } if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { - uint_t const addend = - _w(_w(relp->r_offset) - recval + mcount_adjust); + uint_t const addend = _w(_w(relp->r_offset) - recval); + mrelp->r_offset = _w(offbase + ((void *)mlocp - (void *)mloc0)); Elf_r_info(mrelp, recsym, reltype); - if (rel_entsize == sizeof(Elf_Rela)) { + if (sizeof(Elf_Rela) == rel_entsize) { ((Elf_Rela *)mrelp)->r_addend = addend; *mlocp++ = 0; } else @@ -341,63 +304,6 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, return mlocp; } -/* - * Read the relocation table again, but this time its called on sections - * that are not going to be traced. The mcount calls here will be converted - * into nops. 
- */ -static void nop_mcount(Elf_Shdr const *const relhdr, - Elf_Ehdr const *const ehdr, - const char *const txtname) -{ - Elf_Shdr *const shdr0 = (Elf_Shdr *)(_w(ehdr->e_shoff) - + (void *)ehdr); - Elf_Sym const *sym0; - char const *str0; - Elf_Rel const *relp; - Elf_Shdr const *const shdr = &shdr0[w(relhdr->sh_info)]; - unsigned rel_entsize = _w(relhdr->sh_entsize); - unsigned const nrel = _w(relhdr->sh_size) / rel_entsize; - unsigned mcountsym = 0; - unsigned t; - int once = 0; - - get_sym_str_and_relp(relhdr, ehdr, &sym0, &str0, &relp); - - for (t = nrel; t; --t) { - int ret = -1; - - if (!mcountsym) - mcountsym = get_mcountsym(sym0, relp, str0); - - if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { - if (make_nop) - ret = make_nop((void *)ehdr, shdr->sh_offset + relp->r_offset); - if (warn_on_notrace_sect && !once) { - printf("Section %s has mcount callers being ignored\n", - txtname); - once = 1; - /* just warn? */ - if (!make_nop) - return; - } - } - - /* - * If we successfully removed the mcount, mark the relocation - * as a nop (don't do anything with it). - */ - if (!ret) { - Elf_Rel rel; - rel = *(Elf_Rel *)relp; - Elf_r_info(&rel, Elf_r_sym(relp), rel_type_nop); - ulseek(fd_map, (void *)relp - (void *)ehdr, SEEK_SET); - uwrite(fd_map, &rel, sizeof(rel)); - } - relp = (Elf_Rel const *)(rel_entsize + (void *)relp); - } -} - /* * Find a symbol in the given section, to be used as the base for relocating @@ -448,13 +354,13 @@ __has_rel_mcount(Elf_Shdr const *const relhdr, /* is SHT_REL or SHT_RELA */ Elf_Shdr const *const txthdr = &shdr0[w(relhdr->sh_info)]; char const *const txtname = &shstrtab[w(txthdr->sh_name)]; - if (strcmp("__mcount_loc", txtname) == 0) { + if (0 == strcmp("__mcount_loc", txtname)) { fprintf(stderr, "warning: __mcount_loc already exists: %s\n", fname); succeed_file(); } - if (w(txthdr->sh_type) != SHT_PROGBITS || - !(w(txthdr->sh_flags) & SHF_EXECINSTR)) + if (SHT_PROGBITS != w(txthdr->sh_type) || + !is_mcounted_section_name(txtname)) return NULL; return txtname; } @@ -464,7 +370,7 @@ static char const *has_rel_mcount(Elf_Shdr const *const relhdr, char const *const shstrtab, char const *const fname) { - if (w(relhdr->sh_type) != SHT_REL && w(relhdr->sh_type) != SHT_RELA) + if (SHT_REL != w(relhdr->sh_type) && SHT_RELA != w(relhdr->sh_type)) return NULL; return __has_rel_mcount(relhdr, shdr0, shstrtab, fname); } @@ -477,11 +383,9 @@ static unsigned tot_relsize(Elf_Shdr const *const shdr0, { unsigned totrelsz = 0; Elf_Shdr const *shdrp = shdr0; - char const *txtname; for (; nhdr; --nhdr, ++shdrp) { - txtname = has_rel_mcount(shdrp, shdr0, shstrtab, fname); - if (txtname && is_mcounted_section_name(txtname)) + if (has_rel_mcount(shdrp, shdr0, shstrtab, fname)) totrelsz += _w(shdrp->sh_size); } return totrelsz; @@ -517,7 +421,7 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) for (relhdr = shdr0, k = nhdr; k; --k, ++relhdr) { char const *const txtname = has_rel_mcount(relhdr, shdr0, shstrtab, fname); - if (txtname && is_mcounted_section_name(txtname)) { + if (txtname) { uint_t recval = 0; unsigned const recsym = find_secsym_ndx( w(relhdr->sh_info), txtname, &recval, @@ -528,12 +432,6 @@ do_func(Elf_Ehdr *const ehdr, char const *const fname, unsigned const reltype) mlocp = sift_rel_mcount(mlocp, (void *)mlocp - (void *)mloc0, &mrelp, relhdr, ehdr, recsym, recval, reltype); - } else if (txtname && (warn_on_notrace_sect || make_nop)) { - /* - * This section is ignored by ftrace, but still - * has mcount calls. 
Convert them to nops now. - */ - nop_mcount(relhdr, ehdr, txtname); } } if (mloc0 != mlocp) { diff --git a/trunk/scripts/recordmcount.pl b/trunk/scripts/recordmcount.pl index 858966ab019c..4be0deea71ca 100755 --- a/trunk/scripts/recordmcount.pl +++ b/trunk/scripts/recordmcount.pl @@ -134,7 +134,6 @@ ".sched.text" => 1, ".spinlock.text" => 1, ".irqentry.text" => 1, - ".kprobes.text" => 1, ".text.unlikely" => 1, ); @@ -223,7 +222,6 @@ sub check_objcopy $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\smcount([+-]0x[0-9a-zA-Z]+)?\$"; $type = ".quad"; $alignment = 8; - $mcount_adjust = -1; # force flags for this arch $ld .= " -m elf_x86_64"; @@ -233,7 +231,6 @@ sub check_objcopy } elsif ($arch eq "i386") { $alignment = 4; - $mcount_adjust = -1; # force flags for this arch $ld .= " -m elf_i386"; @@ -243,14 +240,12 @@ sub check_objcopy } elsif ($arch eq "s390" && $bits == 32) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_32\\s+_mcount\$"; - $mcount_adjust = -4; $alignment = 4; $ld .= " -m elf_s390"; $cc .= " -m31"; } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -8; $alignment = 8; $type = ".quad"; $ld .= " -m elf64_s390"; diff --git a/trunk/tools/perf/builtin-stat.c b/trunk/tools/perf/builtin-stat.c index a9f06715e44d..602c3c96fa1e 100644 --- a/trunk/tools/perf/builtin-stat.c +++ b/trunk/tools/perf/builtin-stat.c @@ -6,28 +6,24 @@ * * Sample output: - $ perf stat ./hackbench 10 + $ perf stat ~/hackbench 10 + Time: 0.104 - Time: 0.118 + Performance counter stats for '/home/mingo/hackbench': - Performance counter stats for './hackbench 10': + 1255.538611 task clock ticks # 10.143 CPU utilization factor + 54011 context switches # 0.043 M/sec + 385 CPU migrations # 0.000 M/sec + 17755 pagefaults # 0.014 M/sec + 3808323185 CPU cycles # 3033.219 M/sec + 1575111190 instructions # 1254.530 M/sec + 17367895 cache references # 13.833 M/sec + 7674421 cache misses # 6.112 M/sec - 1708.761321 task-clock # 11.037 CPUs utilized - 41,190 context-switches # 0.024 M/sec - 6,735 CPU-migrations # 0.004 M/sec - 17,318 page-faults # 0.010 M/sec - 5,205,202,243 cycles # 3.046 GHz - 3,856,436,920 stalled-cycles-frontend # 74.09% frontend cycles idle - 1,600,790,871 stalled-cycles-backend # 30.75% backend cycles idle - 2,603,501,247 instructions # 0.50 insns per cycle - # 1.48 stalled cycles per insn - 484,357,498 branches # 283.455 M/sec - 6,388,934 branch-misses # 1.32% of all branches - - 0.154822978 seconds time elapsed + Wall-clock time elapsed: 123.786620 msecs * - * Copyright (C) 2008-2011, Red Hat Inc, Ingo Molnar + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar * * Improvements and fixes by: * @@ -79,10 +75,22 @@ static struct perf_event_attr default_attrs[] = { }; /* - * Detailed stats (-d), covering the L1 and last level data caches: + * Detailed stats: */ static struct perf_event_attr detailed_attrs[] = { + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = 
PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, + { .type = PERF_TYPE_HW_CACHE, .config = PERF_COUNT_HW_CACHE_L1D << 0 | @@ -108,69 +116,6 @@ static struct perf_event_attr detailed_attrs[] = { (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, }; -/* - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: - */ -static struct perf_event_attr very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - -}; - -/* - * Very, very detailed stats (-d -d -d), adding prefetch events: - */ -static struct perf_event_attr very_very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; - - - struct perf_evlist *evsel_list; static bool system_wide = false; @@ -184,7 +129,7 @@ static pid_t target_pid = -1; static pid_t target_tid = -1; static pid_t child_pid = -1; static bool null_run = false; -static int detailed_run = 0; +static bool detailed_run = false; static bool sync_run = false; static bool big_num = true; static int big_num_opt = -1; @@ -261,10 +206,6 @@ struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; struct stats runtime_branches_stats[MAX_NR_CPUS]; struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; -struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; -struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; -struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; -struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; struct stats walltime_nsecs_stats; static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -321,14 +262,6 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count) update_stats(&runtime_cacherefs_stats[0], count[0]); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) update_stats(&runtime_l1_dcache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - 
update_stats(&runtime_itlb_cache_stats[0], count[0]); } /* @@ -531,7 +464,7 @@ static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg) { double msecs = avg / 1e6; char cpustr[16] = { '\0', }; - const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s"; + const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-24s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -642,98 +575,6 @@ static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel __used, dou fprintf(stderr, " of all L1-dcache hits "); } -static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_l1_icache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all L1-icache hits "); -} - -static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_dtlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all dTLB cache hits "); -} - -static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_itlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all iTLB cache hits "); -} - -static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_ll_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = PERF_COLOR_NORMAL; - if (ratio > 20.0) - color = PERF_COLOR_RED; - else if (ratio > 10.0) - color = PERF_COLOR_MAGENTA; - else if (ratio > 5.0) - color = PERF_COLOR_YELLOW; - - fprintf(stderr, " # "); - color_fprintf(stderr, color, "%6.2f%%", ratio); - fprintf(stderr, " of all LL-cache hits "); -} - static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) { double total, ratio = 0.0; @@ -743,9 +584,9 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (csv_output) fmt = "%s%.0f%s%s"; else if (big_num) - fmt = "%s%'18.0f%s%-25s"; + fmt = "%s%'18.0f%s%-24s"; else - fmt = "%s%18.0f%s%-25s"; + fmt = "%s%18.0f%s%-24s"; if (no_aggr) sprintf(cpustr, "CPU%*d%s", @@ -775,7 +616,7 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg) if (total && avg) { ratio = total / avg; - fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); + fprintf(stderr, "\n # %5.2f stalled cycles per insn", ratio); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && @@ -788,34 +629,6 @@ static void abs_printout(int cpu, 
struct perf_evsel *evsel, double avg) ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && runtime_l1_dcache_stats[cpu].n != 0) { print_l1_dcache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[cpu].n != 0) { - print_l1_icache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[cpu].n != 0) { - print_dtlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[cpu].n != 0) { - print_itlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[cpu].n != 0) { - print_ll_cache_misses(cpu, evsel, avg); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && runtime_cacherefs_stats[cpu].n != 0) { total = avg_stats(&runtime_cacherefs_stats[cpu]); @@ -891,7 +704,7 @@ static void print_counter_aggr(struct perf_evsel *counter) avg_enabled = avg_stats(&ps->res_stats[1]); avg_running = avg_stats(&ps->res_stats[2]); - fprintf(stderr, " [%5.2f%%]", 100 * avg_running / avg_enabled); + fprintf(stderr, " (%.2f%%)", 100 * avg_running / avg_enabled); } fprintf(stderr, "\n"); } @@ -974,12 +787,10 @@ static void print_stat(int argc, const char **argv) } if (!csv_output) { - if (!null_run) - fprintf(stderr, "\n"); - fprintf(stderr, " %17.9f seconds time elapsed", + fprintf(stderr, "\n"); + fprintf(stderr, " %18.9f seconds time elapsed", avg_stats(&walltime_nsecs_stats)/1e9); if (run_count > 1) { - fprintf(stderr, " "); print_noise_pct(stddev_stats(&walltime_nsecs_stats), avg_stats(&walltime_nsecs_stats)); } @@ -1043,7 +854,7 @@ static const struct option options[] = { "repeat command and print average + stddev (max: 100)"), OPT_BOOLEAN('n', "null", &null_run, "null run - dont start any counters"), - OPT_INCR('d', "detailed", &detailed_run, + OPT_BOOLEAN('d', "detailed", &detailed_run, "detailed run - start a lot of events"), OPT_BOOLEAN('S', "sync", &sync_run, "call sync() before starting a run"), @@ -1062,70 +873,6 @@ static const struct option options[] = { OPT_END() }; -/* - * Add default attributes, if there were no attributes specified or - * if -d/--detailed, -d -d or -d -d -d is used: - */ -static int add_default_attributes(void) -{ - struct perf_evsel *pos; - size_t attr_nr = 0; - size_t c; - - /* Set attrs if no event is selected and !null_run: */ - if (null_run) - return 0; - - if (!evsel_list->nr_entries) { - for (c = 0; c < ARRAY_SIZE(default_attrs); c++) { - pos = perf_evsel__new(default_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - attr_nr += c; - } - - /* Detailed events get appended to the event list: */ - - if (detailed_run < 1) - return 0; - - /* Append detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(detailed_attrs); c++) { - pos = perf_evsel__new(detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - 
perf_evlist__add(evsel_list, pos); - } - attr_nr += c; - - if (detailed_run < 2) - return 0; - - /* Append very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_detailed_attrs); c++) { - pos = perf_evsel__new(very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - - if (detailed_run < 3) - return 0; - - /* Append very, very detailed run extra attributes: */ - for (c = 0; c < ARRAY_SIZE(very_very_detailed_attrs); c++) { - pos = perf_evsel__new(very_very_detailed_attrs + c, c + attr_nr); - if (pos == NULL) - return -1; - perf_evlist__add(evsel_list, pos); - } - - - return 0; -} - int cmd_stat(int argc, const char **argv, const char *prefix __used) { struct perf_evsel *pos; @@ -1171,8 +918,28 @@ int cmd_stat(int argc, const char **argv, const char *prefix __used) usage_with_options(stat_usage, options); } - if (add_default_attributes()) - goto out; + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (detailed_run) { + size_t c; + + for (c = 0; c < ARRAY_SIZE(detailed_attrs); ++c) { + pos = perf_evsel__new(&detailed_attrs[c], c); + if (pos == NULL) + goto out; + perf_evlist__add(evsel_list, pos); + } + } + /* Set attrs and nr_counters if no event is selected and !null_run */ + if (!detailed_run && !null_run && !evsel_list->nr_entries) { + size_t c; + + for (c = 0; c < ARRAY_SIZE(default_attrs); ++c) { + pos = perf_evsel__new(&default_attrs[c], c); + if (pos == NULL) + goto out; + perf_evlist__add(evsel_list, pos); + } + } if (target_pid != -1) target_tid = target_pid; diff --git a/trunk/tools/perf/util/include/asm/alternative-asm.h b/trunk/tools/perf/util/include/asm/alternative-asm.h deleted file mode 100644 index 6789d788d494..000000000000 --- a/trunk/tools/perf/util/include/asm/alternative-asm.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _PERF_ASM_ALTERNATIVE_ASM_H -#define _PERF_ASM_ALTERNATIVE_ASM_H - -/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ - -#define altinstruction_entry # - -#endif
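
Note on the open-coded .altinstructions records: the clear_page_64.S,
copy_user_64.S, memcpy_64.S and memset_64.S hunks all drop the
altinstruction_entry macro in favor of the raw sequence ".align 8;
.quad orig; .quad repl; .word feature; .byte orig_len; .byte repl_len".
That sequence is the on-disk form of the record apply_alternatives()
walks. A minimal C sketch of the layout, assuming the x86-64 struct of
this era (the name alt_entry and its field names are illustrative, not
the kernel's exact struct alt_instr):

	#include <stdint.h>

	/* One .altinstructions record as the directives above emit it:
	 * 20 payload bytes, padded to the next 8-byte boundary by the
	 * per-record .align 8. */
	struct alt_entry {
		uint64_t instr;          /* address of the code to patch */
		uint64_t replacement;    /* address in .altinstr_replacement */
		uint16_t cpuid;          /* feature bit, e.g. X86_FEATURE_REP_GOOD */
		uint8_t  instrlen;       /* room available at instr */
		uint8_t  replacementlen; /* bytes to copy, <= instrlen */
	} __attribute__((packed));

When the CPU sets the cpuid feature bit, apply_alternatives() copies
replacementlen bytes from replacement over instr and NOP-pads the rest,
which is why the alternative.c hunk keeps its
BUG_ON(a->replacementlen > a->instrlen).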
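
Likewise, the ftrace_call_adjust() and recordmcount hunks are two
halves of one move: with mcount_adjust dropped from the build-time
tool, the kernel again normalizes mcount call sites at runtime. On x86
the recorded address is that of the call's 4-byte displacement, one
byte past the 0xe8 opcode, hence the restored "return addr - 1". A
standalone sketch of the arithmetic (hypothetical helpers, not kernel
API; assumes a little-endian host):

	#include <stdint.h>
	#include <string.h>

	/* recordmcount stores the address of the rel32 field of a
	 * 5-byte "e8 xx xx xx xx" call; the opcode sits one byte
	 * earlier. */
	static unsigned long call_insn_start(unsigned long reloc_addr)
	{
		return reloc_addr - 1;
	}

	/* For illustration: where such a call lands. */
	static unsigned long call_target(const unsigned char *insn)
	{
		int32_t rel;

		memcpy(&rel, insn + 1, sizeof(rel));    /* rel32 displacement */
		return (unsigned long)(insn + 5) + rel; /* relative to next insn */
	}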