From b253cc8ab7ac7d212beacef456ae9a16c2d3bb2f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 17 Aug 2009 16:18:05 +0200 Subject: [PATCH] --- yaml --- r: 158278 b: refs/heads/master c: 520509436417901f30106e021e037c75dfe5386c h: refs/heads/master v: v3 --- [refs] | 2 +- trunk/arch/powerpc/include/asm/pgtable.h | 6 +- trunk/arch/powerpc/kernel/Makefile | 2 +- trunk/arch/powerpc/kernel/asm-offsets.c | 2 - trunk/arch/powerpc/kernel/exceptions-64s.S | 19 - trunk/arch/powerpc/kernel/perf_callchain.c | 527 ------------------ trunk/arch/powerpc/mm/slb.c | 37 +- trunk/arch/powerpc/mm/stab.c | 11 +- trunk/include/linux/perf_counter.h | 5 - trunk/kernel/perf_counter.c | 143 +---- trunk/tools/perf/Documentation/Makefile | 2 +- .../{examples.txt => perf-examples.txt} | 0 trunk/tools/perf/builtin-annotate.c | 14 - trunk/tools/perf/builtin-report.c | 32 +- trunk/tools/perf/builtin-top.c | 17 +- trunk/tools/perf/util/thread.c | 25 +- trunk/tools/perf/util/trace-event-info.c | 491 ++++++++++++++++ trunk/tools/perf/util/trace-event.h | 238 ++++++++ trunk/tools/perf/util/util.h | 4 + 19 files changed, 778 insertions(+), 799 deletions(-) delete mode 100644 trunk/arch/powerpc/kernel/perf_callchain.c rename trunk/tools/perf/Documentation/{examples.txt => perf-examples.txt} (100%) create mode 100644 trunk/tools/perf/util/trace-event-info.c create mode 100644 trunk/tools/perf/util/trace-event.h diff --git a/[refs] b/[refs] index 7e87d6e23051..11ac73e29747 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 119e7a22bb70d84849384e5113792cd45afa4f85 +refs/heads/master: 520509436417901f30106e021e037c75dfe5386c diff --git a/trunk/arch/powerpc/include/asm/pgtable.h b/trunk/arch/powerpc/include/asm/pgtable.h index 2a5da069714e..eb17da781128 100644 --- a/trunk/arch/powerpc/include/asm/pgtable.h +++ b/trunk/arch/powerpc/include/asm/pgtable.h @@ -104,8 +104,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, else pte_update(ptep, 
~_PAGE_HASHPTE, pte_val(pte)); -#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) - /* Second case is 32-bit with 64-bit PTE. In this case, we +#elif defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT) && defined(CONFIG_SMP) + /* Second case is 32-bit with 64-bit PTE in SMP mode. In this case, we * can just store as long as we do the two halves in the right order * with a barrier in between. This is possible because we take care, * in the hash code, to pre-invalidate if the PTE was already hashed, @@ -140,7 +140,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, #else /* Anything else just stores the PTE normally. That covers all 64-bit - * cases, and 32-bit non-hash with 32-bit PTEs. + * cases, and 32-bit non-hash with 64-bit PTEs in UP mode */ *ptep = pte; #endif diff --git a/trunk/arch/powerpc/kernel/Makefile b/trunk/arch/powerpc/kernel/Makefile index 9619285f64e8..b73396b93905 100644 --- a/trunk/arch/powerpc/kernel/Makefile +++ b/trunk/arch/powerpc/kernel/Makefile @@ -97,7 +97,7 @@ obj64-$(CONFIG_AUDIT) += compat_audit.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o -obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o perf_callchain.o +obj-$(CONFIG_PPC_PERF_CTRS) += perf_counter.o obj64-$(CONFIG_PPC_PERF_CTRS) += power4-pmu.o ppc970-pmu.o power5-pmu.o \ power5+-pmu.o power6-pmu.o power7-pmu.o obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o diff --git a/trunk/arch/powerpc/kernel/asm-offsets.c b/trunk/arch/powerpc/kernel/asm-offsets.c index 197b15646eeb..561b64652311 100644 --- a/trunk/arch/powerpc/kernel/asm-offsets.c +++ b/trunk/arch/powerpc/kernel/asm-offsets.c @@ -67,8 +67,6 @@ int main(void) DEFINE(MMCONTEXTID, offsetof(struct mm_struct, context.id)); #ifdef CONFIG_PPC64 DEFINE(AUDITCONTEXT, offsetof(struct task_struct, audit_context)); - DEFINE(SIGSEGV, SIGSEGV); - DEFINE(NMI_MASK, NMI_MASK); #else DEFINE(THREAD_INFO, offsetof(struct task_struct, stack)); #endif /* CONFIG_PPC64 */ diff 
--git a/trunk/arch/powerpc/kernel/exceptions-64s.S b/trunk/arch/powerpc/kernel/exceptions-64s.S index 8ac85e08ffae..eb898112e577 100644 --- a/trunk/arch/powerpc/kernel/exceptions-64s.S +++ b/trunk/arch/powerpc/kernel/exceptions-64s.S @@ -729,11 +729,6 @@ BEGIN_FTR_SECTION bne- do_ste_alloc /* If so handle it */ END_FTR_SECTION_IFCLR(CPU_FTR_SLB) - clrrdi r11,r1,THREAD_SHIFT - lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */ - andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */ - bne 77f /* then don't call hash_page now */ - /* * On iSeries, we soft-disable interrupts here, then * hard-enable interrupts so that the hash_page code can spin on @@ -838,20 +833,6 @@ handle_page_fault: bl .low_hash_fault b .ret_from_except -/* - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ -77: bl .save_nvgprs - mr r4,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - li r5,SIGSEGV - bl .bad_page_fault - b .ret_from_except - /* here we have a segment miss */ do_ste_alloc: bl .ste_allocate /* try to insert stab entry */ diff --git a/trunk/arch/powerpc/kernel/perf_callchain.c b/trunk/arch/powerpc/kernel/perf_callchain.c deleted file mode 100644 index f74b62c67511..000000000000 --- a/trunk/arch/powerpc/kernel/perf_callchain.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * Performance counter callchain support - powerpc architecture code - * - * Copyright © 2009 Paul Mackerras, IBM Corporation. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. 
- */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_PPC64 -#include "ppc32.h" -#endif - -/* - * Store another value in a callchain_entry. - */ -static inline void callchain_store(struct perf_callchain_entry *entry, u64 ip) -{ - unsigned int nr = entry->nr; - - if (nr < PERF_MAX_STACK_DEPTH) { - entry->ip[nr] = ip; - entry->nr = nr + 1; - } -} - -/* - * Is sp valid as the address of the next kernel stack frame after prev_sp? - * The next frame may be in a different stack area but should not go - * back down in the same stack area. - */ -static int valid_next_sp(unsigned long sp, unsigned long prev_sp) -{ - if (sp & 0xf) - return 0; /* must be 16-byte aligned */ - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) - return 0; - if (sp >= prev_sp + STACK_FRAME_OVERHEAD) - return 1; - /* - * sp could decrease when we jump off an interrupt stack - * back to the regular process stack. - */ - if ((sp & ~(THREAD_SIZE - 1)) != (prev_sp & ~(THREAD_SIZE - 1))) - return 1; - return 0; -} - -static void perf_callchain_kernel(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - unsigned long sp, next_sp; - unsigned long next_ip; - unsigned long lr; - long level = 0; - unsigned long *fp; - - lr = regs->link; - sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_KERNEL); - callchain_store(entry, regs->nip); - - if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD)) - return; - - for (;;) { - fp = (unsigned long *) sp; - next_sp = fp[0]; - - if (next_sp == sp + STACK_INT_FRAME_SIZE && - fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { - /* - * This looks like an interrupt frame for an - * interrupt that occurred in the kernel - */ - regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD); - next_ip = regs->nip; - lr = regs->link; - level = 0; - callchain_store(entry, PERF_CONTEXT_KERNEL); - - } else { - if (level == 0) - next_ip = lr; - else - next_ip = fp[STACK_FRAME_LR_SAVE]; - - /* - 
* We can't tell which of the first two addresses - * we get are valid, but we can filter out the - * obviously bogus ones here. We replace them - * with 0 rather than removing them entirely so - * that userspace can tell which is which. - */ - if ((level == 1 && next_ip == lr) || - (level <= 1 && !kernel_text_address(next_ip))) - next_ip = 0; - - ++level; - } - - callchain_store(entry, next_ip); - if (!valid_next_sp(next_sp, sp)) - return; - sp = next_sp; - } -} - -#ifdef CONFIG_PPC64 - -#ifdef CONFIG_HUGETLB_PAGE -#define is_huge_psize(pagesize) (HPAGE_SHIFT && mmu_huge_psizes[pagesize]) -#else -#define is_huge_psize(pagesize) 0 -#endif - -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. - */ -static int read_user_stack_slow(void __user *ptr, void *ret, int nb) -{ - pgd_t *pgdir; - pte_t *ptep, pte; - int pagesize; - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - unsigned long pfn; - void *kaddr; - - pgdir = current->mm->pgd; - if (!pgdir) - return -EFAULT; - - pagesize = get_slice_psize(current->mm, addr); - - /* align address to page boundary */ - offset = addr & ((1ul << mmu_psize_defs[pagesize].shift) - 1); - addr -= offset; - - if (is_huge_psize(pagesize)) - ptep = huge_pte_offset(current->mm, addr); - else - ptep = find_linux_pte(pgdir, addr); - - if (ptep == NULL) - return -EFAULT; - pte = *ptep; - if (!pte_present(pte) || !(pte_val(pte) & _PAGE_USER)) - return -EFAULT; - pfn = pte_pfn(pte); - if (!page_is_ram(pfn)) - return -EFAULT; - - /* no highmem to worry about here */ - kaddr = pfn_to_kaddr(pfn); - memcpy(ret, kaddr + offset, nb); - return 0; -} - -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!__get_user_inatomic(*ret, 
ptr)) - return 0; - - return read_user_stack_slow(ptr, ret, 8); -} - -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - if (!__get_user_inatomic(*ret, ptr)) - return 0; - - return read_user_stack_slow(ptr, ret, 4); -} - -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) - return 0; - return 1; -} - -/* - * 64-bit user processes use the same stack frame for RT and non-RT signals. - */ -struct signal_frame_64 { - char dummy[__SIGNAL_FRAMESIZE]; - struct ucontext uc; - unsigned long unused[2]; - unsigned int tramp[6]; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - char abigap[288]; -}; - -static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) -{ - if (nip == fp + offsetof(struct signal_frame_64, tramp)) - return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) - return 1; - return 0; -} - -/* - * Do some sanity checking on the signal frame pointed to by sp. - * We check the pinfo and puc pointers in the frame. 
- */ -static int sane_signal_64_frame(unsigned long sp) -{ - struct signal_frame_64 __user *sf; - unsigned long pinfo, puc; - - sf = (struct signal_frame_64 __user *) sp; - if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || - read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) - return 0; - return pinfo == (unsigned long) &sf->info && - puc == (unsigned long) &sf->uc; -} - -static void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - unsigned long sp, next_sp; - unsigned long next_ip; - unsigned long lr; - long level = 0; - struct signal_frame_64 __user *sigframe; - unsigned long __user *fp, *uregs; - - next_ip = regs->nip; - lr = regs->link; - sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); - - for (;;) { - fp = (unsigned long __user *) sp; - if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) - return; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, which can happen when - * transitioning from an alternate signal stack to the - * normal stack. 
- */ - if (next_sp - sp >= sizeof(struct signal_frame_64) && - (is_sigreturn_64_address(next_ip, sp) || - (level <= 1 && is_sigreturn_64_address(lr, sp))) && - sane_signal_64_frame(sp)) { - /* - * This looks like an signal frame - */ - sigframe = (struct signal_frame_64 __user *) sp; - uregs = sigframe->uc.uc_mcontext.gp_regs; - if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || - read_user_stack_64(&uregs[PT_LNK], &lr) || - read_user_stack_64(&uregs[PT_R1], &sp)) - return; - level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} - -static inline int current_is_64bit(void) -{ - /* - * We can't use test_thread_flag() here because we may be on an - * interrupt stack, and the thread flags don't get copied over - * from the thread_info on the main stack to the interrupt stack. - */ - return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT); -} - -#else /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. 
- */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - return __get_user_inatomic(*ret, ptr); -} - -static inline void perf_callchain_user_64(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ -} - -static inline int current_is_64bit(void) -{ - return 0; -} - -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > TASK_SIZE - 32) - return 0; - return 1; -} - -#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE -#define sigcontext32 sigcontext -#define mcontext32 mcontext -#define ucontext32 ucontext -#define compat_siginfo_t struct siginfo - -#endif /* CONFIG_PPC64 */ - -/* - * Layout for non-RT signal frames - */ -struct signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sctx; - struct mcontext32 mctx; - int abigap[56]; -}; - -/* - * Layout for RT signal frames - */ -struct rt_signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32 + 16]; - compat_siginfo_t info; - struct ucontext32 uc; - int abigap[56]; -}; - -static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) - return 1; - if (vdso32_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_sigtramp) - return 1; - return 0; -} - -static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct rt_signal_frame_32, - uc.uc_mcontext.mc_pad)) - return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) - return 1; - return 0; -} - -static int sane_signal_32_frame(unsigned int sp) -{ - struct signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) 
- return 0; - return regs == (unsigned long) &sf->mctx; -} - -static int sane_rt_signal_32_frame(unsigned int sp) -{ - struct rt_signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) - return 0; - return regs == (unsigned long) &sf->uc.uc_mcontext; -} - -static unsigned int __user *signal_frame_32_regs(unsigned int sp, - unsigned int next_sp, unsigned int next_ip) -{ - struct mcontext32 __user *mctx = NULL; - struct signal_frame_32 __user *sf; - struct rt_signal_frame_32 __user *rt_sf; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, for example, when - * transitioning from an alternate signal stack to the - * normal stack. - */ - if (next_sp - sp >= sizeof(struct signal_frame_32) && - is_sigreturn_32_address(next_ip, sp) && - sane_signal_32_frame(sp)) { - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - mctx = &sf->mctx; - } - - if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && - is_rt_sigreturn_32_address(next_ip, sp) && - sane_rt_signal_32_frame(sp)) { - rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - mctx = &rt_sf->uc.uc_mcontext; - } - - if (!mctx) - return NULL; - return mctx->mc_gregs; -} - -static void perf_callchain_user_32(struct pt_regs *regs, - struct perf_callchain_entry *entry) -{ - unsigned int sp, next_sp; - unsigned int next_ip; - unsigned int lr; - long level = 0; - unsigned int __user *fp, *uregs; - - next_ip = regs->nip; - lr = regs->link; - sp = regs->gpr[1]; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); - - while (entry->nr < PERF_MAX_STACK_DEPTH) { - fp = (unsigned int __user *) (unsigned long) sp; - if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) - return; - - uregs = signal_frame_32_regs(sp, next_sp, 
next_ip); - if (!uregs && level <= 1) - uregs = signal_frame_32_regs(sp, next_sp, lr); - if (uregs) { - /* - * This looks like an signal frame, so restart - * the stack trace with the values in it. - */ - if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || - read_user_stack_32(&uregs[PT_LNK], &lr) || - read_user_stack_32(&uregs[PT_R1], &sp)) - return; - level = 0; - callchain_store(entry, PERF_CONTEXT_USER); - callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} - -/* - * Since we can't get PMU interrupts inside a PMU interrupt handler, - * we don't need separate irq and nmi entries here. - */ -static DEFINE_PER_CPU(struct perf_callchain_entry, callchain); - -struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) -{ - struct perf_callchain_entry *entry = &__get_cpu_var(callchain); - - entry->nr = 0; - - if (current->pid == 0) /* idle task? */ - return entry; - - if (!user_mode(regs)) { - perf_callchain_kernel(regs, entry); - if (current->mm) - regs = task_pt_regs(current); - else - regs = NULL; - } - - if (regs) { - if (current_is_64bit()) - perf_callchain_user_64(regs, entry); - else - perf_callchain_user_32(regs, entry); - } - - return entry; -} diff --git a/trunk/arch/powerpc/mm/slb.c b/trunk/arch/powerpc/mm/slb.c index a685652effeb..5b7038f248b6 100644 --- a/trunk/arch/powerpc/mm/slb.c +++ b/trunk/arch/powerpc/mm/slb.c @@ -92,13 +92,15 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, : "memory" ); } -static void __slb_flush_and_rebolt(void) +void slb_flush_and_rebolt(void) { /* If you change this make sure you change SLB_NUM_BOLTED * appropriately too. 
*/ unsigned long linear_llp, vmalloc_llp, lflags, vflags; unsigned long ksp_esid_data, ksp_vsid_data; + WARN_ON(!irqs_disabled()); + linear_llp = mmu_psize_defs[mmu_linear_psize].sllp; vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp; lflags = SLB_VSID_KERNEL | linear_llp; @@ -115,6 +117,12 @@ static void __slb_flush_and_rebolt(void) ksp_vsid_data = get_slb_shadow()->save_area[2].vsid; } + /* + * We can't take a PMU exception in the following code, so hard + * disable interrupts. + */ + hard_irq_disable(); + /* We need to do this all in asm, so we're sure we don't touch * the stack between the slbia and rebolting it. */ asm volatile("isync\n" @@ -131,21 +139,6 @@ static void __slb_flush_and_rebolt(void) : "memory"); } -void slb_flush_and_rebolt(void) -{ - - WARN_ON(!irqs_disabled()); - - /* - * We can't take a PMU exception in the following code, so hard - * disable interrupts. - */ - hard_irq_disable(); - - __slb_flush_and_rebolt(); - get_paca()->slb_cache_ptr = 0; -} - void slb_vmalloc_update(void) { unsigned long vflags; @@ -187,20 +180,12 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2) /* Flush all user entries from the segment table of the current processor. */ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) { - unsigned long offset; + unsigned long offset = get_paca()->slb_cache_ptr; unsigned long slbie_data = 0; unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; - /* - * We need interrupts hard-disabled here, not just soft-disabled, - * so that a PMU interrupt can't occur, which might try to access - * user memory (to get a stack trace) and possible cause an SLB miss - * which would update the slb_cache/slb_cache_ptr fields in the PACA. 
- */ - hard_irq_disable(); - offset = get_paca()->slb_cache_ptr; if (!cpu_has_feature(CPU_FTR_NO_SLBIE_B) && offset <= SLB_CACHE_ENTRIES) { int i; @@ -215,7 +200,7 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) } asm volatile("isync" : : : "memory"); } else { - __slb_flush_and_rebolt(); + slb_flush_and_rebolt(); } /* Workaround POWER5 < DD2.1 issue */ diff --git a/trunk/arch/powerpc/mm/stab.c b/trunk/arch/powerpc/mm/stab.c index ab5fb48b3e90..98cd1dc2ae75 100644 --- a/trunk/arch/powerpc/mm/stab.c +++ b/trunk/arch/powerpc/mm/stab.c @@ -164,7 +164,7 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) { struct stab_entry *stab = (struct stab_entry *) get_paca()->stab_addr; struct stab_entry *ste; - unsigned long offset; + unsigned long offset = __get_cpu_var(stab_cache_ptr); unsigned long pc = KSTK_EIP(tsk); unsigned long stack = KSTK_ESP(tsk); unsigned long unmapped_base; @@ -172,15 +172,6 @@ void switch_stab(struct task_struct *tsk, struct mm_struct *mm) /* Force previous translations to complete. DRENG */ asm volatile("isync" : : : "memory"); - /* - * We need interrupts hard-disabled here, not just soft-disabled, - * so that a PMU interrupt can't occur, which might try to access - * user memory (to get a stack trace) and possible cause an STAB miss - * which would update the stab_cache/stab_cache_ptr per-cpu variables. 
- */ - hard_irq_disable(); - - offset = __get_cpu_var(stab_cache_ptr); if (offset <= NR_STAB_CACHE_ENTRIES) { int i; diff --git a/trunk/include/linux/perf_counter.h b/trunk/include/linux/perf_counter.h index e022b847c90d..b53f7006cc4e 100644 --- a/trunk/include/linux/perf_counter.h +++ b/trunk/include/linux/perf_counter.h @@ -216,7 +216,6 @@ struct perf_counter_attr { #define PERF_COUNTER_IOC_REFRESH _IO ('$', 2) #define PERF_COUNTER_IOC_RESET _IO ('$', 3) #define PERF_COUNTER_IOC_PERIOD _IOW('$', 4, u64) -#define PERF_COUNTER_IOC_SET_OUTPUT _IO ('$', 5) enum perf_counter_ioc_flags { PERF_IOC_FLAG_GROUP = 1U << 0, @@ -416,9 +415,6 @@ enum perf_callchain_context { PERF_CONTEXT_MAX = (__u64)-4095, }; -#define PERF_FLAG_FD_NO_GROUP (1U << 0) -#define PERF_FLAG_FD_OUTPUT (1U << 1) - #ifdef __KERNEL__ /* * Kernel-internal data types and definitions: @@ -540,7 +536,6 @@ struct perf_counter { struct list_head sibling_list; int nr_siblings; struct perf_counter *group_leader; - struct perf_counter *output; const struct pmu *pmu; enum perf_counter_active_state state; diff --git a/trunk/kernel/perf_counter.c b/trunk/kernel/perf_counter.c index 53abcbefa0bf..534e20d14d63 100644 --- a/trunk/kernel/perf_counter.c +++ b/trunk/kernel/perf_counter.c @@ -469,8 +469,7 @@ static void update_counter_times(struct perf_counter *counter) struct perf_counter_context *ctx = counter->ctx; u64 run_end; - if (counter->state < PERF_COUNTER_STATE_INACTIVE || - counter->group_leader->state < PERF_COUNTER_STATE_INACTIVE) + if (counter->state < PERF_COUNTER_STATE_INACTIVE) return; counter->total_time_enabled = ctx->time - counter->tstamp_enabled; @@ -519,7 +518,7 @@ static void __perf_counter_disable(void *info) */ if (counter->state >= PERF_COUNTER_STATE_INACTIVE) { update_context_time(ctx); - update_group_times(counter); + update_counter_times(counter); if (counter == counter->group_leader) group_sched_out(counter, cpuctx, ctx); else @@ -574,7 +573,7 @@ static void perf_counter_disable(struct 
perf_counter *counter) * in, so we can change the state safely. */ if (counter->state == PERF_COUNTER_STATE_INACTIVE) { - update_group_times(counter); + update_counter_times(counter); counter->state = PERF_COUNTER_STATE_OFF; } @@ -851,27 +850,6 @@ perf_install_in_context(struct perf_counter_context *ctx, spin_unlock_irq(&ctx->lock); } -/* - * Put a counter into inactive state and update time fields. - * Enabling the leader of a group effectively enables all - * the group members that aren't explicitly disabled, so we - * have to update their ->tstamp_enabled also. - * Note: this works for group members as well as group leaders - * since the non-leader members' sibling_lists will be empty. - */ -static void __perf_counter_mark_enabled(struct perf_counter *counter, - struct perf_counter_context *ctx) -{ - struct perf_counter *sub; - - counter->state = PERF_COUNTER_STATE_INACTIVE; - counter->tstamp_enabled = ctx->time - counter->total_time_enabled; - list_for_each_entry(sub, &counter->sibling_list, list_entry) - if (sub->state >= PERF_COUNTER_STATE_INACTIVE) - sub->tstamp_enabled = - ctx->time - sub->total_time_enabled; -} - /* * Cross CPU call to enable a performance counter */ @@ -899,7 +877,8 @@ static void __perf_counter_enable(void *info) if (counter->state >= PERF_COUNTER_STATE_INACTIVE) goto unlock; - __perf_counter_mark_enabled(counter, ctx); + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = ctx->time - counter->total_time_enabled; /* * If the counter is in a group and isn't the group leader, @@ -992,9 +971,11 @@ static void perf_counter_enable(struct perf_counter *counter) * Since we have the lock this context can't be scheduled * in, so we can change the state safely. 
*/ - if (counter->state == PERF_COUNTER_STATE_OFF) - __perf_counter_mark_enabled(counter, ctx); - + if (counter->state == PERF_COUNTER_STATE_OFF) { + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; + } out: spin_unlock_irq(&ctx->lock); } @@ -1498,7 +1479,9 @@ static void perf_counter_enable_on_exec(struct task_struct *task) counter->attr.enable_on_exec = 0; if (counter->state >= PERF_COUNTER_STATE_INACTIVE) continue; - __perf_counter_mark_enabled(counter, ctx); + counter->state = PERF_COUNTER_STATE_INACTIVE; + counter->tstamp_enabled = + ctx->time - counter->total_time_enabled; enabled = 1; } @@ -1520,21 +1503,10 @@ static void perf_counter_enable_on_exec(struct task_struct *task) */ static void __perf_counter_read(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_counter *counter = info; struct perf_counter_context *ctx = counter->ctx; unsigned long flags; - /* - * If this is a task context, we need to check whether it is - * the current task context of this cpu. If not it has been - * scheduled out before the smp call arrived. In that case - * counter->count would have been updated to a recent sample - * when the counter was scheduled out. 
- */ - if (ctx->task && cpuctx->task_ctx != ctx) - return; - local_irq_save(flags); if (ctx->is_active) update_context_time(ctx); @@ -1692,11 +1664,6 @@ static void free_counter(struct perf_counter *counter) atomic_dec(&nr_task_counters); } - if (counter->output) { - fput(counter->output->filp); - counter->output = NULL; - } - if (counter->destroy) counter->destroy(counter); @@ -1813,7 +1780,7 @@ static int perf_counter_read_group(struct perf_counter *counter, size += err; list_for_each_entry(sub, &leader->sibling_list, list_entry) { - err = perf_counter_read_entry(sub, read_format, + err = perf_counter_read_entry(counter, read_format, buf + size); if (err < 0) return err; @@ -1982,8 +1949,6 @@ static int perf_counter_period(struct perf_counter *counter, u64 __user *arg) return ret; } -int perf_counter_set_output(struct perf_counter *counter, int output_fd); - static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct perf_counter *counter = file->private_data; @@ -2007,9 +1972,6 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case PERF_COUNTER_IOC_PERIOD: return perf_counter_period(counter, (u64 __user *)arg); - case PERF_COUNTER_IOC_SET_OUTPUT: - return perf_counter_set_output(counter, arg); - default: return -ENOTTY; } @@ -2046,10 +2008,6 @@ int perf_counter_task_disable(void) return 0; } -#ifndef PERF_COUNTER_INDEX_OFFSET -# define PERF_COUNTER_INDEX_OFFSET 0 -#endif - static int perf_counter_index(struct perf_counter *counter) { if (counter->state != PERF_COUNTER_STATE_ACTIVE) @@ -2280,11 +2238,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) WARN_ON_ONCE(counter->ctx->parent_ctx); mutex_lock(&counter->mmap_mutex); - if (counter->output) { - ret = -EINVAL; - goto unlock; - } - if (atomic_inc_not_zero(&counter->mmap_count)) { if (nr_pages != counter->data->nr_pages) ret = -EINVAL; @@ -2670,7 +2623,6 @@ static int perf_output_begin(struct perf_output_handle *handle, struct 
perf_counter *counter, unsigned int size, int nmi, int sample) { - struct perf_counter *output_counter; struct perf_mmap_data *data; unsigned int offset, head; int have_lost; @@ -2680,17 +2632,13 @@ static int perf_output_begin(struct perf_output_handle *handle, u64 lost; } lost_event; - rcu_read_lock(); /* * For inherited counters we send all the output towards the parent. */ if (counter->parent) counter = counter->parent; - output_counter = rcu_dereference(counter->output); - if (output_counter) - counter = output_counter; - + rcu_read_lock(); data = rcu_dereference(counter->data); if (!data) goto out; @@ -4238,57 +4186,6 @@ static int perf_copy_attr(struct perf_counter_attr __user *uattr, goto out; } -int perf_counter_set_output(struct perf_counter *counter, int output_fd) -{ - struct perf_counter *output_counter = NULL; - struct file *output_file = NULL; - struct perf_counter *old_output; - int fput_needed = 0; - int ret = -EINVAL; - - if (!output_fd) - goto set; - - output_file = fget_light(output_fd, &fput_needed); - if (!output_file) - return -EBADF; - - if (output_file->f_op != &perf_fops) - goto out; - - output_counter = output_file->private_data; - - /* Don't chain output fds */ - if (output_counter->output) - goto out; - - /* Don't set an output fd when we already have an output channel */ - if (counter->data) - goto out; - - atomic_long_inc(&output_file->f_count); - -set: - mutex_lock(&counter->mmap_mutex); - old_output = counter->output; - rcu_assign_pointer(counter->output, output_counter); - mutex_unlock(&counter->mmap_mutex); - - if (old_output) { - /* - * we need to make sure no existing perf_output_*() - * is still referencing this counter. 
- */ - synchronize_rcu(); - fput(old_output->filp); - } - - ret = 0; -out: - fput_light(output_file, fput_needed); - return ret; -} - /** * sys_perf_counter_open - open a performance counter, associate it to a task/cpu * @@ -4311,7 +4208,7 @@ SYSCALL_DEFINE5(perf_counter_open, int ret; /* for future expandability... */ - if (flags & ~(PERF_FLAG_FD_NO_GROUP | PERF_FLAG_FD_OUTPUT)) + if (flags) return -EINVAL; ret = perf_copy_attr(attr_uptr, &attr); @@ -4339,7 +4236,7 @@ SYSCALL_DEFINE5(perf_counter_open, * Look up the group leader (we will attach this counter to it): */ group_leader = NULL; - if (group_fd != -1 && !(flags & PERF_FLAG_FD_NO_GROUP)) { + if (group_fd != -1) { ret = -EINVAL; group_file = fget_light(group_fd, &fput_needed); if (!group_file) @@ -4381,12 +4278,6 @@ SYSCALL_DEFINE5(perf_counter_open, if (!counter_file) goto err_free_put_context; - if (flags & PERF_FLAG_FD_OUTPUT) { - ret = perf_counter_set_output(counter, group_fd); - if (ret) - goto err_free_put_context; - } - counter->filp = counter_file; WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); diff --git a/trunk/tools/perf/Documentation/Makefile b/trunk/tools/perf/Documentation/Makefile index bdd3b7ecad0a..5457192e1b41 100644 --- a/trunk/tools/perf/Documentation/Makefile +++ b/trunk/tools/perf/Documentation/Makefile @@ -35,7 +35,7 @@ man7dir=$(mandir)/man7 # DESTDIR= ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = --unsafe +ASCIIDOC_EXTRA = MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install diff --git a/trunk/tools/perf/Documentation/examples.txt b/trunk/tools/perf/Documentation/perf-examples.txt similarity index 100% rename from trunk/tools/perf/Documentation/examples.txt rename to trunk/tools/perf/Documentation/perf-examples.txt diff --git a/trunk/tools/perf/builtin-annotate.c b/trunk/tools/perf/builtin-annotate.c index 4c7bc4436236..96d421f7161d 100644 --- a/trunk/tools/perf/builtin-annotate.c +++ b/trunk/tools/perf/builtin-annotate.c @@ -28,7 +28,6 @@ static char const *input_name = 
"perf.data"; static char default_sort_order[] = "comm,symbol"; static char *sort_order = default_sort_order; -static int force; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -630,13 +629,6 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - if (!thread || !parent || thread__fork(thread, parent)) { dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); return -1; @@ -984,11 +976,6 @@ static int __cmd_annotate(void) exit(-1); } - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - if (!input_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); exit(0); @@ -1094,7 +1081,6 @@ static const struct option options[] = { "input file name"), OPT_STRING('s', "symbol", &sym_hist_filter, "symbol", "symbol to annotate"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c index ea6328a893cc..1e3ad22d53dc 100644 --- a/trunk/tools/perf/builtin-report.c +++ b/trunk/tools/perf/builtin-report.c @@ -37,7 +37,6 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, static struct strlist *dso_list, *comm_list, *sym_list; static char *field_sep; -static int force; static int input; static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; @@ -666,27 +665,6 @@ static void dso__calc_col_width(struct dso *self) self->slen_calculated = 1; } -static int thread__set_comm_adjust(struct thread *self, const char *comm) -{ - int ret = thread__set_comm(self, comm); - - if 
(ret) - return ret; - - if (!col_width_list_str && !field_sep && - (!comm_list || strlist__has_entry(comm_list, comm))) { - unsigned int slen = strlen(comm); - - if (slen > comms__col_width) { - comms__col_width = slen; - threads__col_width = slen + 6; - } - } - - return 0; -} - - static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, struct dso **dsop, u64 *ipp) @@ -1078,7 +1056,7 @@ static void register_idle_thread(void) struct thread *thread = threads__findnew(0, &threads, &last_match); if (thread == NULL || - thread__set_comm_adjust(thread, "[idle]")) { + thread__set_comm(thread, "[idle]")) { fprintf(stderr, "problem inserting idle task.\n"); exit(-1); } @@ -1248,7 +1226,7 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) event->comm.comm, event->comm.pid); if (thread == NULL || - thread__set_comm_adjust(thread, event->comm.comm)) { + thread__set_comm(thread, event->comm.comm)) { dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); return -1; } @@ -1405,11 +1383,6 @@ static int __cmd_report(void) exit(-1); } - if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) { - fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - exit(-1); - } - if (!input_stat.st_size) { fprintf(stderr, "zero-sized file, nothing to do!\n"); exit(0); @@ -1621,7 +1594,6 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c index 4002ccb36750..62b55ecab2c6 100644 --- a/trunk/tools/perf/builtin-top.c +++ b/trunk/tools/perf/builtin-top.c @@ -483,16 
+483,11 @@ static void print_sym_table(void) if (nr_counters == 1) printf(" samples pcnt"); else - printf(" weight samples pcnt"); + printf(" weight samples pcnt"); - if (verbose) - printf(" RIP "); - printf(" kernel function\n"); - printf(" %s _______ _____", - nr_counters == 1 ? " " : "______"); - if (verbose) - printf(" ________________"); - printf(" _______________\n\n"); + printf(" RIP kernel function\n" + " ______ _______ _____ ________________ _______________\n\n" + ); for (nd = rb_first(&tmp); nd; nd = rb_next(nd)) { struct symbol *sym; @@ -513,9 +508,7 @@ static void print_sym_table(void) printf("%9.1f %10ld - ", syme->weight, syme->snap_count); percent_color_fprintf(stdout, "%4.1f%%", pcnt); - if (verbose) - printf(" - %016llx", sym->start); - printf(" : %s", sym->name); + printf(" - %016llx : %s", sym->start, sym->name); if (sym->module) printf("\t[%s]", sym->module->name); printf("\n"); diff --git a/trunk/tools/perf/util/thread.c b/trunk/tools/perf/util/thread.c index f98032c135c6..00c14b98d651 100644 --- a/trunk/tools/perf/util/thread.c +++ b/trunk/tools/perf/util/thread.c @@ -4,7 +4,6 @@ #include #include "thread.h" #include "util.h" -#include "debug.h" static struct thread *thread__new(pid_t pid) { @@ -86,27 +85,9 @@ void thread__insert_map(struct thread *self, struct map *map) list_for_each_entry_safe(pos, tmp, &self->maps, node) { if (map__overlap(pos, map)) { - if (verbose >= 2) { - printf("overlapping maps:\n"); - map__fprintf(map, stdout); - map__fprintf(pos, stdout); - } - - if (map->start <= pos->start && map->end > pos->start) - pos->start = map->end; - - if (map->end >= pos->end && map->start < pos->end) - pos->end = map->start; - - if (verbose >= 2) { - printf("after collision:\n"); - map__fprintf(pos, stdout); - } - - if (pos->start >= pos->end) { - list_del_init(&pos->node); - free(pos); - } + list_del_init(&pos->node); + /* XXX leaks dsos */ + free(pos); } } diff --git a/trunk/tools/perf/util/trace-event-info.c 
b/trunk/tools/perf/util/trace-event-info.c
new file mode 100644
index 000000000000..78adff189bba
--- /dev/null
+++ b/trunk/tools/perf/util/trace-event-info.c
@@ -0,0 +1,491 @@
+/*
+ * Copyright (C) 2008,2009, Steven Rostedt
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+/*
+ * NOTE(review): the header names on the thirteen #include lines below were
+ * lost when this patch was extracted (only the bare "#include" survived).
+ * They are reconstructed here from the library calls this file makes;
+ * confirm the exact list against the original commit.
+ */
+#define _GNU_SOURCE
+#include <dirent.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "trace-event.h"
+
+
+#define VERSION "0.5"
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+#define MAX_PATH 256
+
+#define TRACE_CTRL "tracing_on"
+#define TRACE "trace"
+#define AVAILABLE "available_tracers"
+#define CURRENT "current_tracer"
+#define ITER_CTRL "trace_options"
+#define MAX_LATENCY "tracing_max_latency"
+
+unsigned int page_size;
+
+static const char *output_file = "trace.info";
+static int output_fd;
+
+struct event_list {
+	struct event_list *next;
+	const char *event;
+};
+
+struct events {
+	struct events *sibling;
+	struct events *children;
+	struct events *next;
+	char *name;
+};
+
+
+
+/*
+ * Report a fatal error and terminate the process.
+ *
+ * When errno is non-zero a perror() line is printed first and errno is used
+ * as the exit status; otherwise the exit status is -1.  The printf-style
+ * message follows on stderr, terminated by a newline.
+ */
+static void die(const char *fmt, ...)
+{
+	va_list ap;
+	int ret = errno;
+
+	if (errno)
+		perror("trace-cmd");
+	else
+		ret = -1;
+
+	va_start(ap, fmt);
+	fprintf(stderr, " ");
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	fprintf(stderr, "\n");
+	exit(ret);
+}
+
+/* malloc() wrapper that never returns NULL: it calls die() on failure. */
+void *malloc_or_die(unsigned int size)
+{
+	void *data;
+
+	data = malloc(size);
+	if (!data)
+		die("malloc");
+	return data;
+}
+
+/*
+ * Return the mount point of the first /proc/mounts entry whose filesystem
+ * type is "debugfs".  The result lives in a static buffer and is cached, so
+ * /proc/mounts is only scanned on the first call.  Calls die() when
+ * /proc/mounts cannot be opened or holds no debugfs entry.
+ */
+static const char *find_debugfs(void)
+{
+	static char debugfs[MAX_PATH+1];
+	static int debugfs_found;
+	char type[100];
+	int found = 0;
+	FILE *fp;
+
+	if (debugfs_found)
+		return debugfs;
+
+	if ((fp = fopen("/proc/mounts","r")) == NULL)
+		die("Can't open /proc/mounts for read");
+
+	/* fields: device (skipped), mount point, fs type, rest skipped */
+	while (fscanf(fp, "%*s %"
+		      STR(MAX_PATH)
+		      "s %99s %*s %*d %*d\n",
+		      debugfs, type) == 2) {
+		if (strcmp(type, "debugfs") == 0) {
+			found = 1;
+			break;
+		}
+	}
+	fclose(fp);
+
+	/*
+	 * Decide on an explicit flag rather than re-testing 'type': if the
+	 * very first fscanf() fails, 'type' was never written.
+	 */
+	if (!found)
+		die("debugfs not mounted, please mount");
+
+	debugfs_found = 1;
+
+	return debugfs;
+}
+
+/*
+ * Finds the path to the debugfs/tracing
+ * Allocates the string and stores it.
+ */
+static const char *find_tracing_dir(void)
+{
+	static char *tracing;
+	static int tracing_found;
+	const char *debugfs;
+
+	if (tracing_found)
+		return tracing;
+
+	debugfs = find_debugfs();
+
+	/* 9 == strlen("/tracing") + terminating NUL */
+	tracing = malloc_or_die(strlen(debugfs) + 9);
+
+	sprintf(tracing, "%s/tracing", debugfs);
+
+	tracing_found = 1;
+	return tracing;
+}
+
+/*
+ * Build "<tracing dir>/<name>" in a freshly allocated string.
+ * The caller releases it with put_tracing_file().
+ */
+static char *get_tracing_file(const char *name)
+{
+	const char *tracing;
+	char *file;
+
+	tracing = find_tracing_dir();
+	if (!tracing)
+		return NULL;
+
+	/* 2 == '/' separator + terminating NUL */
+	file = malloc_or_die(strlen(tracing) + strlen(name) + 2);
+
+	sprintf(file, "%s/%s", tracing, name);
+	return file;
+}
+
+/* Release a string obtained from get_tracing_file(). */
+static void put_tracing_file(char *file)
+{
+	free(file);
+}
+
+/*
+ * Write all 'len' bytes of 'buf' to output_fd, or die().
+ *
+ * write() may transfer fewer bytes than requested, so keep going until the
+ * whole buffer is out; an EINTR failure is retried.  Returns 'len'.
+ */
+static ssize_t write_or_die(const void *buf, size_t len)
+{
+	const char *pos = buf;
+	size_t left = len;
+	ssize_t ret;
+
+	while (left > 0) {
+		ret = write(output_fd, pos, left);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		/* ret == 0 would make no progress: treat it as a failure too */
+		if (ret <= 0)
+			die("writing to '%s'", output_file);
+		pos += ret;
+		left -= ret;
+	}
+
+	return len;
+}
+
+/*
+ * Return non-zero when the first bytes of an unsigned int are stored most
+ * significant byte first.  The probe bytes are copied with memcpy() instead
+ * of being read through a cast pointer, which would break the aliasing and
+ * alignment rules.
+ */
+int bigendian(void)
+{
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+	unsigned int val;
+
+	memcpy(&val, str, sizeof(val));
+	return val == 0x01020304;
+}
+
+/*
+ * Copy everything readable from 'fd' (from its current offset) to the output
+ * file and return the number of bytes copied.  A read() failure ends the
+ * copy the same way end-of-file does.
+ */
+static unsigned long long copy_file_fd(int fd)
+{
+	unsigned long long size = 0;
+	char buf[BUFSIZ];
+	int r;
+
+	do {
+		r = read(fd, buf, BUFSIZ);
+		if (r > 0) {
+			size += r;
+			write_or_die(buf, r);
+		}
+	} while (r > 0);
+
+	return size;
+}
+
+/* Open 'file', copy its contents to the output file, return the byte count. */
+static unsigned long long copy_file(const char *file)
+{
+	unsigned long long size = 0;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		die("Can't read '%s'", file);
+	size = copy_file_fd(fd);
+	close(fd);
+
+	return size;
+}
+
+/*
+ * Count the bytes readable from 'fd' by reading until read() stops returning
+ * data, then seek back to offset 0 so the caller can read the file again.
+ *
+ * NOTE(review): the count is accumulated in an unsigned long long but
+ * returned as unsigned long.
+ */
+static unsigned long get_size_fd(int fd)
+{
+	unsigned long long size = 0;
+	char buf[BUFSIZ];
+	int r;
+
+	do {
+		r = read(fd, buf, BUFSIZ);
+		if (r > 0)
+			size += r;
+	} while (r > 0);
+
+	lseek(fd, 0, SEEK_SET);
+
+	return size;
+}
+
+/* Open 'file' and return the number of bytes that can be read from it. */
+static unsigned long get_size(const char *file)
+{
+	unsigned long long size = 0;
+	int fd;
+
+	fd = open(file, O_RDONLY);
+	if (fd < 0)
+		die("Can't read '%s'", file);
+	size = get_size_fd(fd);
+	close(fd);
+
+	return size;
+}
+
+/*
+ * Append the "events/header_page" and "events/header_event" tracing files to
+ * the output.  Each one is written as its NUL-terminated name, an 8-byte
+ * size, and then the file contents.  Dies if a file cannot be opened or if
+ * the number of bytes copied differs from the size measured beforehand.
+ */
+static void read_header_files(void)
+{
+	unsigned long long size, check_size;
+	char *path;
+	int fd;
+
+	path = get_tracing_file("events/header_page");
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		die("can't read '%s'", path);
+
+	/* unfortunately, you can not stat debugfs files for size */
+	size = get_size_fd(fd);
+
+	write_or_die("header_page", 12);
+	write_or_die(&size, 8);
+	check_size = copy_file_fd(fd);
+	if (size != check_size)
+		die("wrong size for '%s' size=%lld read=%lld",
+		    path, size, check_size);
+	/* the descriptor is no longer needed: do not leak it */
+	close(fd);
+	put_tracing_file(path);
+
+	path = get_tracing_file("events/header_event");
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		die("can't read '%s'", path);
+
+	size = get_size_fd(fd);
+
+	write_or_die("header_event", 13);
+	write_or_die(&size, 8);
+	check_size = copy_file_fd(fd);
+	if (size != check_size)
+		die("wrong size for '%s'", path);
+	close(fd);
+	put_tracing_file(path);
+}
+
+/*
+ * Write every "<sys>/<entry>/format" file found under directory 'sys'.
+ *
+ * The directory is walked twice: the first pass counts the entries that have
+ * a format file and that 4-byte count is written; the second pass writes,
+ * for each such entry, an 8-byte size followed by the file contents.
+ */
+static void copy_event_system(const char *sys)
+{
+	unsigned long long size, check_size;
+	struct dirent *dent;
+	struct stat st;
+	char *format;
+	DIR *dir;
+	int count = 0;
+	int ret;
+
+	dir = opendir(sys);
+	if (!dir)
+		die("can't read directory '%s'", sys);
+
+	while ((dent = readdir(dir))) {
+		if (strcmp(dent->d_name, ".") == 0 ||
+		    strcmp(dent->d_name, "..") == 0)
+			continue;
+		format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10);
+		sprintf(format, "%s/%s/format", sys, dent->d_name);
+		ret = stat(format, &st);
+		free(format);
+		if (ret < 0)
+			continue;
+		count++;
+	}
+
+	write_or_die(&count, 4);
+
+	rewinddir(dir);
+	while ((dent = readdir(dir))) {
+		if (strcmp(dent->d_name, ".") == 0 ||
+		    strcmp(dent->d_name, "..") == 0)
+			continue;
+		format = malloc_or_die(strlen(sys) + strlen(dent->d_name) + 10);
+		sprintf(format, "%s/%s/format", sys, dent->d_name);
+		ret = stat(format, &st);
+
+		if (ret >= 0) {
+			/* unfortunately, you can not stat debugfs files for size */
+			size = get_size(format);
+			write_or_die(&size, 8);
+			check_size = copy_file(format);
+			if (size != check_size)
+				die("error in size of file '%s'", format);
+		}
+
+		free(format);
+	}
+
+	/* release the directory stream opened above */
+	closedir(dir);
+}
+
+/* Write the format files of the "events/ftrace" directory. */
+static void read_ftrace_files(void)
+{
+	char *path;
+
+	path = get_tracing_file("events/ftrace");
+
+	copy_event_system(path);
+
+	put_tracing_file(path);
+}
+
+/*
+ * Write the format files of every sub-directory of "events" except "ftrace".
+ *
+ * A 4-byte count of those sub-directories is written first; then, for each
+ * one, its NUL-terminated name followed by copy_event_system() output.
+ */
+static void read_event_files(void)
+{
+	struct dirent *dent;
+	struct stat st;
+	char *path;
+	char *sys;
+	DIR *dir;
+	int count = 0;
+	int ret;
+
+	path = get_tracing_file("events");
+
+	dir = opendir(path);
+	if (!dir)
+		die("can't read directory '%s'", path);
+
+	while ((dent = readdir(dir))) {
+		if (strcmp(dent->d_name, ".") == 0 ||
+		    strcmp(dent->d_name, "..") == 0 ||
+		    strcmp(dent->d_name, "ftrace") == 0)
+			continue;
+		sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2);
+		sprintf(sys, "%s/%s", path, dent->d_name);
+		ret = stat(sys, &st);
+		free(sys);
+		if (ret < 0)
+			continue;
+		if (S_ISDIR(st.st_mode))
+			count++;
+	}
+
+	write_or_die(&count, 4);
+
+	rewinddir(dir);
+	while ((dent = readdir(dir))) {
+		if (strcmp(dent->d_name, ".") == 0 ||
+		    strcmp(dent->d_name, "..") == 0 ||
+		    strcmp(dent->d_name, "ftrace") == 0)
+			continue;
+		sys = malloc_or_die(strlen(path) + strlen(dent->d_name) + 2);
+		sprintf(sys, "%s/%s", path, dent->d_name);
+		ret = stat(sys, &st);
+		if (ret >= 0) {
+			if (S_ISDIR(st.st_mode)) {
+				write_or_die(dent->d_name, strlen(dent->d_name) + 1);
+				copy_event_system(sys);
+			}
+		}
+		free(sys);
+	}
+
+	/* release the directory stream opened above */
+	closedir(dir);
+	put_tracing_file(path);
+}
+
+/*
+ * Write a 4-byte size followed by the contents of /proc/kallsyms.
+ * When the file cannot be stat()ed only a zero size is written.
+ */
+static void read_proc_kallsyms(void)
+{
+	unsigned int size, check_size;
+	const char *path = "/proc/kallsyms";
+	struct stat st;
+	int ret;
+
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		write_or_die(&size, 4);
+		return;
+	}
+	size = get_size(path);
+	write_or_die(&size, 4);
+	check_size = copy_file(path);
+	if (size != check_size)
+		die("error in size of file '%s'", path);
+
+}
+
+/*
+ * Write a 4-byte size followed by the contents of the "printk_formats"
+ * tracing file.  When the file cannot be stat()ed only a zero size is
+ * written.  The path string is released on every return path.
+ */
+static void read_ftrace_printk(void)
+{
+	unsigned int size, check_size;
+	char *path;
+	struct stat st;
+	int ret;
+
+	path = get_tracing_file("printk_formats");
+	ret = stat(path, &st);
+	if (ret < 0) {
+		/* not found */
+		size = 0;
+		write_or_die(&size, 4);
+		goto out;
+	}
+	size = get_size(path);
+	write_or_die(&size, 4);
+	check_size = copy_file(path);
+	if (size != check_size)
+		die("error in size of file '%s'", path);
+out:
+	put_tracing_file(path);
+}
+
+/*
+ * Create the output file and write the tracing information into it:
+ * a 10-byte magic (23, 8, 68, "tracing"), the NUL-terminated VERSION
+ * string, one endianness byte (1 when bigendian() is true), one byte
+ * holding sizeof(long), the 4-byte page size, and then the header,
+ * ftrace, event, kallsyms and printk sections in that order.
+ */
+void read_tracing_data(void)
+{
+	char buf[BUFSIZ];
+
+	output_fd = open(output_file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
+	if (output_fd < 0)
+		die("creating file '%s'", output_file);
+
+	buf[0] = 23;
+	buf[1] = 8;
+	buf[2] = 68;
+	memcpy(buf + 3, "tracing", 7);
+
+	write_or_die(buf, 10);
+
+	write_or_die(VERSION, strlen(VERSION) + 1);
+
+	/* save endian */
+	if (bigendian())
+		buf[0] = 1;
+	else
+		buf[0] = 0;
+
+	write_or_die(buf, 1);
+
+	/* save size of long */
+	buf[0] = sizeof(long);
+	write_or_die(buf, 1);
+
+	/* save page_size */
+	page_size = getpagesize();
+	write_or_die(&page_size, 4);
+
+	read_header_files();
+	read_ftrace_files();
+	read_event_files();
+	read_proc_kallsyms();
+	read_ftrace_printk();
+}
diff --git a/trunk/tools/perf/util/trace-event.h b/trunk/tools/perf/util/trace-event.h
new file mode 100644
index 000000000000..3ddb8947be8a
--- /dev/null
+++ b/trunk/tools/perf/util/trace-event.h
@@ -0,0 +1,238 @@
+#ifndef _PARSE_EVENTS_H
+#define _PARSE_EVENTS_H
+
+
+#define __unused __attribute__((unused))
+
+
+#ifndef PAGE_MASK
+#define PAGE_MASK (page_size - 1)
+#endif
+
+enum {
+	RINGBUF_TYPE_PADDING = 29,
+	RINGBUF_TYPE_TIME_EXTEND = 30,
+	RINGBUF_TYPE_TIME_STAMP = 31,
+};
+
+#ifndef TS_SHIFT
+#define TS_SHIFT 27
+#endif
+
+#define NSECS_PER_SEC 1000000000ULL
+#define NSECS_PER_USEC 1000ULL
+
+enum format_flags {
+	FIELD_IS_ARRAY = 1,
+	FIELD_IS_POINTER = 2,
+};
+
+struct format_field {
+	struct format_field *next;
+	char *type;
+	char *name;
+	int offset;
+	int size;
+	unsigned long flags;
+};
+
+struct format {
+	int nr_common;
+	int nr_fields;
+	struct format_field *common_fields;
+	struct format_field *fields;
+};
+
+struct print_arg_atom {
+	char *atom;
+};
+
+/* Payload of a print_arg that carries a single string. */
+struct print_arg_string {
+	char *string;
+};
+
+/* Payload of a print_arg that names a field and points at its format_field. */
+struct print_arg_field {
+	char *name;
+	struct format_field *field;
+};
+
+/* One value/str string pair; entries chain through 'next'. */
+struct print_flag_sym {
+	struct print_flag_sym *next;
+	char *value;
+	char *str;
+};
+
+/* Payload holding a type string and the print_arg it applies to. */
+struct print_arg_typecast {
+	char *type;
+	struct print_arg *item;
+};
+
+/* Payload holding a field argument, a delimiter string and a print_flag_sym chain. */
+struct print_arg_flags {
+	struct print_arg *field;
+	char *delim;
+	struct print_flag_sym *flags;
+};
+
+/* Payload holding a field argument and a print_flag_sym chain. */
+struct print_arg_symbol {
+	struct print_arg *field;
+	struct print_flag_sym *symbols;
+};
+
+/* Forward declaration; the full definition follows enum print_arg_type. */
+struct print_arg;
+
+/* Payload holding an operator string, a priority and left/right arguments. */
+struct print_arg_op {
+	char *op;
+	int prio;
+	struct print_arg *left;
+	struct print_arg *right;
+};
+
+/* Payload holding a name and an argument chain. */
+struct print_arg_func {
+	char *name;
+	struct print_arg *args;
+};
+
+/*
+ * Values stored in print_arg.type.
+ * NOTE(review): presumably this selects which member of the union in
+ * struct print_arg is valid; there is no enumerator matching the 'func'
+ * member -- confirm whether that is intentional.
+ */
+enum print_arg_type {
+	PRINT_NULL,
+	PRINT_ATOM,
+	PRINT_FIELD,
+	PRINT_FLAGS,
+	PRINT_SYMBOL,
+	PRINT_TYPE,
+	PRINT_STRING,
+	PRINT_OP,
+};
+
+/* One print argument: a 'next' link, a type tag and a per-kind payload. */
+struct print_arg {
+	struct print_arg *next;
+	enum print_arg_type type;
+	union {
+		struct print_arg_atom atom;
+		struct print_arg_field field;
+		struct print_arg_typecast typecast;
+		struct print_arg_flags flags;
+		struct print_arg_symbol symbol;
+		struct print_arg_func func;
+		struct print_arg_string string;
+		struct print_arg_op op;
+	};
+};
+
+/* A format string together with its print_arg chain. */
+struct print_fmt {
+	char *format;
+	struct print_arg *args;
+};
+
+/* One event: name, id, EVENT_FL_* style flags, field layout and print format. */
+struct event {
+	struct event *next;
+	char *name;
+	int id;
+	int flags;
+	struct format format;
+	struct print_fmt print_fmt;
+};
+
+/* Single-bit flag values (presumably for event.flags -- confirm at the users). */
+enum {
+	EVENT_FL_ISFTRACE = 1,
+	EVENT_FL_ISPRINT = 2,
+	EVENT_FL_ISBPRINT = 4,
+	EVENT_FL_ISFUNC = 8,
+	EVENT_FL_ISFUNCENT = 16,
+	EVENT_FL_ISFUNCRET = 32,
+};
+
+/* A timestamp, a size and a data pointer. */
+struct record {
+	unsigned long long ts;
+	int size;
+	void *data;
+};
+
+struct record *trace_peek_data(int cpu);
+struct record *trace_read_data(int cpu);
+
+void parse_set_info(int nr_cpus, int long_sz);
+
+void trace_report(void);
+
+void *malloc_or_die(unsigned int size);
+
+void parse_cmdlines(char *file, int size);
+void parse_proc_kallsyms(char *file, unsigned int size);
+void parse_ftrace_printk(char *file, unsigned int size);
+
+void print_funcs(void);
+void print_printk(void);
+
+int parse_ftrace_file(char *buf, unsigned long size);
+int parse_event_file(char *buf, unsigned long size, char *system);
+void print_event(int cpu, void *data, int size, unsigned long long nsecs,
+		  char *comm);
+
+extern int file_bigendian;
+extern int host_bigendian;
+
+int bigendian(void);
+
+/*
+ * __data2host2/4/8: return 'data' unchanged when host_bigendian equals
+ * file_bigendian, otherwise return it with its 2, 4 or 8 bytes reversed.
+ */
+static inline unsigned short __data2host2(unsigned short data)
+{
+	unsigned short swap;
+
+	if (host_bigendian == file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 8) |
+		((data & (0xffULL << 8)) >> 8);
+
+	return swap;
+}
+
+static inline unsigned int __data2host4(unsigned int data)
+{
+	unsigned int swap;
+
+	if (host_bigendian == file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 24) |
+		((data & (0xffULL << 8)) << 8) |
+		((data & (0xffULL << 16)) >> 8) |
+		((data & (0xffULL << 24)) >> 24);
+
+	return swap;
+}
+
+static inline unsigned long long __data2host8(unsigned long long data)
+{
+	unsigned long long swap;
+
+	if (host_bigendian == file_bigendian)
+		return data;
+
+	swap = ((data & 0xffULL) << 56) |
+		((data & (0xffULL << 8)) << 40) |
+		((data & (0xffULL << 16)) << 24) |
+		((data & (0xffULL << 24)) << 8) |
+		((data & (0xffULL << 32)) >> 8) |
+		((data & (0xffULL << 40)) >> 24) |
+		((data & (0xffULL << 48)) >> 40) |
+		((data & (0xffULL << 56)) >> 56);
+
+	return swap;
+}
+
+/*
+ * Load a 2/4/8-byte value through 'ptr' and convert it with the matching
+ * __data2hostN() helper.  'ptr' is parenthesised so that an expression
+ * argument such as data2host4(p + 4) casts the whole expression rather
+ * than only 'p'.
+ */
+#define data2host2(ptr) __data2host2(*(unsigned short *)(ptr))
+#define data2host4(ptr) __data2host4(*(unsigned int *)(ptr))
+#define data2host8(ptr) __data2host8(*(unsigned long long *)(ptr))
+
+extern int header_page_ts_offset;
+extern int header_page_ts_size;
+extern int header_page_size_offset;
+extern int header_page_size_size;
+extern int header_page_data_offset;
+extern int header_page_data_size;
+
+int parse_header_page(char *buf, unsigned long size);
+
+void read_tracing_data(void);
+
+#endif /* _PARSE_EVENTS_H */
diff --git a/trunk/tools/perf/util/util.h b/trunk/tools/perf/util/util.h
index 
15004d211663..d61a6f037631 100644 --- a/trunk/tools/perf/util/util.h +++ b/trunk/tools/perf/util/util.h @@ -39,6 +39,10 @@ /* Approximation of the length of the decimal representation of this type. */ #define decimal_length(x) ((int)(sizeof(x) * 2.56 + 0.5) + 1) +#if !defined(__APPLE__) && !defined(__FreeBSD__) && !defined(__USLC__) && !defined(_M_UNIX) +#define _XOPEN_SOURCE 600 /* glibc2 and AIX 5.3L need 500, OpenBSD needs 600 for S_ISLNK() */ +#define _XOPEN_SOURCE_EXTENDED 1 /* AIX 5.3L needs this */ +#endif #define _ALL_SOURCE 1 #define _GNU_SOURCE 1 #define _BSD_SOURCE 1