diff --git a/[refs] b/[refs] index ab166c3d778a..0da100fa8ded 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: 184d3da8ef0ca552dffa0fdd35c046e058a2cf9a +refs/heads/master: 827f3b4974c5db2968d4979fe6a0ae00ab37bdd8 diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig index eef3bbb97075..7f418bbc261a 100644 --- a/trunk/arch/Kconfig +++ b/trunk/arch/Kconfig @@ -126,11 +126,4 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool -config HAVE_HW_BREAKPOINT - bool - depends on HAVE_PERF_EVENTS - select ANON_INODES - select PERF_EVENTS - - source "kernel/gcov/Kconfig" diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig index 178084b4377c..72ace9515a07 100644 --- a/trunk/arch/x86/Kconfig +++ b/trunk/arch/x86/Kconfig @@ -49,7 +49,6 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA - select HAVE_HW_BREAKPOINT select HAVE_ARCH_KMEMCHECK config OUTPUT_FORMAT diff --git a/trunk/arch/x86/include/asm/Kbuild b/trunk/arch/x86/include/asm/Kbuild index 9f828f87ca35..4a8e80cdcfa5 100644 --- a/trunk/arch/x86/include/asm/Kbuild +++ b/trunk/arch/x86/include/asm/Kbuild @@ -10,7 +10,6 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h -header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/trunk/arch/x86/include/asm/a.out-core.h b/trunk/arch/x86/include/asm/a.out-core.h index 7a15588e45d4..bb70e397aa84 100644 --- a/trunk/arch/x86/include/asm/a.out-core.h +++ b/trunk/arch/x86/include/asm/a.out-core.h @@ -17,7 +17,6 @@ #include #include -#include /* * fill in the user structure for an a.out core dump @@ -33,7 +32,14 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - aout_dump_debugregs(dump); + dump->u_debugreg[0] = current->thread.debugreg0; + dump->u_debugreg[1] = current->thread.debugreg1; + dump->u_debugreg[2] = 
current->thread.debugreg2; + dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + dump->u_debugreg[7] = current->thread.debugreg7; if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/trunk/arch/x86/include/asm/debugreg.h b/trunk/arch/x86/include/asm/debugreg.h index fdabd8435765..3ea6f37be9e2 100644 --- a/trunk/arch/x86/include/asm/debugreg.h +++ b/trunk/arch/x86/include/asm/debugreg.h @@ -18,7 +18,6 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ -#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -50,8 +49,6 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ -#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ -#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -70,34 +67,4 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ -/* - * HW breakpoint additions - */ -#ifdef __KERNEL__ - -DECLARE_PER_CPU(unsigned long, dr7); - -static inline void hw_breakpoint_disable(void) -{ - /* Zero the control register for HW Breakpoint */ - set_debugreg(0UL, 7); - - /* Zero-out the individual HW breakpoint address registers */ - set_debugreg(0UL, 0); - set_debugreg(0UL, 1); - set_debugreg(0UL, 2); - set_debugreg(0UL, 3); -} - -static inline int hw_breakpoint_active(void) -{ - return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; -} - -extern void aout_dump_debugregs(struct user *dump); - -extern void hw_breakpoint_restore(void); - -#endif /* 
__KERNEL__ */ - #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/trunk/arch/x86/include/asm/hw_breakpoint.h b/trunk/arch/x86/include/asm/hw_breakpoint.h deleted file mode 100644 index 0675a7c4c20e..000000000000 --- a/trunk/arch/x86/include/asm/hw_breakpoint.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef _I386_HW_BREAKPOINT_H -#define _I386_HW_BREAKPOINT_H - -#ifdef __KERNEL__ -#define __ARCH_HW_BREAKPOINT_H - -/* - * The name should probably be something dealt in - * a higher level. While dealing with the user - * (display/resolving) - */ -struct arch_hw_breakpoint { - char *name; /* Contains name of the symbol to set bkpt */ - unsigned long address; - u8 len; - u8 type; -}; - -#include -#include -#include - -/* Available HW breakpoint length encodings */ -#define X86_BREAKPOINT_LEN_1 0x40 -#define X86_BREAKPOINT_LEN_2 0x44 -#define X86_BREAKPOINT_LEN_4 0x4c -#define X86_BREAKPOINT_LEN_EXECUTE 0x40 - -#ifdef CONFIG_X86_64 -#define X86_BREAKPOINT_LEN_8 0x48 -#endif - -/* Available HW breakpoint type encodings */ - -/* trigger on instruction execute */ -#define X86_BREAKPOINT_EXECUTE 0x80 -/* trigger on memory write */ -#define X86_BREAKPOINT_WRITE 0x81 -/* trigger on memory read or write */ -#define X86_BREAKPOINT_RW 0x83 - -/* Total number of available HW breakpoint registers */ -#define HBP_NUM 4 - -struct perf_event; -struct pmu; - -extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk); -extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); - - -int arch_install_hw_breakpoint(struct perf_event *bp); -void arch_uninstall_hw_breakpoint(struct perf_event *bp); -void hw_breakpoint_pmu_read(struct perf_event *bp); -void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); - -extern void -arch_fill_perf_breakpoint(struct perf_event *bp); - -unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); -int 
decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); - -extern int arch_bp_generic_fields(int x86_len, int x86_type, - int *gen_len, int *gen_type); - -extern struct pmu perf_ops_bp; - -#endif /* __KERNEL__ */ -#endif /* _I386_HW_BREAKPOINT_H */ - diff --git a/trunk/arch/x86/include/asm/processor.h b/trunk/arch/x86/include/asm/processor.h index 6f8ec1c37e0a..c9786480f0fe 100644 --- a/trunk/arch/x86/include/asm/processor.h +++ b/trunk/arch/x86/include/asm/processor.h @@ -30,7 +30,6 @@ struct mm_struct; #include #include -#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -423,8 +422,6 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; -struct perf_event; - struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -446,10 +443,13 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Save middle states of ptrace breakpoints */ - struct perf_event *ptrace_bps[HBP_NUM]; - /* Debug status used for traps, single steps, etc... 
*/ - unsigned long debugreg6; + /* Hardware debugging registers: */ + unsigned long debugreg0; + unsigned long debugreg1; + unsigned long debugreg2; + unsigned long debugreg3; + unsigned long debugreg6; + unsigned long debugreg7; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/trunk/arch/x86/kernel/Makefile b/trunk/arch/x86/kernel/Makefile index 4f2e66e29ecc..d8e5d0cdd678 100644 --- a/trunk/arch/x86/kernel/Makefile +++ b/trunk/arch/x86/kernel/Makefile @@ -40,7 +40,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o +obj-y += alternative.o i8253.o pci-nommu.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/trunk/arch/x86/kernel/cpu/Makefile b/trunk/arch/x86/kernel/cpu/Makefile index 1d2cb383410e..68537e957a9b 100644 --- a/trunk/arch/x86/kernel/cpu/Makefile +++ b/trunk/arch/x86/kernel/cpu/Makefile @@ -5,7 +5,6 @@ # Don't trace early stages of a secondary CPU boot ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_common.o = -pg -CFLAGS_REMOVE_perf_event.o = -pg endif # Make sure load_percpu_segment has no stackprotector diff --git a/trunk/arch/x86/kernel/hw_breakpoint.c b/trunk/arch/x86/kernel/hw_breakpoint.c deleted file mode 100644 index 4d267fb77828..000000000000 --- a/trunk/arch/x86/kernel/hw_breakpoint.c +++ /dev/null @@ -1,549 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2007 Alan Stern - * Copyright (C) 2009 IBM Corporation - * Copyright (C) 2009 Frederic Weisbecker - * - * Authors: Alan Stern - * K.Prasad - * Frederic Weisbecker - */ - -/* - * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, - * using the CPU's debug registers. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -/* Per cpu debug control register value */ -DEFINE_PER_CPU(unsigned long, dr7); -EXPORT_PER_CPU_SYMBOL(dr7); - -/* Per cpu debug address registers values */ -static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); - -/* - * Stores the breakpoints currently in use on each breakpoint address - * register for each cpus - */ -static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); - - -/* - * Encode the length, type, Exact, and Enable bits for a particular breakpoint - * as stored in debug register 7. - */ -unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) -{ - unsigned long bp_info; - - bp_info = (len | type) & 0xf; - bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); - bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | - DR_GLOBAL_SLOWDOWN; - return bp_info; -} - -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. 
- */ -int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -/* - * Install a perf counter breakpoint. - * - * We seek a free debug address register and use it for this - * breakpoint. Eventually we enable it in the debug control register. - * - * Atomic: we hold the counter->ctx->lock and we only handle variables - * and registers local to this cpu. - */ -int arch_install_hw_breakpoint(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; - int i; - - for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) - return -EBUSY; - - set_debugreg(info->address, i); - __get_cpu_var(cpu_debugreg[i]) = info->address; - - dr7 = &__get_cpu_var(dr7); - *dr7 |= encode_dr7(i, info->len, info->type); - - set_debugreg(*dr7, 7); - - return 0; -} - -/* - * Uninstall the breakpoint contained in the given counter. - * - * First we search the debug address register it uses and then we disable - * it. - * - * Atomic: we hold the counter->ctx->lock and we only handle variables - * and registers local to this cpu. 
- */ -void arch_uninstall_hw_breakpoint(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - unsigned long *dr7; - int i; - - for (i = 0; i < HBP_NUM; i++) { - struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) - return; - - dr7 = &__get_cpu_var(dr7); - *dr7 &= ~encode_dr7(i, info->len, info->type); - - set_debugreg(*dr7, 7); -} - -static int get_hbp_len(u8 hbp_len) -{ - unsigned int len_in_bytes = 0; - - switch (hbp_len) { - case X86_BREAKPOINT_LEN_1: - len_in_bytes = 1; - break; - case X86_BREAKPOINT_LEN_2: - len_in_bytes = 2; - break; - case X86_BREAKPOINT_LEN_4: - len_in_bytes = 4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - len_in_bytes = 8; - break; -#endif - } - return len_in_bytes; -} - -/* - * Check for virtual address in user space. - */ -int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) -{ - unsigned int len; - - len = get_hbp_len(hbp_len); - - return (va <= TASK_SIZE - len); -} - -/* - * Check for virtual address in kernel space. - */ -static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) -{ - unsigned int len; - - len = get_hbp_len(hbp_len); - - return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); -} - -/* - * Store a breakpoint's encoded address, length, and type. - */ -static int arch_store_info(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - /* - * For kernel-addresses, either the address or symbol name can be - * specified. 
- */ - if (info->name) - info->address = (unsigned long) - kallsyms_lookup_name(info->name); - if (info->address) - return 0; - - return -EINVAL; -} - -int arch_bp_generic_fields(int x86_len, int x86_type, - int *gen_len, int *gen_type) -{ - /* Len */ - switch (x86_len) { - case X86_BREAKPOINT_LEN_1: - *gen_len = HW_BREAKPOINT_LEN_1; - break; - case X86_BREAKPOINT_LEN_2: - *gen_len = HW_BREAKPOINT_LEN_2; - break; - case X86_BREAKPOINT_LEN_4: - *gen_len = HW_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - *gen_len = HW_BREAKPOINT_LEN_8; - break; -#endif - default: - return -EINVAL; - } - - /* Type */ - switch (x86_type) { - case X86_BREAKPOINT_EXECUTE: - *gen_type = HW_BREAKPOINT_X; - break; - case X86_BREAKPOINT_WRITE: - *gen_type = HW_BREAKPOINT_W; - break; - case X86_BREAKPOINT_RW: - *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; - break; - default: - return -EINVAL; - } - - return 0; -} - - -static int arch_build_bp_info(struct perf_event *bp) -{ - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - - info->address = bp->attr.bp_addr; - - /* Len */ - switch (bp->attr.bp_len) { - case HW_BREAKPOINT_LEN_1: - info->len = X86_BREAKPOINT_LEN_1; - break; - case HW_BREAKPOINT_LEN_2: - info->len = X86_BREAKPOINT_LEN_2; - break; - case HW_BREAKPOINT_LEN_4: - info->len = X86_BREAKPOINT_LEN_4; - break; -#ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: - info->len = X86_BREAKPOINT_LEN_8; - break; -#endif - default: - return -EINVAL; - } - - /* Type */ - switch (bp->attr.bp_type) { - case HW_BREAKPOINT_W: - info->type = X86_BREAKPOINT_WRITE; - break; - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - info->type = X86_BREAKPOINT_RW; - break; - case HW_BREAKPOINT_X: - info->type = X86_BREAKPOINT_EXECUTE; - break; - default: - return -EINVAL; - } - - return 0; -} -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct perf_event *bp, - struct task_struct *tsk) -{ - struct arch_hw_breakpoint 
*info = counter_arch_bp(bp); - unsigned int align; - int ret; - - - ret = arch_build_bp_info(bp); - if (ret) - return ret; - - ret = -EINVAL; - - if (info->type == X86_BREAKPOINT_EXECUTE) - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - if ((!arch_check_va_in_userspace(info->address, info->len)) && - info->len != X86_BREAKPOINT_EXECUTE) - return ret; - - switch (info->len) { - case X86_BREAKPOINT_LEN_1: - align = 0; - break; - case X86_BREAKPOINT_LEN_2: - align = 1; - break; - case X86_BREAKPOINT_LEN_4: - align = 3; - break; -#ifdef CONFIG_X86_64 - case X86_BREAKPOINT_LEN_8: - align = 7; - break; -#endif - default: - return ret; - } - - if (bp->callback) - ret = arch_store_info(bp); - - if (ret < 0) - return ret; - /* - * Check that the low-order bits of the address are appropriate - * for the alignment implied by len. - */ - if (info->address & align) - return -EINVAL; - - /* Check that the virtual address is in the proper range */ - if (tsk) { - if (!arch_check_va_in_userspace(info->address, info->len)) - return -EFAULT; - } else { - if (!arch_check_va_in_kernelspace(info->address, info->len)) - return -EFAULT; - } - - return 0; -} - -/* - * Dump the debug register contents to the user. - * We can't dump our per cpu values because it - * may contain cpu wide breakpoint, something that - * doesn't belong to the current task. 
- * - * TODO: include non-ptrace user breakpoints (perf) - */ -void aout_dump_debugregs(struct user *dump) -{ - int i; - int dr7 = 0; - struct perf_event *bp; - struct arch_hw_breakpoint *info; - struct thread_struct *thread = &current->thread; - - for (i = 0; i < HBP_NUM; i++) { - bp = thread->ptrace_bps[i]; - - if (bp && !bp->attr.disabled) { - dump->u_debugreg[i] = bp->attr.bp_addr; - info = counter_arch_bp(bp); - dr7 |= encode_dr7(i, info->len, info->type); - } else { - dump->u_debugreg[i] = 0; - } - } - - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - - dump->u_debugreg[7] = dr7; -} -EXPORT_SYMBOL_GPL(aout_dump_debugregs); - -/* - * Release the user breakpoints used by ptrace - */ -void flush_ptrace_hw_breakpoint(struct task_struct *tsk) -{ - int i; - struct thread_struct *t = &tsk->thread; - - for (i = 0; i < HBP_NUM; i++) { - unregister_hw_breakpoint(t->ptrace_bps[i]); - t->ptrace_bps[i] = NULL; - } -} - -void hw_breakpoint_restore(void) -{ - set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); - set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); - set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); - set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(__get_cpu_var(dr7), 7); -} -EXPORT_SYMBOL_GPL(hw_breakpoint_restore); - -/* - * Handle debug exception notifications. - * - * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. - * - * NOTIFY_DONE returned if one of the following conditions is true. - * i) When the causative address is from user-space and the exception - * is a valid one, i.e. not triggered as a result of lazy debug register - * switching - * ii) When there are more bits than trap set in DR6 register (such - * as BD, BS or BT) indicating that more than one debug condition is - * met and requires some more action in do_debug().
- * - * NOTIFY_STOP returned for all other cases - * - */ -static int __kprobes hw_breakpoint_handler(struct die_args *args) -{ - int i, cpu, rc = NOTIFY_STOP; - struct perf_event *bp; - unsigned long dr7, dr6; - unsigned long *dr6_p; - - /* The DR6 value is pointed by args->err */ - dr6_p = (unsigned long *)ERR_PTR(args->err); - dr6 = *dr6_p; - - /* Do an early return if no trap bits are set in DR6 */ - if ((dr6 & DR_TRAP_BITS) == 0) - return NOTIFY_DONE; - - get_debugreg(dr7, 7); - /* Disable breakpoints during exception handling */ - set_debugreg(0UL, 7); - /* - * Assert that local interrupts are disabled - * Reset the DRn bits in the virtualized register value. - * The ptrace trigger routine will add in whatever is needed. - */ - current->thread.debugreg6 &= ~DR_TRAP_BITS; - cpu = get_cpu(); - - /* Handle all the breakpoints that were triggered */ - for (i = 0; i < HBP_NUM; ++i) { - if (likely(!(dr6 & (DR_TRAP0 << i)))) - continue; - - /* - * The counter may be concurrently released but that can only - * occur from a call_rcu() path. We can then safely fetch - * the breakpoint, use its callback, touch its counter - * while we are in an rcu_read_lock() path. - */ - rcu_read_lock(); - - bp = per_cpu(bp_per_reg[i], cpu); - if (bp) - rc = NOTIFY_DONE; - /* - * Reset the 'i'th TRAP bit in dr6 to denote completion of - * exception handling - */ - (*dr6_p) &= ~(DR_TRAP0 << i); - /* - * bp can be NULL due to lazy debug register switching - * or due to concurrent perf counter removing. - */ - if (!bp) { - rcu_read_unlock(); - break; - } - - (bp->callback)(bp, args->regs); - - rcu_read_unlock(); - } - if (dr6 & (~DR_TRAP_BITS)) - rc = NOTIFY_DONE; - - set_debugreg(dr7, 7); - put_cpu(); - - return rc; -} - -/* - * Handle debug exception notifications. 
- */ -int __kprobes hw_breakpoint_exceptions_notify( - struct notifier_block *unused, unsigned long val, void *data) -{ - if (val != DIE_DEBUG) - return NOTIFY_DONE; - - return hw_breakpoint_handler(data); -} - -void hw_breakpoint_pmu_read(struct perf_event *bp) -{ - /* TODO */ -} - -void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) -{ - /* TODO */ -} diff --git a/trunk/arch/x86/kernel/kgdb.c b/trunk/arch/x86/kernel/kgdb.c index 34e86b67550c..8d82a77a3f3b 100644 --- a/trunk/arch/x86/kernel/kgdb.c +++ b/trunk/arch/x86/kernel/kgdb.c @@ -43,7 +43,6 @@ #include #include -#include #include #include @@ -435,11 +434,6 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/trunk/arch/x86/kernel/kprobes.c b/trunk/arch/x86/kernel/kprobes.c index 3fe86d706a14..c5f1f117e0c0 100644 --- a/trunk/arch/x86/kernel/kprobes.c +++ b/trunk/arch/x86/kernel/kprobes.c @@ -56,7 +56,6 @@ #include #include #include -#include void jprobe_return_end(void); @@ -946,14 +945,8 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; + if (post_kprobe_handler(args->regs)) ret = NOTIFY_STOP; - } break; case DIE_GPF: /* diff --git a/trunk/arch/x86/kernel/machine_kexec_32.c b/trunk/arch/x86/kernel/machine_kexec_32.c index c843f8406da2..c1c429d00130 100644 --- a/trunk/arch/x86/kernel/machine_kexec_32.c +++ b/trunk/arch/x86/kernel/machine_kexec_32.c @@ -25,7 +25,6 @@ #include #include #include -#include static void set_idt(void *newidt, __u16 limit) { @@ 
-203,7 +202,6 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/trunk/arch/x86/kernel/machine_kexec_64.c b/trunk/arch/x86/kernel/machine_kexec_64.c index 4a8bb82248ae..84c3bf209e98 100644 --- a/trunk/arch/x86/kernel/machine_kexec_64.c +++ b/trunk/arch/x86/kernel/machine_kexec_64.c @@ -18,7 +18,6 @@ #include #include #include -#include static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -283,7 +282,6 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); - hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/trunk/arch/x86/kernel/process.c b/trunk/arch/x86/kernel/process.c index 744508e7cfdd..5284cd2b5776 100644 --- a/trunk/arch/x86/kernel/process.c +++ b/trunk/arch/x86/kernel/process.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -18,7 +17,6 @@ #include #include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -105,7 +103,14 @@ void flush_thread(void) } #endif - flush_ptrace_hw_breakpoint(tsk); + clear_tsk_thread_flag(tsk, TIF_DEBUG); + + tsk->thread.debugreg0 = 0; + tsk->thread.debugreg1 = 0; + tsk->thread.debugreg2 = 0; + tsk->thread.debugreg3 = 0; + tsk->thread.debugreg6 = 0; + tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. 
@@ -187,6 +192,16 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { + set_debugreg(next->debugreg0, 0); + set_debugreg(next->debugreg1, 1); + set_debugreg(next->debugreg2, 2); + set_debugreg(next->debugreg3, 3); + /* no 4 and 5 */ + set_debugreg(next->debugreg6, 6); + set_debugreg(next->debugreg7, 7); + } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/trunk/arch/x86/kernel/process_32.c b/trunk/arch/x86/kernel/process_32.c index d5bd3132ee70..4cf79567cdab 100644 --- a/trunk/arch/x86/kernel/process_32.c +++ b/trunk/arch/x86/kernel/process_32.c @@ -58,7 +58,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -260,12 +259,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); - p->thread.io_bitmap_ptr = NULL; tsk = current; - err = -ENOMEM; - - memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); diff --git a/trunk/arch/x86/kernel/process_64.c b/trunk/arch/x86/kernel/process_64.c index 70cf15873f3d..eb62cbcaa490 100644 --- a/trunk/arch/x86/kernel/process_64.c +++ b/trunk/arch/x86/kernel/process_64.c @@ -52,7 +52,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -298,16 +297,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; - p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); - err = -ENOMEM; - memset(p->thread.ptrace_bps, 0, 
sizeof(p->thread.ptrace_bps)); - if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -346,7 +341,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - return err; } @@ -501,7 +495,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - return prev_p; } diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c index b25f8947ed7a..c4f76d275ee4 100644 --- a/trunk/arch/x86/kernel/ptrace.c +++ b/trunk/arch/x86/kernel/ptrace.c @@ -22,8 +22,6 @@ #include #include #include -#include -#include #include #include @@ -36,7 +34,6 @@ #include #include #include -#include #include "tls.h" @@ -252,6 +249,11 @@ static int set_segment_reg(struct task_struct *task, return 0; } +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ + return TASK_SIZE - 3; +} + #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -376,6 +378,15 @@ static int set_segment_reg(struct task_struct *task, return 0; } +static unsigned long debugreg_addr_limit(struct task_struct *task) +{ +#ifdef CONFIG_IA32_EMULATION + if (test_tsk_thread_flag(task, TIF_IA32)) + return IA32_PAGE_OFFSET - 3; +#endif + return TASK_SIZE_MAX - 7; +} + #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -555,228 +566,98 @@ static int genregs_set(struct task_struct *target, return ret; } -static void ptrace_triggered(struct perf_event *bp, void *data) -{ - int i; - struct thread_struct *thread = &(current->thread); - - /* - * Store in the virtual DR6 register the fact that the breakpoint - * was hit so the thread's debugger will see it. 
- */ - for (i = 0; i < HBP_NUM; i++) { - if (thread->ptrace_bps[i] == bp) - break; - } - - thread->debugreg6 |= (DR_TRAP0 << i); -} - /* - * Walk through every ptrace breakpoints for this thread and - * build the dr7 value on top of their attributes. - * + * This function is trivial and will be inlined by the compiler. + * Having it separates the implementation details of debug + * registers from the interface details of ptrace. */ -static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) { - int i; - int dr7 = 0; - struct arch_hw_breakpoint *info; - - for (i = 0; i < HBP_NUM; i++) { - if (bp[i] && !bp[i]->attr.disabled) { - info = counter_arch_bp(bp[i]); - dr7 |= encode_dr7(i, info->len, info->type); - } + switch (n) { + case 0: return child->thread.debugreg0; + case 1: return child->thread.debugreg1; + case 2: return child->thread.debugreg2; + case 3: return child->thread.debugreg3; + case 6: return child->thread.debugreg6; + case 7: return child->thread.debugreg7; } - - return dr7; + return 0; } -/* - * Handle ptrace writes to debug register 7. - */ -static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +static int ptrace_set_debugreg(struct task_struct *child, + int n, unsigned long data) { - struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7; - int i, orig_ret = 0, rc = 0; - int enabled, second_pass = 0; - unsigned len, type; - int gen_len, gen_type; - struct perf_event *bp; - - data &= ~DR_CONTROL_RESERVED; - old_dr7 = ptrace_get_dr7(thread->ptrace_bps); -restore: - /* - * Loop through all the hardware breakpoints, making the - * appropriate changes to each. - */ - for (i = 0; i < HBP_NUM; i++) { - enabled = decode_dr7(data, i, &len, &type); - bp = thread->ptrace_bps[i]; - - if (!enabled) { - if (bp) { - /* - * Don't unregister the breakpoints right-away, - * unless all register_user_hw_breakpoint() - * requests have succeeded. 
This prevents - * any window of opportunity for debug - * register grabbing by other users. - */ - if (!second_pass) - continue; - thread->ptrace_bps[i] = NULL; - unregister_hw_breakpoint(bp); - } - continue; - } - - /* - * We shoud have at least an inactive breakpoint at this - * slot. It means the user is writing dr7 without having - * written the address register first - */ - if (!bp) { - rc = -EINVAL; - break; - } - - rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); - if (rc) - break; - - /* - * This is a temporary thing as bp is unregistered/registered - * to simulate modification - */ - bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, - gen_type, bp->callback, - tsk, true); - thread->ptrace_bps[i] = NULL; + int i; - if (!bp) { /* incorrect bp, or we have a bug in bp API */ - rc = -EINVAL; - break; - } - if (IS_ERR(bp)) { - rc = PTR_ERR(bp); - bp = NULL; - break; - } - thread->ptrace_bps[i] = bp; - } - /* - * Make a second pass to free the remaining unused breakpoints - * or to restore the original breakpoints if an error occurred. - */ - if (!second_pass) { - second_pass = 1; - if (rc < 0) { - orig_ret = rc; - data = old_dr7; - } - goto restore; - } - return ((orig_ret < 0) ? orig_ret : rc); -} + if (unlikely(n == 4 || n == 5)) + return -EIO; -/* - * Handle PTRACE_PEEKUSR calls for the debug register area. 
- */ -static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) -{ - struct thread_struct *thread = &(tsk->thread); - unsigned long val = 0; + if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) + return -EIO; - if (n < HBP_NUM) { - struct perf_event *bp; - bp = thread->ptrace_bps[n]; - if (!bp) - return 0; - val = bp->hw.info.address; - } else if (n == 6) { - val = thread->debugreg6; - } else if (n == 7) { - val = ptrace_get_dr7(thread->ptrace_bps); - } - return val; -} + switch (n) { + case 0: child->thread.debugreg0 = data; break; + case 1: child->thread.debugreg1 = data; break; + case 2: child->thread.debugreg2 = data; break; + case 3: child->thread.debugreg3 = data; break; -static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, - unsigned long addr) -{ - struct perf_event *bp; - struct thread_struct *t = &tsk->thread; + case 6: + if ((data & ~0xffffffffUL) != 0) + return -EIO; + child->thread.debugreg6 = data; + break; - if (!t->ptrace_bps[nr]) { + case 7: /* - * Put stub len and type to register (reserve) an inactive but - * correct bp + * Sanity-check data. Take one half-byte at once with + * check = (val >> (16 + 4*i)) & 0xf. It contains the + * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits + * 2 and 3 are LENi. Given a list of invalid values, + * we do mask |= 1 << invalid_value, so that + * (mask >> check) & 1 is a correct test for invalid + * values. + * + * R/Wi contains the type of the breakpoint / + * watchpoint, LENi contains the length of the watched + * data in the watchpoint case. + * + * The invalid values are: + * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] + * - R/Wi == 0x10 (break on I/O reads or writes), so + * mask |= 0x4444. + * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= + * 0x1110. + * + * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. 
+ * + * See the Intel Manual "System Programming Guide", + * 15.2.4 + * + * Note that LENi == 0x10 is defined on x86_64 in long + * mode (i.e. even for 32-bit userspace software, but + * 64-bit kernel), so the x86_64 mask value is 0x5454. + * See the AMD manual no. 24593 (AMD64 System Programming) */ - bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, - HW_BREAKPOINT_W, - ptrace_triggered, tsk, - false); - } else { - bp = t->ptrace_bps[nr]; - t->ptrace_bps[nr] = NULL; - bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, - bp->attr.bp_type, - bp->callback, - tsk, - bp->attr.disabled); +#ifdef CONFIG_X86_32 +#define DR7_MASK 0x5f54 +#else +#define DR7_MASK 0x5554 +#endif + data &= ~DR_CONTROL_RESERVED; + for (i = 0; i < 4; i++) + if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) + return -EIO; + child->thread.debugreg7 = data; + if (data) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); + break; } - if (!bp) - return -EIO; - /* - * CHECKME: the previous code returned -EIO if the addr wasn't a - * valid task virtual addr. The new one will return -EINVAL in this - * case. - * -EINVAL may be what we want for in-kernel breakpoints users, but - * -EIO looks better for ptrace, since we refuse a register writing - * for the user. And anyway this is the previous behaviour. - */ - if (IS_ERR(bp)) - return PTR_ERR(bp); - - t->ptrace_bps[nr] = bp; - return 0; } -/* - * Handle PTRACE_POKEUSR calls for the debug register area. 
- */ -int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) -{ - struct thread_struct *thread = &(tsk->thread); - int rc = 0; - - /* There are no DR4 or DR5 registers */ - if (n == 4 || n == 5) - return -EIO; - - if (n == 6) { - thread->debugreg6 = val; - goto ret_path; - } - if (n < HBP_NUM) { - rc = ptrace_set_breakpoint_addr(tsk, n, val); - if (rc) - return rc; - } - /* All that's left is DR7 */ - if (n == 7) - rc = ptrace_write_dr7(tsk, val); - -ret_path: - return rc; -} - /* * These access the current or another (stopped) task's io permission * bitmap for debugging or core dump. diff --git a/trunk/arch/x86/kernel/signal.c b/trunk/arch/x86/kernel/signal.c index fbf3b07c8567..6a44a76055ad 100644 --- a/trunk/arch/x86/kernel/signal.c +++ b/trunk/arch/x86/kernel/signal.c @@ -799,6 +799,15 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { + /* + * Re-enable any watchpoints before delivering the + * signal to user space. The processor register will + * have been cleared if the watchpoint triggered + * inside the kernel. + */ + if (current->thread.debugreg7) + set_debugreg(current->thread.debugreg7, 7); + /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* diff --git a/trunk/arch/x86/kernel/traps.c b/trunk/arch/x86/kernel/traps.c index 33399176512a..7e37dcee0cc3 100644 --- a/trunk/arch/x86/kernel/traps.c +++ b/trunk/arch/x86/kernel/traps.c @@ -529,56 +529,77 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long dr6; + unsigned long condition; int si_code; - get_debugreg(dr6, 6); + get_debugreg(condition, 6); /* Catch kmemcheck conditions first of all! 
*/ - if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) + if (condition & DR_STEP && kmemcheck_trap(regs)) return; - /* DR6 may or may not be cleared by the CPU */ - set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - /* Store the virtualized DR6 value */ - tsk->thread.debugreg6 = dr6; - - if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, - SIGTRAP) == NOTIFY_STOP) + if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - if (regs->flags & X86_VM_MASK) { - handle_vm86_trap((struct kernel_vm86_regs *) regs, - error_code, 1); - return; + /* Mask out spurious debug traps due to lazy DR7 setting */ + if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { + if (!tsk->thread.debugreg7) + goto clear_dr7; } +#ifdef CONFIG_X86_32 + if (regs->flags & X86_VM_MASK) + goto debug_vm86; +#endif + + /* Save debug status register where ptrace can see it */ + tsk->thread.debugreg6 = condition; + /* - * Single-stepping through system calls: ignore any exceptions in - * kernel space, but re-enable TF when returning to user mode. - * - * We already checked v86 mode above, so we can check for kernel mode - * by just checking the CPL of CS. + * Single-stepping through TF: make sure we ignore any events in + * kernel space (but re-enable TF when returning to user mode). 
*/ - if ((dr6 & DR_STEP) && !user_mode(regs)) { - tsk->thread.debugreg6 &= ~DR_STEP; - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; + if (condition & DR_STEP) { + if (!user_mode(regs)) + goto clear_TF_reenable; } - si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) - send_sigtrap(tsk, regs, error_code, si_code); + + si_code = get_si_code(condition); + /* Ok, finally something we can handle */ + send_sigtrap(tsk, regs, error_code, si_code); + + /* + * Disable additional traps. They'll be re-enabled when + * the signal is delivered. + */ +clear_dr7: + set_debugreg(0, 7); preempt_conditional_cli(regs); + return; +#ifdef CONFIG_X86_32 +debug_vm86: + /* reenable preemption: handle_vm86_trap() might sleep */ + dec_preempt_count(); + handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); + conditional_cli(regs); + return; +#endif + +clear_TF_reenable: + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; + preempt_conditional_cli(regs); return; } diff --git a/trunk/arch/x86/kvm/x86.c b/trunk/arch/x86/kvm/x86.c index 4fc80174191c..ae07d261527c 100644 --- a/trunk/arch/x86/kvm/x86.c +++ b/trunk/arch/x86/kvm/x86.c @@ -42,7 +42,6 @@ #define CREATE_TRACE_POINTS #include "trace.h" -#include #include #include #include @@ -3644,15 +3643,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - /* - * If the guest has used debug registers, at least dr7 - * will be disabled while returning to the host. - * If we don't have active breakpoints in the host, we don't - * care about the messed up debug address registers. But if - * we have some of them active, restore the old state. 
- */ - if (hw_breakpoint_active()) - hw_breakpoint_restore(); + if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { + set_debugreg(current->thread.debugreg0, 0); + set_debugreg(current->thread.debugreg1, 1); + set_debugreg(current->thread.debugreg2, 2); + set_debugreg(current->thread.debugreg3, 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(current->thread.debugreg7, 7); + } set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/trunk/arch/x86/mm/kmmio.c b/trunk/arch/x86/mm/kmmio.c index 11a4ad4d6253..16ccbd77917f 100644 --- a/trunk/arch/x86/mm/kmmio.c +++ b/trunk/arch/x86/mm/kmmio.c @@ -540,14 +540,8 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) { - /* - * Reset the BS bit in dr6 (pointed by args->err) to - * denote completion of processing - */ - (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; + if (post_kmmio_handler(arg->err, arg->regs) == 1) return NOTIFY_STOP; - } return NOTIFY_DONE; } diff --git a/trunk/arch/x86/power/cpu.c b/trunk/arch/x86/power/cpu.c index 0a979f3e5b8a..8aa85f17667e 100644 --- a/trunk/arch/x86/power/cpu.c +++ b/trunk/arch/x86/power/cpu.c @@ -18,7 +18,6 @@ #include #include #include -#include #ifdef CONFIG_X86_32 static struct saved_context saved_context; @@ -143,6 +142,31 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ + + /* + * Now maybe reload the debug registers + */ + if (current->thread.debugreg7) { +#ifdef CONFIG_X86_32 + set_debugreg(current->thread.debugreg0, 0); + set_debugreg(current->thread.debugreg1, 1); + set_debugreg(current->thread.debugreg2, 2); + set_debugreg(current->thread.debugreg3, 3); + /* no 4 and 5 */ + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(current->thread.debugreg7, 7); +#else + /* 
CONFIG_X86_64 */ + loaddebug(¤t->thread, 0); + loaddebug(¤t->thread, 1); + loaddebug(¤t->thread, 2); + loaddebug(¤t->thread, 3); + /* no 4 and 5 */ + loaddebug(¤t->thread, 6); + loaddebug(¤t->thread, 7); +#endif + } + } /** diff --git a/trunk/arch/x86/tools/test_get_len.c b/trunk/arch/x86/tools/test_get_len.c index d8214dc03fa7..af75e07217ba 100644 --- a/trunk/arch/x86/tools/test_get_len.c +++ b/trunk/arch/x86/tools/test_get_len.c @@ -114,7 +114,6 @@ int main(int argc, char **argv) unsigned char insn_buf[16]; struct insn insn; int insns = 0, c; - int warnings = 0; parse_args(argc, argv); @@ -152,22 +151,18 @@ int main(int argc, char **argv) insn_init(&insn, insn_buf, x86_64); insn_get_length(&insn); if (insn.length != nb) { - warnings++; - fprintf(stderr, "Warning: %s found difference at %s\n", + fprintf(stderr, "Error: %s found a difference at %s\n", prog, sym); - fprintf(stderr, "Warning: %s", line); - fprintf(stderr, "Warning: objdump says %d bytes, but " + fprintf(stderr, "Error: %s", line); + fprintf(stderr, "Error: objdump says %d bytes, but " "insn_get_length() says %d\n", nb, insn.length); if (verbose) dump_insn(stderr, &insn); + exit(2); } } - if (warnings) - fprintf(stderr, "Warning: decoded and checked %d" - " instructions with %d warnings\n", insns, warnings); - else - fprintf(stderr, "Succeed: decoded and checked %d" - " instructions\n", insns); + fprintf(stderr, "Succeed: decoded and checked %d instructions\n", + insns); return 0; } diff --git a/trunk/include/linux/ftrace_event.h b/trunk/include/linux/ftrace_event.h index 47bbdf9c38d0..43360c1d8f70 100644 --- a/trunk/include/linux/ftrace_event.h +++ b/trunk/include/linux/ftrace_event.h @@ -137,8 +137,13 @@ struct ftrace_event_call { #define FTRACE_MAX_PROFILE_SIZE 2048 -extern char *perf_trace_buf; -extern char *perf_trace_buf_nmi; +struct perf_trace_buf { + char buf[FTRACE_MAX_PROFILE_SIZE]; + int recursion; +}; + +extern struct perf_trace_buf *perf_trace_buf; +extern struct perf_trace_buf 
*perf_trace_buf_nmi; #define MAX_FILTER_PRED 32 #define MAX_FILTER_STR_VAL 256 /* Should handle KSYM_SYMBOL_LEN */ diff --git a/trunk/include/linux/hw_breakpoint.h b/trunk/include/linux/hw_breakpoint.h deleted file mode 100644 index c9f7f7c7b0e0..000000000000 --- a/trunk/include/linux/hw_breakpoint.h +++ /dev/null @@ -1,140 +0,0 @@ -#ifndef _LINUX_HW_BREAKPOINT_H -#define _LINUX_HW_BREAKPOINT_H - -enum { - HW_BREAKPOINT_LEN_1 = 1, - HW_BREAKPOINT_LEN_2 = 2, - HW_BREAKPOINT_LEN_4 = 4, - HW_BREAKPOINT_LEN_8 = 8, -}; - -enum { - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_X = 4, -}; - -#ifdef __KERNEL__ - -#include - -#ifdef CONFIG_HAVE_HW_BREAKPOINT - -static inline unsigned long hw_breakpoint_addr(struct perf_event *bp) -{ - return bp->attr.bp_addr; -} - -static inline int hw_breakpoint_type(struct perf_event *bp) -{ - return bp->attr.bp_type; -} - -static inline int hw_breakpoint_len(struct perf_event *bp) -{ - return bp->attr.bp_len; -} - -extern struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active); - -/* FIXME: only change from the attr, and don't unregister */ -extern struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active); - -/* - * Kernel breakpoints are not associated with any particular thread. 
- */ -extern struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active); - -extern struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active); - -extern int register_perf_hw_breakpoint(struct perf_event *bp); -extern int __register_perf_hw_breakpoint(struct perf_event *bp); -extern void unregister_hw_breakpoint(struct perf_event *bp); -extern void unregister_wide_hw_breakpoint(struct perf_event **cpu_events); - -extern int reserve_bp_slot(struct perf_event *bp); -extern void release_bp_slot(struct perf_event *bp); - -extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); - -static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) -{ - return &bp->hw.info; -} - -#else /* !CONFIG_HAVE_HW_BREAKPOINT */ - -static inline struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } -static inline struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) { return NULL; } -static inline struct perf_event * -register_wide_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active) { return NULL; } -static inline struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) { return NULL; } -static inline int -register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } -static inline int -__register_perf_hw_breakpoint(struct perf_event *bp) { return -ENOSYS; } -static inline void unregister_hw_breakpoint(struct perf_event *bp) { } -static inline void -unregister_wide_hw_breakpoint(struct 
perf_event **cpu_events) { } -static inline int -reserve_bp_slot(struct perf_event *bp) {return -ENOSYS; } -static inline void release_bp_slot(struct perf_event *bp) { } - -static inline void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { } - -static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) -{ - return NULL; -} - -#endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ - -#endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/trunk/include/linux/perf_event.h b/trunk/include/linux/perf_event.h index 43adbd7f0010..7f87563c8485 100644 --- a/trunk/include/linux/perf_event.h +++ b/trunk/include/linux/perf_event.h @@ -18,10 +18,6 @@ #include #include -#ifdef CONFIG_HAVE_HW_BREAKPOINT -#include -#endif - /* * User-space ABI bits: */ @@ -35,7 +31,6 @@ enum perf_type_id { PERF_TYPE_TRACEPOINT = 2, PERF_TYPE_HW_CACHE = 3, PERF_TYPE_RAW = 4, - PERF_TYPE_BREAKPOINT = 5, PERF_TYPE_MAX, /* non-ABI */ }; @@ -214,15 +209,6 @@ struct perf_event_attr { __u32 wakeup_events; /* wakeup every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; - - union { - struct { /* Hardware breakpoint info */ - __u64 bp_addr; - __u32 bp_type; - __u32 bp_len; - }; - }; - __u32 __reserved_2; __u64 __reserved_3; @@ -492,11 +478,6 @@ struct hw_perf_event { s64 remaining; struct hrtimer hrtimer; }; -#ifdef CONFIG_HAVE_HW_BREAKPOINT - union { /* breakpoint */ - struct arch_hw_breakpoint info; - }; -#endif }; atomic64_t prev_count; u64 sample_period; @@ -565,10 +546,6 @@ struct perf_pending_entry { void (*func)(struct perf_pending_entry *); }; -typedef void (*perf_callback_t)(struct perf_event *, void *); - -struct perf_sample_data; - /** * struct perf_event - performance event kernel representation: */ @@ -611,7 +588,7 @@ struct perf_event { u64 tstamp_running; u64 tstamp_stopped; - struct perf_event_attr attr; + struct perf_event_attr attr; struct hw_perf_event hw; struct perf_event_context *ctx; @@ -660,18 +637,10 @@ struct perf_event { struct 
pid_namespace *ns; u64 id; - void (*overflow_handler)(struct perf_event *event, - int nmi, struct perf_sample_data *data, - struct pt_regs *regs); - #ifdef CONFIG_EVENT_PROFILE struct event_filter *filter; #endif - perf_callback_t callback; - - perf_callback_t event_callback; - #endif /* CONFIG_PERF_EVENTS */ }; @@ -776,14 +745,6 @@ extern int hw_perf_group_sched_in(struct perf_event *group_leader, struct perf_cpu_context *cpuctx, struct perf_event_context *ctx, int cpu); extern void perf_event_update_userpage(struct perf_event *event); -extern int perf_event_release_kernel(struct perf_event *event); -extern struct perf_event * -perf_event_create_kernel_counter(struct perf_event_attr *attr, - int cpu, - pid_t pid, - perf_callback_t callback); -extern u64 perf_event_read_value(struct perf_event *event, - u64 *enabled, u64 *running); struct perf_sample_data { u64 type; @@ -860,7 +821,6 @@ extern int sysctl_perf_event_sample_rate; extern void perf_event_init(void); extern void perf_tp_event(int event_id, u64 addr, u64 count, void *record, int entry_size); -extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags #define perf_misc_flags(regs) (user_mode(regs) ? 
PERF_RECORD_MISC_USER : \ @@ -874,8 +834,6 @@ extern int perf_output_begin(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern void perf_output_copy(struct perf_output_handle *handle, const void *buf, unsigned int len); -extern int perf_swevent_get_recursion_context(void); -extern void perf_swevent_put_recursion_context(int rctx); #else static inline void perf_event_task_sched_in(struct task_struct *task, int cpu) { } @@ -897,15 +855,11 @@ static inline int perf_event_task_enable(void) { return -EINVAL; } static inline void perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { } -static inline void -perf_bp_event(struct perf_event *event, void *data) { } static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_comm(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } -static inline int perf_swevent_get_recursion_context(void) { return -1; } -static inline void perf_swevent_put_recursion_context(int rctx) { } #endif diff --git a/trunk/include/trace/ftrace.h b/trunk/include/trace/ftrace.h index c3417c13e3ed..4945d1c99864 100644 --- a/trunk/include/trace/ftrace.h +++ b/trunk/include/trace/ftrace.h @@ -724,20 +724,17 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ - extern int perf_swevent_get_recursion_context(void); \ - extern void perf_swevent_put_recursion_context(int rctx); \ struct ftrace_event_call *event_call = &event_##call; \ extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ + struct perf_trace_buf *trace_buf; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ - char *trace_buf; \ char *raw_data; \ int __cpu; \ - int 
rctx; \ int pc; \ \ pc = preempt_count(); \ @@ -752,11 +749,6 @@ static void ftrace_profile_##call(proto) \ return; \ \ local_irq_save(irq_flags); \ - \ - rctx = perf_swevent_get_recursion_context(); \ - if (rctx < 0) \ - goto end_recursion; \ - \ __cpu = smp_processor_id(); \ \ if (in_nmi()) \ @@ -767,7 +759,13 @@ static void ftrace_profile_##call(proto) \ if (!trace_buf) \ goto end; \ \ - raw_data = per_cpu_ptr(trace_buf, __cpu); \ + trace_buf = per_cpu_ptr(trace_buf, __cpu); \ + if (trace_buf->recursion++) \ + goto end_recursion; \ + \ + barrier(); \ + \ + raw_data = trace_buf->buf; \ \ *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ entry = (struct ftrace_raw_##call *)raw_data; \ @@ -782,9 +780,9 @@ static void ftrace_profile_##call(proto) \ perf_tp_event(event_call->id, __addr, __count, entry, \ __entry_size); \ \ -end: \ - perf_swevent_put_recursion_context(rctx); \ end_recursion: \ + trace_buf->recursion--; \ +end: \ local_irq_restore(irq_flags); \ \ } diff --git a/trunk/kernel/Makefile b/trunk/kernel/Makefile index 6b7ce8173dfd..b8d4cd8ac0b9 100644 --- a/trunk/kernel/Makefile +++ b/trunk/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_mutex-debug.o = -pg CFLAGS_REMOVE_rtmutex-debug.o = -pg CFLAGS_REMOVE_cgroup-debug.o = -pg CFLAGS_REMOVE_sched_clock.o = -pg -CFLAGS_REMOVE_perf_event.o = -pg endif obj-$(CONFIG_FREEZER) += freezer.o @@ -96,7 +95,6 @@ obj-$(CONFIG_RING_BUFFER) += trace/ obj-$(CONFIG_SMP) += sched_cpupri.o obj-$(CONFIG_SLOW_WORK) += slow-work.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o -obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y) # According to Alan Modra , the -fno-omit-frame-pointer is diff --git a/trunk/kernel/exit.c b/trunk/kernel/exit.c index 3f45e3cf931d..f7864ac2ecc1 100644 --- a/trunk/kernel/exit.c +++ b/trunk/kernel/exit.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include @@ -978,10 +977,6 @@ NORET_TYPE void do_exit(long code) 
proc_exit_connector(tsk); - /* - * FIXME: do that only when needed, using sched_exit tracepoint - */ - flush_ptrace_hw_breakpoint(tsk); /* * Flush inherited counters to the parent - before the parent * gets woken up by child-exit notifications. diff --git a/trunk/kernel/hw_breakpoint.c b/trunk/kernel/hw_breakpoint.c deleted file mode 100644 index 06d372fc026d..000000000000 --- a/trunk/kernel/hw_breakpoint.c +++ /dev/null @@ -1,501 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) 2007 Alan Stern - * Copyright (C) IBM Corporation, 2009 - * Copyright (C) 2009, Frederic Weisbecker - * - * Thanks to Ingo Molnar for his many suggestions. - * - * Authors: Alan Stern - * K.Prasad - * Frederic Weisbecker - */ - -/* - * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, - * using the CPU's debug registers. - * This file contains the arch-independent routines. 
- */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -/* - * Constraints data - */ - -/* Number of pinned cpu breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); - -/* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); - -/* Number of non-pinned cpu/task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); - -/* Gather the number of total pinned and un-pinned bp in a cpuset */ -struct bp_busy_slots { - unsigned int pinned; - unsigned int flexible; -}; - -/* Serialize accesses to the above constraints */ -static DEFINE_MUTEX(nr_bp_mutex); - -/* - * Report the maximum number of pinned breakpoints a task - * have in this cpu - */ -static unsigned int max_task_bp_pinned(int cpu) -{ - int i; - unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); - - for (i = HBP_NUM -1; i >= 0; i--) { - if (tsk_pinned[i] > 0) - return i + 1; - } - - return 0; -} - -/* - * Report the number of pinned/un-pinned breakpoints we have in - * a given cpu (cpu > -1) or in all of them (cpu = -1). 
- */ -static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) -{ - if (cpu >= 0) { - slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); - slots->pinned += max_task_bp_pinned(cpu); - slots->flexible = per_cpu(nr_bp_flexible, cpu); - - return; - } - - for_each_online_cpu(cpu) { - unsigned int nr; - - nr = per_cpu(nr_cpu_bp_pinned, cpu); - nr += max_task_bp_pinned(cpu); - - if (nr > slots->pinned) - slots->pinned = nr; - - nr = per_cpu(nr_bp_flexible, cpu); - - if (nr > slots->flexible) - slots->flexible = nr; - } -} - -/* - * Add a pinned breakpoint for the given task in our constraint table - */ -static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) -{ - int count = 0; - struct perf_event *bp; - struct perf_event_context *ctx = tsk->perf_event_ctxp; - unsigned int *task_bp_pinned; - struct list_head *list; - unsigned long flags; - - if (WARN_ONCE(!ctx, "No perf context for this task")) - return; - - list = &ctx->event_list; - - spin_lock_irqsave(&ctx->lock, flags); - - /* - * The current breakpoint counter is not included in the list - * at the open() callback time - */ - list_for_each_entry(bp, list, event_entry) { - if (bp->attr.type == PERF_TYPE_BREAKPOINT) - count++; - } - - spin_unlock_irqrestore(&ctx->lock, flags); - - if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) - return; - - task_bp_pinned = per_cpu(task_bp_pinned, cpu); - if (enable) { - task_bp_pinned[count]++; - if (count > 0) - task_bp_pinned[count-1]--; - } else { - task_bp_pinned[count]--; - if (count > 0) - task_bp_pinned[count-1]++; - } -} - -/* - * Add/remove the given breakpoint in our constraint table - */ -static void toggle_bp_slot(struct perf_event *bp, bool enable) -{ - int cpu = bp->cpu; - struct task_struct *tsk = bp->ctx->task; - - /* Pinned counter task profiling */ - if (tsk) { - if (cpu >= 0) { - toggle_bp_task_slot(tsk, cpu, enable); - return; - } - - for_each_online_cpu(cpu) - toggle_bp_task_slot(tsk, cpu, enable); - 
return; - } - - /* Pinned counter cpu profiling */ - if (enable) - per_cpu(nr_cpu_bp_pinned, bp->cpu)++; - else - per_cpu(nr_cpu_bp_pinned, bp->cpu)--; -} - -/* - * Contraints to check before allowing this new breakpoint counter: - * - * == Non-pinned counter == (Considered as pinned for now) - * - * - If attached to a single cpu, check: - * - * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM - * - * -> If there are already non-pinned counters in this cpu, it means - * there is already a free slot for them. - * Otherwise, we check that the maximum number of per task - * breakpoints (for this cpu) plus the number of per cpu breakpoint - * (for this cpu) doesn't cover every registers. - * - * - If attached to every cpus, check: - * - * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM - * - * -> This is roughly the same, except we check the number of per cpu - * bp for every cpu and we keep the max one. Same for the per tasks - * breakpoints. - * - * - * == Pinned counter == - * - * - If attached to a single cpu, check: - * - * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM - * - * -> Same checks as before. But now the nr_bp_flexible, if any, must keep - * one register at least (or they will never be fed). 
- * - * - If attached to every cpus, check: - * - * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM - */ -int reserve_bp_slot(struct perf_event *bp) -{ - struct bp_busy_slots slots = {0}; - int ret = 0; - - mutex_lock(&nr_bp_mutex); - - fetch_bp_busy_slots(&slots, bp->cpu); - - /* Flexible counters need to keep at least one slot */ - if (slots.pinned + (!!slots.flexible) == HBP_NUM) { - ret = -ENOSPC; - goto end; - } - - toggle_bp_slot(bp, true); - -end: - mutex_unlock(&nr_bp_mutex); - - return ret; -} - -void release_bp_slot(struct perf_event *bp) -{ - mutex_lock(&nr_bp_mutex); - - toggle_bp_slot(bp, false); - - mutex_unlock(&nr_bp_mutex); -} - - -int __register_perf_hw_breakpoint(struct perf_event *bp) -{ - int ret; - - ret = reserve_bp_slot(bp); - if (ret) - return ret; - - /* - * Ptrace breakpoints can be temporary perf events only - * meant to reserve a slot. In this case, it is created disabled and - * we don't want to check the params right now (as we put a null addr) - * But perf tools create events as disabled and we want to check - * the params for them. - * This is a quick hack that will be removed soon, once we remove - * the tmp breakpoints from ptrace - */ - if (!bp->attr.disabled || bp->callback == perf_bp_event) - ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); - - return ret; -} - -int register_perf_hw_breakpoint(struct perf_event *bp) -{ - bp->callback = perf_bp_event; - - return __register_perf_hw_breakpoint(bp); -} - -/* - * Register a breakpoint bound to a task and a given cpu. - * If cpu is -1, the breakpoint is active for the task in every cpu - * If the task is -1, the breakpoint is active for every tasks in the given - * cpu. 
- */ -static struct perf_event * -register_user_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - pid_t pid, - int cpu, - bool active) -{ - struct perf_event_attr *attr; - struct perf_event *bp; - - attr = kzalloc(sizeof(*attr), GFP_KERNEL); - if (!attr) - return ERR_PTR(-ENOMEM); - - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(*attr); - attr->bp_addr = addr; - attr->bp_len = len; - attr->bp_type = type; - /* - * Such breakpoints are used by debuggers to trigger signals when - * we hit the excepted memory op. We can't miss such events, they - * must be pinned. - */ - attr->pinned = 1; - - if (!active) - attr->disabled = 1; - - bp = perf_event_create_kernel_counter(attr, cpu, pid, triggered); - kfree(attr); - - return bp; -} - -/** - * register_user_hw_breakpoint - register a hardware breakpoint for user space - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) - * @type: the type of the access to the memory (read/write/exec) - * @triggered: callback to trigger when we hit the breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - * - */ -struct perf_event * -register_user_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) -{ - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - tsk->pid, -1, active); -} -EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); - -/** - * modify_user_hw_breakpoint - modify a user-space hardware breakpoint - * @bp: the breakpoint structure to modify - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) 
- * @type: the type of the access to the memory (read/write/exec) - * @triggered: callback to trigger when we hit the breakpoint - * @tsk: pointer to 'task_struct' of the process to which the address belongs - * @active: should we activate it while registering it - */ -struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, - unsigned long addr, - int len, - int type, - perf_callback_t triggered, - struct task_struct *tsk, - bool active) -{ - /* - * FIXME: do it without unregistering - * - We don't want to lose our slot - * - If the new bp is incorrect, don't lose the older one - */ - unregister_hw_breakpoint(bp); - - return register_user_hw_breakpoint(addr, len, type, triggered, - tsk, active); -} -EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); - -/** - * unregister_hw_breakpoint - unregister a user-space hardware breakpoint - * @bp: the breakpoint structure to unregister - */ -void unregister_hw_breakpoint(struct perf_event *bp) -{ - if (!bp) - return; - perf_event_release_kernel(bp); -} -EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); - -static struct perf_event * -register_kernel_hw_breakpoint_cpu(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - int cpu, - bool active) -{ - return register_user_hw_breakpoint_cpu(addr, len, type, triggered, - -1, cpu, active); -} - -/** - * register_wide_hw_breakpoint - register a wide breakpoint in the kernel - * @addr: is the memory address that triggers the breakpoint - * @len: the length of the access to the memory (1 byte, 2 bytes etc...) 
- * @type: the type of the access to the memory (read/write/exec) - * @triggered: callback to trigger when we hit the breakpoint - * @active: should we activate it while registering it - * - * @return a set of per_cpu pointers to perf events - */ -struct perf_event ** -register_wide_hw_breakpoint(unsigned long addr, - int len, - int type, - perf_callback_t triggered, - bool active) -{ - struct perf_event **cpu_events, **pevent, *bp; - long err; - int cpu; - - cpu_events = alloc_percpu(typeof(*cpu_events)); - if (!cpu_events) - return ERR_PTR(-ENOMEM); - - for_each_possible_cpu(cpu) { - pevent = per_cpu_ptr(cpu_events, cpu); - bp = register_kernel_hw_breakpoint_cpu(addr, len, type, - triggered, cpu, active); - - *pevent = bp; - - if (IS_ERR(bp) || !bp) { - err = PTR_ERR(bp); - goto fail; - } - } - - return cpu_events; - -fail: - for_each_possible_cpu(cpu) { - pevent = per_cpu_ptr(cpu_events, cpu); - if (IS_ERR(*pevent) || !*pevent) - break; - unregister_hw_breakpoint(*pevent); - } - free_percpu(cpu_events); - /* return the error if any */ - return ERR_PTR(err); -} -EXPORT_SYMBOL_GPL(register_wide_hw_breakpoint); - -/** - * unregister_wide_hw_breakpoint - unregister a wide breakpoint in the kernel - * @cpu_events: the per cpu set of events to unregister - */ -void unregister_wide_hw_breakpoint(struct perf_event **cpu_events) -{ - int cpu; - struct perf_event **pevent; - - for_each_possible_cpu(cpu) { - pevent = per_cpu_ptr(cpu_events, cpu); - unregister_hw_breakpoint(*pevent); - } - free_percpu(cpu_events); -} -EXPORT_SYMBOL_GPL(unregister_wide_hw_breakpoint); - -static struct notifier_block hw_breakpoint_exceptions_nb = { - .notifier_call = hw_breakpoint_exceptions_notify, - /* we need to be notified first */ - .priority = 0x7fffffff -}; - -static int __init init_hw_breakpoint(void) -{ - return register_die_notifier(&hw_breakpoint_exceptions_nb); -} -core_initcall(init_hw_breakpoint); - - -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable 
= arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, - .unthrottle = hw_breakpoint_pmu_unthrottle -}; diff --git a/trunk/kernel/kallsyms.c b/trunk/kernel/kallsyms.c index 8e5288a8a355..8b6b8b697c68 100644 --- a/trunk/kernel/kallsyms.c +++ b/trunk/kernel/kallsyms.c @@ -181,7 +181,6 @@ unsigned long kallsyms_lookup_name(const char *name) } return module_kallsyms_lookup_name(name); } -EXPORT_SYMBOL_GPL(kallsyms_lookup_name); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *, unsigned long), diff --git a/trunk/kernel/perf_event.c b/trunk/kernel/perf_event.c index accfd7bfe387..3256e36ad251 100644 --- a/trunk/kernel/perf_event.c +++ b/trunk/kernel/perf_event.c @@ -29,7 +29,6 @@ #include #include #include -#include #include @@ -246,49 +245,6 @@ static void perf_unpin_context(struct perf_event_context *ctx) put_ctx(ctx); } -static inline u64 perf_clock(void) -{ - return cpu_clock(smp_processor_id()); -} - -/* - * Update the record of the current time in a context. - */ -static void update_context_time(struct perf_event_context *ctx) -{ - u64 now = perf_clock(); - - ctx->time += now - ctx->timestamp; - ctx->timestamp = now; -} - -/* - * Update the total_time_enabled and total_time_running fields for a event. - */ -static void update_event_times(struct perf_event *event) -{ - struct perf_event_context *ctx = event->ctx; - u64 run_end; - - if (event->state < PERF_EVENT_STATE_INACTIVE || - event->group_leader->state < PERF_EVENT_STATE_INACTIVE) - return; - - if (ctx->is_active) - run_end = ctx->time; - else - run_end = event->tstamp_stopped; - - event->total_time_enabled = run_end - event->tstamp_enabled; - - if (event->state == PERF_EVENT_STATE_INACTIVE) - run_end = event->tstamp_stopped; - else - run_end = ctx->time; - - event->total_time_running = run_end - event->tstamp_running; -} - /* * Add a event from the lists for its context. * Must be called with ctx->mutex and ctx->lock held. 
@@ -337,9 +293,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx) if (event->group_leader != event) event->group_leader->nr_siblings--; - update_event_times(event); - event->state = PERF_EVENT_STATE_OFF; - /* * If this was a group event with sibling events then * upgrade the siblings to singleton events by adding them @@ -493,11 +446,50 @@ static void perf_event_remove_from_context(struct perf_event *event) * can remove the event safely, if the call above did not * succeed. */ - if (!list_empty(&event->group_entry)) + if (!list_empty(&event->group_entry)) { list_del_event(event, ctx); + } spin_unlock_irq(&ctx->lock); } +static inline u64 perf_clock(void) +{ + return cpu_clock(smp_processor_id()); +} + +/* + * Update the record of the current time in a context. + */ +static void update_context_time(struct perf_event_context *ctx) +{ + u64 now = perf_clock(); + + ctx->time += now - ctx->timestamp; + ctx->timestamp = now; +} + +/* + * Update the total_time_enabled and total_time_running fields for a event. + */ +static void update_event_times(struct perf_event *event) +{ + struct perf_event_context *ctx = event->ctx; + u64 run_end; + + if (event->state < PERF_EVENT_STATE_INACTIVE || + event->group_leader->state < PERF_EVENT_STATE_INACTIVE) + return; + + event->total_time_enabled = ctx->time - event->tstamp_enabled; + + if (event->state == PERF_EVENT_STATE_INACTIVE) + run_end = event->tstamp_stopped; + else + run_end = ctx->time; + + event->total_time_running = run_end - event->tstamp_running; +} + /* * Update total_time_enabled and total_time_running for all events in a group. 
*/ @@ -1040,10 +1032,10 @@ void __perf_event_sched_out(struct perf_event_context *ctx, update_context_time(ctx); perf_disable(); - if (ctx->nr_active) { + if (ctx->nr_active) list_for_each_entry(event, &ctx->group_list, group_entry) group_sched_out(event, cpuctx, ctx); - } + perf_enable(); out: spin_unlock(&ctx->lock); @@ -1068,6 +1060,8 @@ static int context_equiv(struct perf_event_context *ctx1, && !ctx1->pin_count && !ctx2->pin_count; } +static void __perf_event_read(void *event); + static void __perf_event_sync_stat(struct perf_event *event, struct perf_event *next_event) { @@ -1085,8 +1079,8 @@ static void __perf_event_sync_stat(struct perf_event *event, */ switch (event->state) { case PERF_EVENT_STATE_ACTIVE: - event->pmu->read(event); - /* fall-through */ + __perf_event_read(event); + break; case PERF_EVENT_STATE_INACTIVE: update_event_times(event); @@ -1125,8 +1119,6 @@ static void perf_event_sync_stat(struct perf_event_context *ctx, if (!ctx->nr_stat) return; - update_context_time(ctx); - event = list_first_entry(&ctx->event_list, struct perf_event, event_entry); @@ -1170,6 +1162,8 @@ void perf_event_task_sched_out(struct task_struct *task, if (likely(!ctx || !cpuctx->task_ctx)) return; + update_context_time(ctx); + rcu_read_lock(); parent = rcu_dereference(ctx->parent_ctx); next_ctx = next->perf_event_ctxp; @@ -1522,6 +1516,7 @@ static void __perf_event_read(void *info) struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + unsigned long flags; /* * If this is a task context, we need to check whether it is @@ -1533,12 +1528,12 @@ static void __perf_event_read(void *info) if (ctx->task && cpuctx->task_ctx != ctx) return; - spin_lock(&ctx->lock); - update_context_time(ctx); - update_event_times(event); - spin_unlock(&ctx->lock); - + local_irq_save(flags); + if (ctx->is_active) + update_context_time(ctx); event->pmu->read(event); + update_event_times(event); + 
local_irq_restore(flags); } static u64 perf_event_read(struct perf_event *event) @@ -1551,13 +1546,7 @@ static u64 perf_event_read(struct perf_event *event) smp_call_function_single(event->oncpu, __perf_event_read, event, 1); } else if (event->state == PERF_EVENT_STATE_INACTIVE) { - struct perf_event_context *ctx = event->ctx; - unsigned long flags; - - spin_lock_irqsave(&ctx->lock, flags); - update_context_time(ctx); update_event_times(event); - spin_unlock_irqrestore(&ctx->lock, flags); } return atomic64_read(&event->count); @@ -1711,10 +1700,16 @@ static void free_event(struct perf_event *event) call_rcu(&event->rcu_head, free_event_rcu); } -int perf_event_release_kernel(struct perf_event *event) +/* + * Called when the last reference to the file is gone. + */ +static int perf_release(struct inode *inode, struct file *file) { + struct perf_event *event = file->private_data; struct perf_event_context *ctx = event->ctx; + file->private_data = NULL; + WARN_ON_ONCE(ctx->parent_ctx); mutex_lock(&ctx->mutex); perf_event_remove_from_context(event); @@ -1729,19 +1724,6 @@ int perf_event_release_kernel(struct perf_event *event) return 0; } -EXPORT_SYMBOL_GPL(perf_event_release_kernel); - -/* - * Called when the last reference to the file is gone. 
- */ -static int perf_release(struct inode *inode, struct file *file) -{ - struct perf_event *event = file->private_data; - - file->private_data = NULL; - - return perf_event_release_kernel(event); -} static int perf_event_read_size(struct perf_event *event) { @@ -1768,94 +1750,91 @@ static int perf_event_read_size(struct perf_event *event) return size; } -u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) +static u64 perf_event_read_value(struct perf_event *event) { struct perf_event *child; u64 total = 0; - *enabled = 0; - *running = 0; - - mutex_lock(&event->child_mutex); total += perf_event_read(event); - *enabled += event->total_time_enabled + - atomic64_read(&event->child_total_time_enabled); - *running += event->total_time_running + - atomic64_read(&event->child_total_time_running); - - list_for_each_entry(child, &event->child_list, child_list) { + list_for_each_entry(child, &event->child_list, child_list) total += perf_event_read(child); - *enabled += child->total_time_enabled; - *running += child->total_time_running; - } - mutex_unlock(&event->child_mutex); return total; } -EXPORT_SYMBOL_GPL(perf_event_read_value); + +static int perf_event_read_entry(struct perf_event *event, + u64 read_format, char __user *buf) +{ + int n = 0, count = 0; + u64 values[2]; + + values[n++] = perf_event_read_value(event); + if (read_format & PERF_FORMAT_ID) + values[n++] = primary_event_id(event); + + count = n * sizeof(u64); + + if (copy_to_user(buf, values, count)) + return -EFAULT; + + return count; +} static int perf_event_read_group(struct perf_event *event, u64 read_format, char __user *buf) { struct perf_event *leader = event->group_leader, *sub; - int n = 0, size = 0, ret = -EFAULT; - struct perf_event_context *ctx = leader->ctx; - u64 values[5]; - u64 count, enabled, running; - - mutex_lock(&ctx->mutex); - count = perf_event_read_value(leader, &enabled, &running); + int n = 0, size = 0, err = -EFAULT; + u64 values[3]; values[n++] = 1 + 
leader->nr_siblings; - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = enabled; - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = running; - values[n++] = count; - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(leader); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = leader->total_time_enabled + + atomic64_read(&leader->child_total_time_enabled); + } + if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = leader->total_time_running + + atomic64_read(&leader->child_total_time_running); + } size = n * sizeof(u64); if (copy_to_user(buf, values, size)) - goto unlock; - - ret = size; - - list_for_each_entry(sub, &leader->sibling_list, group_entry) { - n = 0; + return -EFAULT; - values[n++] = perf_event_read_value(sub, &enabled, &running); - if (read_format & PERF_FORMAT_ID) - values[n++] = primary_event_id(sub); + err = perf_event_read_entry(leader, read_format, buf + size); + if (err < 0) + return err; - size = n * sizeof(u64); + size += err; - if (copy_to_user(buf + ret, values, size)) { - ret = -EFAULT; - goto unlock; - } + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + err = perf_event_read_entry(sub, read_format, + buf + size); + if (err < 0) + return err; - ret += size; + size += err; } -unlock: - mutex_unlock(&ctx->mutex); - return ret; + return size; } static int perf_event_read_one(struct perf_event *event, u64 read_format, char __user *buf) { - u64 enabled, running; u64 values[4]; int n = 0; - values[n++] = perf_event_read_value(event, &enabled, &running); - if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) - values[n++] = enabled; - if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) - values[n++] = running; + values[n++] = perf_event_read_value(event); + if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { + values[n++] = event->total_time_enabled + + atomic64_read(&event->child_total_time_enabled); + } + if (read_format & 
PERF_FORMAT_TOTAL_TIME_RUNNING) { + values[n++] = event->total_time_running + + atomic64_read(&event->child_total_time_running); + } if (read_format & PERF_FORMAT_ID) values[n++] = primary_event_id(event); @@ -1886,10 +1865,12 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) return -ENOSPC; WARN_ON_ONCE(event->ctx->parent_ctx); + mutex_lock(&event->child_mutex); if (read_format & PERF_FORMAT_GROUP) ret = perf_event_read_group(event, read_format, buf); else ret = perf_event_read_one(event, read_format, buf); + mutex_unlock(&event->child_mutex); return ret; } @@ -2334,7 +2315,7 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data) } if (!data->watermark) - data->watermark = max_size / 2; + data->watermark = max_t(long, PAGE_SIZE, max_size / 2); rcu_assign_pointer(event->data, data); @@ -3264,10 +3245,15 @@ static void perf_event_task_ctx(struct perf_event_context *ctx, { struct perf_event *event; + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_task_match(event)) perf_event_task_output(event, task_event); } + rcu_read_unlock(); } static void perf_event_task_event(struct perf_task_event *task_event) @@ -3275,11 +3261,11 @@ static void perf_event_task_event(struct perf_task_event *task_event) struct perf_cpu_context *cpuctx; struct perf_event_context *ctx = task_event->task_ctx; - rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_task_ctx(&cpuctx->ctx, task_event); put_cpu_var(perf_cpu_context); + rcu_read_lock(); if (!ctx) ctx = rcu_dereference(task_event->task->perf_event_ctxp); if (ctx) @@ -3371,10 +3357,15 @@ static void perf_event_comm_ctx(struct perf_event_context *ctx, { struct perf_event *event; + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if 
(perf_event_comm_match(event)) perf_event_comm_output(event, comm_event); } + rcu_read_unlock(); } static void perf_event_comm_event(struct perf_comm_event *comm_event) @@ -3385,7 +3376,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) char comm[TASK_COMM_LEN]; memset(comm, 0, sizeof(comm)); - strlcpy(comm, comm_event->task->comm, sizeof(comm)); + strncpy(comm, comm_event->task->comm, sizeof(comm)); size = ALIGN(strlen(comm)+1, sizeof(u64)); comm_event->comm = comm; @@ -3393,11 +3384,11 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event) comm_event->event_id.header.size = sizeof(comm_event->event_id) + size; - rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_comm_ctx(&cpuctx->ctx, comm_event); put_cpu_var(perf_cpu_context); + rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. @@ -3490,10 +3481,15 @@ static void perf_event_mmap_ctx(struct perf_event_context *ctx, { struct perf_event *event; + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_event_mmap_match(event, mmap_event)) perf_event_mmap_output(event, mmap_event); } + rcu_read_unlock(); } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) @@ -3549,11 +3545,11 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) mmap_event->event_id.header.size = sizeof(mmap_event->event_id) + size; - rcu_read_lock(); cpuctx = &get_cpu_var(perf_cpu_context); perf_event_mmap_ctx(&cpuctx->ctx, mmap_event); put_cpu_var(perf_cpu_context); + rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. 
@@ -3692,11 +3688,7 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, perf_event_disable(event); } - if (event->overflow_handler) - event->overflow_handler(event, nmi, data, regs); - else - perf_event_output(event, nmi, data, regs); - + perf_event_output(event, nmi, data, regs); return ret; } @@ -3741,16 +3733,16 @@ static u64 perf_swevent_set_period(struct perf_event *event) return nr; } -static void perf_swevent_overflow(struct perf_event *event, u64 overflow, +static void perf_swevent_overflow(struct perf_event *event, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { struct hw_perf_event *hwc = &event->hw; int throttle = 0; + u64 overflow; data->period = event->hw.last_period; - if (!overflow) - overflow = perf_swevent_set_period(event); + overflow = perf_swevent_set_period(event); if (hwc->interrupts == MAX_INTERRUPTS) return; @@ -3783,19 +3775,14 @@ static void perf_swevent_add(struct perf_event *event, u64 nr, atomic64_add(nr, &event->count); - if (!regs) - return; - if (!hwc->sample_period) return; - if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq) - return perf_swevent_overflow(event, 1, nmi, data, regs); - - if (atomic64_add_negative(nr, &hwc->period_left)) + if (!regs) return; - perf_swevent_overflow(event, 0, nmi, data, regs); + if (!atomic64_add_negative(nr, &hwc->period_left)) + perf_swevent_overflow(event, nmi, data, regs); } static int perf_swevent_is_counting(struct perf_event *event) @@ -3831,20 +3818,6 @@ static int perf_swevent_is_counting(struct perf_event *event) static int perf_tp_event_match(struct perf_event *event, struct perf_sample_data *data); -static int perf_exclude_event(struct perf_event *event, - struct pt_regs *regs) -{ - if (regs) { - if (event->attr.exclude_user && user_mode(regs)) - return 1; - - if (event->attr.exclude_kernel && !user_mode(regs)) - return 1; - } - - return 0; -} - static int perf_swevent_match(struct perf_event *event, enum perf_type_id type, u32 event_id, @@ 
-3856,12 +3829,16 @@ static int perf_swevent_match(struct perf_event *event, if (event->attr.type != type) return 0; - if (event->attr.config != event_id) return 0; - if (perf_exclude_event(event, regs)) - return 0; + if (regs) { + if (event->attr.exclude_user && user_mode(regs)) + return 0; + + if (event->attr.exclude_kernel && !user_mode(regs)) + return 0; + } if (event->attr.type == PERF_TYPE_TRACEPOINT && !perf_tp_event_match(event, data)) @@ -3878,59 +3855,49 @@ static void perf_swevent_ctx_event(struct perf_event_context *ctx, { struct perf_event *event; + if (system_state != SYSTEM_RUNNING || list_empty(&ctx->event_list)) + return; + + rcu_read_lock(); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (perf_swevent_match(event, type, event_id, data, regs)) perf_swevent_add(event, nr, nmi, data, regs); } + rcu_read_unlock(); } -int perf_swevent_get_recursion_context(void) +static int *perf_swevent_recursion_context(struct perf_cpu_context *cpuctx) { - struct perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); - int rctx; - if (in_nmi()) - rctx = 3; - else if (in_irq()) - rctx = 2; - else if (in_softirq()) - rctx = 1; - else - rctx = 0; - - if (cpuctx->recursion[rctx]) { - put_cpu_var(perf_cpu_context); - return -1; - } + return &cpuctx->recursion[3]; - cpuctx->recursion[rctx]++; - barrier(); + if (in_irq()) + return &cpuctx->recursion[2]; - return rctx; -} -EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); + if (in_softirq()) + return &cpuctx->recursion[1]; -void perf_swevent_put_recursion_context(int rctx) -{ - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); - barrier(); - cpuctx->recursion[rctx]++; - put_cpu_var(perf_cpu_context); + return &cpuctx->recursion[0]; } -EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); static void do_perf_sw_event(enum perf_type_id type, u32 event_id, u64 nr, int nmi, struct perf_sample_data *data, struct pt_regs *regs) { - struct perf_cpu_context *cpuctx; + struct 
perf_cpu_context *cpuctx = &get_cpu_var(perf_cpu_context); + int *recursion = perf_swevent_recursion_context(cpuctx); struct perf_event_context *ctx; - cpuctx = &__get_cpu_var(perf_cpu_context); - rcu_read_lock(); + if (*recursion) + goto out; + + (*recursion)++; + barrier(); + perf_swevent_ctx_event(&cpuctx->ctx, type, event_id, nr, nmi, data, regs); + rcu_read_lock(); /* * doesn't really matter which of the child contexts the * events ends up in. @@ -3939,24 +3906,23 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, if (ctx) perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs); rcu_read_unlock(); + + barrier(); + (*recursion)--; + +out: + put_cpu_var(perf_cpu_context); } void __perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr) { - struct perf_sample_data data; - int rctx; - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - return; - - data.addr = addr; - data.raw = NULL; - - do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, &data, regs); + struct perf_sample_data data = { + .addr = addr, + }; - perf_swevent_put_recursion_context(rctx); + do_perf_sw_event(PERF_TYPE_SOFTWARE, event_id, nr, nmi, + &data, regs); } static void perf_swevent_read(struct perf_event *event) @@ -4179,7 +4145,6 @@ void perf_tp_event(int event_id, u64 addr, u64 count, void *record, if (!regs) regs = task_pt_regs(current); - /* Trace events already protected against recursion */ do_perf_sw_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data, regs); } @@ -4266,57 +4231,6 @@ static void perf_event_free_filter(struct perf_event *event) #endif /* CONFIG_EVENT_PROFILE */ -#ifdef CONFIG_HAVE_HW_BREAKPOINT -static void bp_perf_event_destroy(struct perf_event *event) -{ - release_bp_slot(event); -} - -static const struct pmu *bp_perf_event_init(struct perf_event *bp) -{ - int err; - /* - * The breakpoint is already filled if we haven't created the counter - * through perf syscall - * FIXME: manage to get trigerred to 
NULL if it comes from syscalls - */ - if (!bp->callback) - err = register_perf_hw_breakpoint(bp); - else - err = __register_perf_hw_breakpoint(bp); - if (err) - return ERR_PTR(err); - - bp->destroy = bp_perf_event_destroy; - - return &perf_ops_bp; -} - -void perf_bp_event(struct perf_event *bp, void *data) -{ - struct perf_sample_data sample; - struct pt_regs *regs = data; - - sample.addr = bp->attr.bp_addr; - - if (!perf_exclude_event(bp, regs)) - perf_swevent_add(bp, 1, 1, &sample, regs); -} -#else -static void bp_perf_event_destroy(struct perf_event *event) -{ -} - -static const struct pmu *bp_perf_event_init(struct perf_event *bp) -{ - return NULL; -} - -void perf_bp_event(struct perf_event *bp, void *regs) -{ -} -#endif - atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; static void sw_perf_event_destroy(struct perf_event *event) @@ -4383,7 +4297,6 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, - perf_callback_t callback, gfp_t gfpflags) { const struct pmu *pmu; @@ -4426,11 +4339,6 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; - if (!callback && parent_event) - callback = parent_event->callback; - - event->callback = callback; - if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4465,11 +4373,6 @@ perf_event_alloc(struct perf_event_attr *attr, pmu = tp_perf_event_init(event); break; - case PERF_TYPE_BREAKPOINT: - pmu = bp_perf_event_init(event); - break; - - default: break; } @@ -4712,7 +4615,7 @@ SYSCALL_DEFINE5(perf_event_open, } event = perf_event_alloc(&attr, cpu, ctx, group_leader, - NULL, NULL, GFP_KERNEL); + NULL, GFP_KERNEL); err = PTR_ERR(event); if (IS_ERR(event)) goto err_put_context; @@ -4760,58 +4663,6 @@ SYSCALL_DEFINE5(perf_event_open, return err; } -/** - * perf_event_create_kernel_counter - * - * @attr: attributes of the counter to create - * @cpu: cpu in which the counter is bound - * 
@pid: task to profile - */ -struct perf_event * -perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, perf_callback_t callback) -{ - struct perf_event *event; - struct perf_event_context *ctx; - int err; - - /* - * Get the target context (task or percpu): - */ - - ctx = find_get_context(pid, cpu); - if (IS_ERR(ctx)) - return NULL; - - event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, callback, GFP_KERNEL); - err = PTR_ERR(event); - if (IS_ERR(event)) - goto err_put_context; - - event->filp = NULL; - WARN_ON_ONCE(ctx->parent_ctx); - mutex_lock(&ctx->mutex); - perf_install_in_context(ctx, event, cpu); - ++ctx->generation; - mutex_unlock(&ctx->mutex); - - event->owner = current; - get_task_struct(current); - mutex_lock(&current->perf_event_mutex); - list_add_tail(&event->owner_entry, &current->perf_event_list); - mutex_unlock(&current->perf_event_mutex); - - return event; - -err_put_context: - if (err < 0) - put_ctx(ctx); - - return NULL; -} -EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter); - /* * inherit a event from parent task to child task: */ @@ -4837,7 +4688,7 @@ inherit_event(struct perf_event *parent_event, child_event = perf_event_alloc(&parent_event->attr, parent_event->cpu, child_ctx, group_leader, parent_event, - NULL, GFP_KERNEL); + GFP_KERNEL); if (IS_ERR(child_event)) return child_event; get_ctx(child_ctx); @@ -4855,8 +4706,6 @@ inherit_event(struct perf_event *parent_event, if (parent_event->attr.freq) child_event->hw.sample_period = parent_event->hw.sample_period; - child_event->overflow_handler = parent_event->overflow_handler; - /* * Link it up in the child's context: */ @@ -4946,6 +4795,7 @@ __perf_event_exit_task(struct perf_event *child_event, { struct perf_event *parent_event; + update_event_times(child_event); perf_event_remove_from_context(child_event); parent_event = child_event->parent; @@ -4997,7 +4847,6 @@ void perf_event_exit_task(struct task_struct *child) * the events from it.
*/ unclone_ctx(child_ctx); - update_context_time(child_ctx); spin_unlock_irqrestore(&child_ctx->lock, flags); /* diff --git a/trunk/kernel/trace/Kconfig b/trunk/kernel/trace/Kconfig index d006554888dc..f05671609a89 100644 --- a/trunk/kernel/trace/Kconfig +++ b/trunk/kernel/trace/Kconfig @@ -339,27 +339,6 @@ config POWER_TRACER power management decisions, specifically the C-state and P-state behavior. -config KSYM_TRACER - bool "Trace read and write access on kernel memory locations" - depends on HAVE_HW_BREAKPOINT - select TRACING - help - This tracer helps find read and write operations on any given kernel - symbol i.e. /proc/kallsyms. - -config PROFILE_KSYM_TRACER - bool "Profile all kernel memory accesses on 'watched' variables" - depends on KSYM_TRACER - help - This tracer profiles kernel accesses on variables watched through the - ksym tracer ftrace plugin. Depending upon the hardware, all read - and write operations on kernel variables can be monitored for - accesses. - - The results will be displayed in: - /debugfs/tracing/profile_ksym - - Say N if unsure. 
config STACK_TRACER bool "Trace max stack" diff --git a/trunk/kernel/trace/Makefile b/trunk/kernel/trace/Makefile index cd9ecd89ec77..edc3a3cca1a1 100644 --- a/trunk/kernel/trace/Makefile +++ b/trunk/kernel/trace/Makefile @@ -54,7 +54,6 @@ obj-$(CONFIG_FTRACE_SYSCALLS) += trace_syscalls.o obj-$(CONFIG_EVENT_PROFILE) += trace_event_profile.o obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o -obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o obj-$(CONFIG_EVENT_TRACING) += power-traces.o libftrace-y := ftrace.o diff --git a/trunk/kernel/trace/trace.h b/trunk/kernel/trace/trace.h index 4da6ede74401..b4e4212e66d7 100644 --- a/trunk/kernel/trace/trace.h +++ b/trunk/kernel/trace/trace.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include @@ -38,7 +37,6 @@ enum trace_type { TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_BLK, - TRACE_KSYM, __TRACE_LAST_TYPE, }; @@ -234,7 +232,6 @@ extern void __ftrace_bad_type(void); TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ TRACE_KMEM_FREE); \ - IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\ __ftrace_bad_type(); \ } while (0) @@ -390,8 +387,6 @@ int register_tracer(struct tracer *type); void unregister_tracer(struct tracer *type); int is_tracing_stopped(void); -extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr); - extern unsigned long nsecs_to_usecs(unsigned long nsecs); #ifdef CONFIG_TRACER_MAX_TRACE @@ -466,8 +461,6 @@ extern int trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr); extern int trace_selftest_startup_hw_branches(struct tracer *trace, struct trace_array *tr); -extern int trace_selftest_startup_ksym(struct tracer *trace, - struct trace_array *tr); #endif /* CONFIG_FTRACE_STARTUP_TEST */ extern void *head_page(struct trace_array_cpu *data); diff --git a/trunk/kernel/trace/trace_entries.h b/trunk/kernel/trace/trace_entries.h index c16a08f399df..ead3d724599d 100644 --- 
a/trunk/kernel/trace/trace_entries.h +++ b/trunk/kernel/trace/trace_entries.h @@ -364,19 +364,3 @@ FTRACE_ENTRY(kmem_free, kmemtrace_free_entry, F_printk("type:%u call_site:%lx ptr:%p", __entry->type_id, __entry->call_site, __entry->ptr) ); - -FTRACE_ENTRY(ksym_trace, ksym_trace_entry, - - TRACE_KSYM, - - F_STRUCT( - __field( unsigned long, ip ) - __field( unsigned char, type ) - __array( char , cmd, TASK_COMM_LEN ) - __field( unsigned long, addr ) - ), - - F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s", - (void *)__entry->ip, (unsigned int)__entry->type, - (void *)__entry->addr, __entry->cmd) -); diff --git a/trunk/kernel/trace/trace_event_profile.c b/trunk/kernel/trace/trace_event_profile.c index d9c60f80aa0d..e0d351b01f5a 100644 --- a/trunk/kernel/trace/trace_event_profile.c +++ b/trunk/kernel/trace/trace_event_profile.c @@ -9,33 +9,31 @@ #include "trace.h" -char *perf_trace_buf; +struct perf_trace_buf *perf_trace_buf; EXPORT_SYMBOL_GPL(perf_trace_buf); -char *perf_trace_buf_nmi; +struct perf_trace_buf *perf_trace_buf_nmi; EXPORT_SYMBOL_GPL(perf_trace_buf_nmi); -typedef typeof(char [FTRACE_MAX_PROFILE_SIZE]) perf_trace_t ; - /* Count the events in use (per event id, not per instance) */ static int total_profile_count; static int ftrace_profile_enable_event(struct ftrace_event_call *event) { - char *buf; + struct perf_trace_buf *buf; int ret = -ENOMEM; if (atomic_inc_return(&event->profile_count)) return 0; if (!total_profile_count) { - buf = (char *)alloc_percpu(perf_trace_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf; rcu_assign_pointer(perf_trace_buf, buf); - buf = (char *)alloc_percpu(perf_trace_t); + buf = alloc_percpu(struct perf_trace_buf); if (!buf) goto fail_buf_nmi; @@ -81,7 +79,7 @@ int ftrace_profile_enable(int event_id) static void ftrace_profile_disable_event(struct ftrace_event_call *event) { - char *buf, *nmi_buf; + struct perf_trace_buf *buf, *nmi_buf; if (!atomic_add_negative(-1, &event->profile_count)) return; 
diff --git a/trunk/kernel/trace/trace_kprobe.c b/trunk/kernel/trace/trace_kprobe.c index 79ce6a2bd74f..3696476f307d 100644 --- a/trunk/kernel/trace/trace_kprobe.c +++ b/trunk/kernel/trace/trace_kprobe.c @@ -1208,12 +1208,11 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, struct trace_probe *tp = container_of(kp, struct trace_probe, rp.kp); struct ftrace_event_call *call = &tp->call; struct kprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; - char *trace_buf; char *raw_data; - int rctx; pc = preempt_count(); __size = SIZEOF_KPROBE_TRACE_ENTRY(tp->nr_args); @@ -1228,11 +1227,6 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, * This also protects the rcu read side */ local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - __cpu = smp_processor_id(); if (in_nmi()) @@ -1243,7 +1237,18 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, if (!trace_buf) goto end; - raw_data = per_cpu_ptr(trace_buf, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1258,9 +1263,9 @@ static __kprobes int kprobe_profile_func(struct kprobe *kp, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); end_recursion: + trace_buf->recursion--; +end: local_irq_restore(irq_flags); return 0; @@ -1273,12 +1278,11 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, struct trace_probe *tp = container_of(ri->rp, struct trace_probe, rp); struct ftrace_event_call *call = &tp->call; struct kretprobe_trace_entry *entry; + struct perf_trace_buf *trace_buf; struct trace_entry *ent; int size, __size, i, pc, __cpu; unsigned long irq_flags; - char *trace_buf; char *raw_data; - int rctx; pc = preempt_count(); __size = SIZEOF_KRETPROBE_TRACE_ENTRY(tp->nr_args); @@ -1293,11 +1297,6 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, * This also protects the rcu read side */ local_irq_save(irq_flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - __cpu = smp_processor_id(); if (in_nmi()) @@ -1308,7 +1307,18 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, if (!trace_buf) goto end; - raw_data = per_cpu_ptr(trace_buf, __cpu); + trace_buf = per_cpu_ptr(trace_buf, __cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; /* Zero dead bytes from alignment to avoid buffer leak to userspace */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -1324,9 +1334,9 @@ static __kprobes int kretprobe_profile_func(struct kretprobe_instance *ri, entry->args[i] = call_fetch(&tp->args[i].fetch, regs); perf_tp_event(call->id, entry->ret_ip, 1, entry, size); -end: - perf_swevent_put_recursion_context(rctx); end_recursion: + trace_buf->recursion--; +end: local_irq_restore(irq_flags); return 0; diff --git a/trunk/kernel/trace/trace_ksym.c b/trunk/kernel/trace/trace_ksym.c deleted file mode 100644 index 11935b53a6cb..000000000000 --- a/trunk/kernel/trace/trace_ksym.c +++ /dev/null @@ -1,554 +0,0 @@ -/* - * trace_ksym.c - Kernel Symbol Tracer - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * Copyright (C) IBM Corporation, 2009 - */ - -#include -#include -#include -#include -#include -#include - -#include "trace_output.h" -#include "trace_stat.h" -#include "trace.h" - -#include -#include - -/* - * For now, let us restrict the no. of symbols traced simultaneously to number - * of available hardware breakpoint registers. 
- */ -#define KSYM_TRACER_MAX HBP_NUM - -#define KSYM_TRACER_OP_LEN 3 /* rw- */ - -struct trace_ksym { - struct perf_event **ksym_hbp; - unsigned long ksym_addr; - int type; - int len; -#ifdef CONFIG_PROFILE_KSYM_TRACER - unsigned long counter; -#endif - struct hlist_node ksym_hlist; -}; - -static struct trace_array *ksym_trace_array; - -static unsigned int ksym_filter_entry_count; -static unsigned int ksym_tracing_enabled; - -static HLIST_HEAD(ksym_filter_head); - -static DEFINE_MUTEX(ksym_tracer_mutex); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - -#define MAX_UL_INT 0xffffffff - -void ksym_collect_stats(unsigned long hbp_hit_addr) -{ - struct hlist_node *node; - struct trace_ksym *entry; - - rcu_read_lock(); - hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) { - if ((entry->ksym_addr == hbp_hit_addr) && - (entry->counter <= MAX_UL_INT)) { - entry->counter++; - break; - } - } - rcu_read_unlock(); -} -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - -void ksym_hbp_handler(struct perf_event *hbp, void *data) -{ - struct ring_buffer_event *event; - struct ksym_trace_entry *entry; - struct pt_regs *regs = data; - struct ring_buffer *buffer; - int pc; - - if (!ksym_tracing_enabled) - return; - - buffer = ksym_trace_array->buffer; - - pc = preempt_count(); - - event = trace_buffer_lock_reserve(buffer, TRACE_KSYM, - sizeof(*entry), 0, pc); - if (!event) - return; - - entry = ring_buffer_event_data(event); - entry->ip = instruction_pointer(regs); - entry->type = hw_breakpoint_type(hbp); - entry->addr = hw_breakpoint_addr(hbp); - strlcpy(entry->cmd, current->comm, TASK_COMM_LEN); - -#ifdef CONFIG_PROFILE_KSYM_TRACER - ksym_collect_stats(hw_breakpoint_addr(hbp)); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ - - trace_buffer_unlock_commit(buffer, event, 0, pc); -} - -/* Valid access types are represented as - * - * rw- : Set Read/Write Access Breakpoint - * -w- : Set Write Access Breakpoint - * --- : Clear Breakpoints - * --x : Set Execution Break points (Not available 
yet) - * - */ -static int ksym_trace_get_access_type(char *str) -{ - int access = 0; - - if (str[0] == 'r') - access |= HW_BREAKPOINT_R; - - if (str[1] == 'w') - access |= HW_BREAKPOINT_W; - - if (str[2] == 'x') - access |= HW_BREAKPOINT_X; - - switch (access) { - case HW_BREAKPOINT_R: - case HW_BREAKPOINT_W: - case HW_BREAKPOINT_W | HW_BREAKPOINT_R: - return access; - default: - return -EINVAL; - } -} - -/* - * There can be several possible malformed requests and we attempt to capture - * all of them. We enumerate some of the rules - * 1. We will not allow kernel symbols with ':' since it is used as a delimiter. - * i.e. multiple ':' symbols disallowed. Possible uses are of the form - * ::. - * 2. No delimiter symbol ':' in the input string - * 3. Spurious operator symbols or symbols not in their respective positions - * 4. :--- i.e. clear breakpoint request when ksym_name not in file - * 5. Kernel symbol not a part of /proc/kallsyms - * 6. Duplicate requests - */ -static int parse_ksym_trace_str(char *input_string, char **ksymname, - unsigned long *addr) -{ - int ret; - - *ksymname = strsep(&input_string, ":"); - *addr = kallsyms_lookup_name(*ksymname); - - /* Check for malformed request: (2), (1) and (5) */ - if ((!input_string) || - (strlen(input_string) != KSYM_TRACER_OP_LEN) || - (*addr == 0)) - return -EINVAL;; - - ret = ksym_trace_get_access_type(input_string); - - return ret; -} - -int process_new_ksym_entry(char *ksymname, int op, unsigned long addr) -{ - struct trace_ksym *entry; - int ret = -ENOMEM; - - if (ksym_filter_entry_count >= KSYM_TRACER_MAX) { - printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. 
No" - " new requests for tracing can be accepted now.\n", - KSYM_TRACER_MAX); - return -ENOSPC; - } - - entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - entry->type = op; - entry->ksym_addr = addr; - entry->len = HW_BREAKPOINT_LEN_4; - - ret = -EAGAIN; - entry->ksym_hbp = register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); - if (IS_ERR(entry->ksym_hbp)) { - entry->ksym_hbp = NULL; - ret = PTR_ERR(entry->ksym_hbp); - } - - if (!entry->ksym_hbp) { - printk(KERN_INFO "ksym_tracer request failed. Try again" - " later!!\n"); - goto err; - } - - hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head); - ksym_filter_entry_count++; - - return 0; - -err: - kfree(entry); - - return ret; -} - -static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf, - size_t count, loff_t *ppos) -{ - struct trace_ksym *entry; - struct hlist_node *node; - struct trace_seq *s; - ssize_t cnt = 0; - int ret; - - s = kmalloc(sizeof(*s), GFP_KERNEL); - if (!s) - return -ENOMEM; - trace_seq_init(s); - - mutex_lock(&ksym_tracer_mutex); - - hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - ret = trace_seq_printf(s, "%pS:", (void *)entry->ksym_addr); - if (entry->type == HW_BREAKPOINT_R) - ret = trace_seq_puts(s, "r--\n"); - else if (entry->type == HW_BREAKPOINT_W) - ret = trace_seq_puts(s, "-w-\n"); - else if (entry->type == (HW_BREAKPOINT_W | HW_BREAKPOINT_R)) - ret = trace_seq_puts(s, "rw-\n"); - WARN_ON_ONCE(!ret); - } - - cnt = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); - - mutex_unlock(&ksym_tracer_mutex); - - kfree(s); - - return cnt; -} - -static void __ksym_trace_reset(void) -{ - struct trace_ksym *entry; - struct hlist_node *node, *node1; - - mutex_lock(&ksym_tracer_mutex); - hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head, - ksym_hlist) { - unregister_wide_hw_breakpoint(entry->ksym_hbp); - ksym_filter_entry_count--; - 
hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry); - } - mutex_unlock(&ksym_tracer_mutex); -} - -static ssize_t ksym_trace_filter_write(struct file *file, - const char __user *buffer, - size_t count, loff_t *ppos) -{ - struct trace_ksym *entry; - struct hlist_node *node; - char *input_string, *ksymname = NULL; - unsigned long ksym_addr = 0; - int ret, op, changed = 0; - - input_string = kzalloc(count + 1, GFP_KERNEL); - if (!input_string) - return -ENOMEM; - - if (copy_from_user(input_string, buffer, count)) { - kfree(input_string); - return -EFAULT; - } - input_string[count] = '\0'; - - strstrip(input_string); - - /* - * Clear all breakpoints if: - * 1: echo > ksym_trace_filter - * 2: echo 0 > ksym_trace_filter - * 3: echo "*:---" > ksym_trace_filter - */ - if (!input_string[0] || !strcmp(input_string, "0") || - !strcmp(input_string, "*:---")) { - __ksym_trace_reset(); - kfree(input_string); - return count; - } - - ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr); - if (ret < 0) { - kfree(input_string); - return ret; - } - - mutex_lock(&ksym_tracer_mutex); - - ret = -EINVAL; - hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) { - if (entry->ksym_addr == ksym_addr) { - /* Check for malformed request: (6) */ - if (entry->type != op) - changed = 1; - else - goto out; - break; - } - } - if (changed) { - unregister_wide_hw_breakpoint(entry->ksym_hbp); - entry->type = op; - if (op > 0) { - entry->ksym_hbp = - register_wide_hw_breakpoint(entry->ksym_addr, - entry->len, entry->type, - ksym_hbp_handler, true); - if (IS_ERR(entry->ksym_hbp)) - entry->ksym_hbp = NULL; - if (!entry->ksym_hbp) - goto out; - } - ksym_filter_entry_count--; - hlist_del_rcu(&(entry->ksym_hlist)); - synchronize_rcu(); - kfree(entry); - ret = 0; - goto out; - } else { - /* Check for malformed request: (4) */ - if (op == 0) - goto out; - ret = process_new_ksym_entry(ksymname, op, ksym_addr); - } -out: - mutex_unlock(&ksym_tracer_mutex); - - 
kfree(input_string); - - if (!ret) - ret = count; - return ret; -} - -static const struct file_operations ksym_tracing_fops = { - .open = tracing_open_generic, - .read = ksym_trace_filter_read, - .write = ksym_trace_filter_write, -}; - -static void ksym_trace_reset(struct trace_array *tr) -{ - ksym_tracing_enabled = 0; - __ksym_trace_reset(); -} - -static int ksym_trace_init(struct trace_array *tr) -{ - int cpu, ret = 0; - - for_each_online_cpu(cpu) - tracing_reset(tr, cpu); - ksym_tracing_enabled = 1; - ksym_trace_array = tr; - - return ret; -} - -static void ksym_trace_print_header(struct seq_file *m) -{ - seq_puts(m, - "# TASK-PID CPU# Symbol " - "Type Function\n"); - seq_puts(m, - "# | | | " - " | |\n"); -} - -static enum print_line_t ksym_trace_output(struct trace_iterator *iter) -{ - struct trace_entry *entry = iter->ent; - struct trace_seq *s = &iter->seq; - struct ksym_trace_entry *field; - char str[KSYM_SYMBOL_LEN]; - int ret; - - if (entry->type != TRACE_KSYM) - return TRACE_TYPE_UNHANDLED; - - trace_assign_type(field, entry); - - ret = trace_seq_printf(s, "%11s-%-5d [%03d] %pS", field->cmd, - entry->pid, iter->cpu, (char *)field->addr); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - switch (field->type) { - case HW_BREAKPOINT_R: - ret = trace_seq_printf(s, " R "); - break; - case HW_BREAKPOINT_W: - ret = trace_seq_printf(s, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - ret = trace_seq_printf(s, " RW "); - break; - default: - return TRACE_TYPE_PARTIAL_LINE; - } - - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - sprint_symbol(str, field->ip); - ret = trace_seq_printf(s, "%s\n", str); - if (!ret) - return TRACE_TYPE_PARTIAL_LINE; - - return TRACE_TYPE_HANDLED; -} - -struct tracer ksym_tracer __read_mostly = -{ - .name = "ksym_tracer", - .init = ksym_trace_init, - .reset = ksym_trace_reset, -#ifdef CONFIG_FTRACE_SELFTEST - .selftest = trace_selftest_startup_ksym, -#endif - .print_header = ksym_trace_print_header, - .print_line = 
ksym_trace_output -}; - -__init static int init_ksym_trace(void) -{ - struct dentry *d_tracer; - struct dentry *entry; - - d_tracer = tracing_init_dentry(); - ksym_filter_entry_count = 0; - - entry = debugfs_create_file("ksym_trace_filter", 0644, d_tracer, - NULL, &ksym_tracing_fops); - if (!entry) - pr_warning("Could not create debugfs " - "'ksym_trace_filter' file\n"); - - return register_tracer(&ksym_tracer); -} -device_initcall(init_ksym_trace); - - -#ifdef CONFIG_PROFILE_KSYM_TRACER -static int ksym_tracer_stat_headers(struct seq_file *m) -{ - seq_puts(m, " Access Type "); - seq_puts(m, " Symbol Counter\n"); - seq_puts(m, " ----------- "); - seq_puts(m, " ------ -------\n"); - return 0; -} - -static int ksym_tracer_stat_show(struct seq_file *m, void *v) -{ - struct hlist_node *stat = v; - struct trace_ksym *entry; - int access_type = 0; - char fn_name[KSYM_NAME_LEN]; - - entry = hlist_entry(stat, struct trace_ksym, ksym_hlist); - - access_type = entry->type; - - switch (access_type) { - case HW_BREAKPOINT_R: - seq_puts(m, " R "); - break; - case HW_BREAKPOINT_W: - seq_puts(m, " W "); - break; - case HW_BREAKPOINT_R | HW_BREAKPOINT_W: - seq_puts(m, " RW "); - break; - default: - seq_puts(m, " NA "); - } - - if (lookup_symbol_name(entry->ksym_addr, fn_name) >= 0) - seq_printf(m, " %-36s", fn_name); - else - seq_printf(m, " %-36s", ""); - seq_printf(m, " %15lu\n", entry->counter); - - return 0; -} - -static void *ksym_tracer_stat_start(struct tracer_stat *trace) -{ - return ksym_filter_head.first; -} - -static void * -ksym_tracer_stat_next(void *v, int idx) -{ - struct hlist_node *stat = v; - - return stat->next; -} - -static struct tracer_stat ksym_tracer_stats = { - .name = "ksym_tracer", - .stat_start = ksym_tracer_stat_start, - .stat_next = ksym_tracer_stat_next, - .stat_headers = ksym_tracer_stat_headers, - .stat_show = ksym_tracer_stat_show -}; - -__init static int ksym_tracer_stat_init(void) -{ - int ret; - - ret = register_stat_tracer(&ksym_tracer_stats); 
- if (ret) { - printk(KERN_WARNING "Warning: could not register " - "ksym tracer stats\n"); - return 1; - } - - return 0; -} -fs_initcall(ksym_tracer_stat_init); -#endif /* CONFIG_PROFILE_KSYM_TRACER */ diff --git a/trunk/kernel/trace/trace_selftest.c b/trunk/kernel/trace/trace_selftest.c index dc98309e839a..d2cdbabb4ead 100644 --- a/trunk/kernel/trace/trace_selftest.c +++ b/trunk/kernel/trace/trace_selftest.c @@ -17,7 +17,6 @@ static inline int trace_valid_entry(struct trace_entry *entry) case TRACE_GRAPH_ENT: case TRACE_GRAPH_RET: case TRACE_HW_BRANCHES: - case TRACE_KSYM: return 1; } return 0; @@ -809,57 +808,3 @@ trace_selftest_startup_hw_branches(struct tracer *trace, return ret; } #endif /* CONFIG_HW_BRANCH_TRACER */ - -#ifdef CONFIG_KSYM_TRACER -static int ksym_selftest_dummy; - -int -trace_selftest_startup_ksym(struct tracer *trace, struct trace_array *tr) -{ - unsigned long count; - int ret; - - /* start the tracing */ - ret = tracer_init(trace, tr); - if (ret) { - warn_failed_init_tracer(trace, ret); - return ret; - } - - ksym_selftest_dummy = 0; - /* Register the read-write tracing request */ - - ret = process_new_ksym_entry("ksym_selftest_dummy", - HW_BREAKPOINT_R | HW_BREAKPOINT_W, - (unsigned long)(&ksym_selftest_dummy)); - - if (ret < 0) { - printk(KERN_CONT "ksym_trace read-write startup test failed\n"); - goto ret_path; - } - /* Perform a read and a write operation over the dummy variable to - * trigger the tracer - */ - if (ksym_selftest_dummy == 0) - ksym_selftest_dummy++; - - /* stop the tracing. 
*/ - tracing_stop(); - /* check the trace buffer */ - ret = trace_test_buffer(tr, &count); - trace->reset(tr); - tracing_start(); - - /* read & write operations - one each is performed on the dummy variable - * triggering two entries in the trace buffer - */ - if (!ret && count != 2) { - printk(KERN_CONT "Ksym tracer startup test failed"); - ret = -1; - } - -ret_path: - return ret; -} -#endif /* CONFIG_KSYM_TRACER */ - diff --git a/trunk/kernel/trace/trace_syscalls.c b/trunk/kernel/trace/trace_syscalls.c index 9189cbe86079..51213b0aa81b 100644 --- a/trunk/kernel/trace/trace_syscalls.c +++ b/trunk/kernel/trace/trace_syscalls.c @@ -477,12 +477,11 @@ static int sys_prof_refcount_exit; static void prof_syscall_enter(struct pt_regs *regs, long id) { struct syscall_metadata *sys_data; + struct perf_trace_buf *trace_buf; struct syscall_trace_enter *rec; unsigned long flags; - char *trace_buf; char *raw_data; int syscall_nr; - int rctx; int size; int cpu; @@ -506,18 +505,28 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - cpu = smp_processor_id(); - trace_buf = rcu_dereference(perf_trace_buf); + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); if (!trace_buf) goto end; - raw_data = per_cpu_ptr(trace_buf, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -530,9 +539,9 @@ static void prof_syscall_enter(struct pt_regs *regs, long id) (unsigned long *)&rec->args); perf_tp_event(sys_data->enter_id, 0, 1, rec, size); -end: - perf_swevent_put_recursion_context(rctx); end_recursion: + trace_buf->recursion--; +end: local_irq_restore(flags); } @@ -579,11 +588,10 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) { struct syscall_metadata *sys_data; struct syscall_trace_exit *rec; + struct perf_trace_buf *trace_buf; unsigned long flags; int syscall_nr; - char *trace_buf; char *raw_data; - int rctx; int size; int cpu; @@ -609,19 +617,28 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) /* Protect the per cpu buffer, begin the rcu read side */ local_irq_save(flags); - - rctx = perf_swevent_get_recursion_context(); - if (rctx < 0) - goto end_recursion; - cpu = smp_processor_id(); - trace_buf = rcu_dereference(perf_trace_buf); + if (in_nmi()) + trace_buf = rcu_dereference(perf_trace_buf_nmi); + else + trace_buf = rcu_dereference(perf_trace_buf); if (!trace_buf) goto end; - raw_data = per_cpu_ptr(trace_buf, cpu); + trace_buf = per_cpu_ptr(trace_buf, cpu); + + if (trace_buf->recursion++) + goto end_recursion; + + /* + * Make recursion update visible before entering perf_tp_event + * so that we protect from perf recursions. 
+ */ + barrier(); + + raw_data = trace_buf->buf; /* zero the dead bytes from align to not leak stack to user */ *(u64 *)(&raw_data[size - sizeof(u64)]) = 0ULL; @@ -635,9 +652,9 @@ static void prof_syscall_exit(struct pt_regs *regs, long ret) perf_tp_event(sys_data->exit_id, 0, 1, rec, size); -end: - perf_swevent_put_recursion_context(rctx); end_recursion: + trace_buf->recursion--; +end: local_irq_restore(flags); } diff --git a/trunk/samples/Kconfig b/trunk/samples/Kconfig index e4be84ac3d38..b92bde3c6a89 100644 --- a/trunk/samples/Kconfig +++ b/trunk/samples/Kconfig @@ -40,11 +40,5 @@ config SAMPLE_KRETPROBES default m depends on SAMPLE_KPROBES && KRETPROBES -config SAMPLE_HW_BREAKPOINT - tristate "Build kernel hardware breakpoint examples -- loadable module only" - depends on HAVE_HW_BREAKPOINT && m - help - This builds kernel hardware breakpoint example modules. - endif # SAMPLES diff --git a/trunk/samples/Makefile b/trunk/samples/Makefile index 0f15e6d77fd6..43343a03b1f4 100644 --- a/trunk/samples/Makefile +++ b/trunk/samples/Makefile @@ -1,4 +1,3 @@ # Makefile for Linux samples code -obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ \ - hw_breakpoint/ +obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ tracepoints/ trace_events/ diff --git a/trunk/samples/hw_breakpoint/Makefile b/trunk/samples/hw_breakpoint/Makefile deleted file mode 100644 index 0f5c31c2fc47..000000000000 --- a/trunk/samples/hw_breakpoint/Makefile +++ /dev/null @@ -1 +0,0 @@ -obj-$(CONFIG_SAMPLE_HW_BREAKPOINT) += data_breakpoint.o diff --git a/trunk/samples/hw_breakpoint/data_breakpoint.c b/trunk/samples/hw_breakpoint/data_breakpoint.c deleted file mode 100644 index 95063818bcf4..000000000000 --- a/trunk/samples/hw_breakpoint/data_breakpoint.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * data_breakpoint.c - Sample HW Breakpoint file to watch kernel data address - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public 
License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - * - * usage: insmod data_breakpoint.ko ksym= - * - * This file is a kernel module that places a breakpoint over ksym_name kernel - * variable using Hardware Breakpoint register. The corresponding handler which - * prints a backtrace is invoked everytime a write operation is performed on - * that variable. - * - * Copyright (C) IBM Corporation, 2009 - * - * Author: K.Prasad - */ -#include /* Needed by all modules */ -#include /* Needed for KERN_INFO */ -#include /* Needed for the macros */ -#include - -#include -#include - -struct perf_event **sample_hbp; - -static char ksym_name[KSYM_NAME_LEN] = "pid_max"; -module_param_string(ksym, ksym_name, KSYM_NAME_LEN, S_IRUGO); -MODULE_PARM_DESC(ksym, "Kernel symbol to monitor; this module will report any" - " write operations on the kernel symbol"); - -static void sample_hbp_handler(struct perf_event *temp, void *data) -{ - printk(KERN_INFO "%s value is changed\n", ksym_name); - dump_stack(); - printk(KERN_INFO "Dump stack from sample_hbp_handler\n"); -} - -static int __init hw_break_module_init(void) -{ - int ret; - unsigned long addr; - - addr = kallsyms_lookup_name(ksym_name); - - sample_hbp = register_wide_hw_breakpoint(addr, HW_BREAKPOINT_LEN_4, - HW_BREAKPOINT_W | HW_BREAKPOINT_R, - sample_hbp_handler, true); - if (IS_ERR(sample_hbp)) { - ret = PTR_ERR(sample_hbp); - goto fail; - } else if (!sample_hbp) { - ret = -EINVAL; - 
goto fail; - } - - printk(KERN_INFO "HW Breakpoint for %s write installed\n", ksym_name); - - return 0; - -fail: - printk(KERN_INFO "Breakpoint registration failed\n"); - - return ret; -} - -static void __exit hw_break_module_exit(void) -{ - unregister_wide_hw_breakpoint(sample_hbp); - printk(KERN_INFO "HW Breakpoint for %s write uninstalled\n", ksym_name); -} - -module_init(hw_break_module_init); -module_exit(hw_break_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("K.Prasad"); -MODULE_DESCRIPTION("ksym breakpoint"); diff --git a/trunk/tools/perf/Documentation/perf-kmem.txt b/trunk/tools/perf/Documentation/perf-kmem.txt deleted file mode 100644 index 44b0ce35c28a..000000000000 --- a/trunk/tools/perf/Documentation/perf-kmem.txt +++ /dev/null @@ -1,44 +0,0 @@ -perf-kmem(1) -============== - -NAME ----- -perf-kmem - Tool to trace/measure kernel memory(slab) properties - -SYNOPSIS --------- -[verse] -'perf kmem' {record} [] - -DESCRIPTION ------------ -There's two variants of perf kmem: - - 'perf kmem record ' to record the kmem events - of an arbitrary workload. - - 'perf kmem' to report kernel memory statistics. - -OPTIONS -------- --i :: ---input=:: - Select the input file (default: perf.data) - ---stat=:: - Select per callsite or per allocation statistics - --s :: ---sort=:: - Sort the output (default: frag,hit,bytes) - --l :: ---line=:: - Print n lines only - ---raw-ip:: - Print raw ip instead of symbol - -SEE ALSO --------- -linkperf:perf-record[1] diff --git a/trunk/tools/perf/Documentation/perf-record.txt b/trunk/tools/perf/Documentation/perf-record.txt index fc46c0b40f6e..0ff23de9e453 100644 --- a/trunk/tools/perf/Documentation/perf-record.txt +++ b/trunk/tools/perf/Documentation/perf-record.txt @@ -26,19 +26,11 @@ OPTIONS -e:: --event=:: - Select the PMU event. Selection can be: + Select the PMU event. 
Selection can be a symbolic event name + (use 'perf list' to list all events) or a raw PMU + event (eventsel+umask) in the form of rNNN where NNN is a + hexadecimal event descriptor. - - a symbolic event name (use 'perf list' to list all events) - - - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. - - - a hardware breakpoint event in the form of '\mem:addr[:access]' - where addr is the address in memory you want to break in. - Access is the memory access type (read, write, execute) it can - be passed as follows: '\mem:addr[:[r][w][x]]'. - If you want to profile read-write accesses in 0x1000, just set - 'mem:0x1000:rw'. -a:: System-wide collection. diff --git a/trunk/tools/perf/Makefile b/trunk/tools/perf/Makefile index 3ef6621bf6cd..53e663a5fa2f 100644 --- a/trunk/tools/perf/Makefile +++ b/trunk/tools/perf/Makefile @@ -2,7 +2,6 @@ all:: # Define V=1 to have a more verbose compile. -# Define V=2 to have an even more verbose compile. # # Define SNPRINTF_RETURNS_BOGUS if your are on a system which snprintf() # or vsnprintf() return -1 instead of number of characters which would @@ -148,8 +147,6 @@ all:: # broken, or spawning external process is slower than built-in grep perf has). # # Define LDFLAGS=-static to build a static binary. -# -# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds. 
PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE @$(SHELL_PATH) util/PERF-VERSION-GEN @@ -162,6 +159,22 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') +# +# Add -m32 for cross-builds: +# +ifdef NO_64BIT + MBITS := -m32 +else + # + # If we're on a 64-bit kernel (except ia64), use -m64: + # + ifneq ($(uname_M),ia64) + ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + MBITS := -m64 + endif + endif +endif + # CFLAGS and LDFLAGS are for the users to override from the command line. # @@ -198,7 +211,7 @@ ifndef PERF_DEBUG CFLAGS_OPTIMIZE = -O6 endif -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS = $(MBITS) -ggdb3 -Wall -Wextra -std=gnu99 -Werror $(CFLAGS_OPTIMIZE) -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) EXTLIBS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) @@ -250,7 +263,7 @@ PTHREAD_LIBS = -lpthread # explicitly what architecture to check for. Fix this up for yours.. 
SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ -ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null "$(QUIET_STDERR)" && echo y"), y) +ifeq ($(shell sh -c "echo 'int foo(void) {char X[2]; return 3;}' | $(CC) -x c -c -Werror -fstack-protector-all - -o /dev/null >/dev/null 2>&1 && echo y"), y) CFLAGS := $(CFLAGS) -fstack-protector-all endif @@ -419,6 +432,7 @@ BUILTIN_OBJS += builtin-bench.o # Benchmark modules BUILTIN_OBJS += bench/sched-messaging.o BUILTIN_OBJS += bench/sched-pipe.o +BUILTIN_OBJS += bench/mem-memcpy.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o @@ -431,15 +445,9 @@ BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o BUILTIN_OBJS += builtin-probe.o -BUILTIN_OBJS += builtin-kmem.o PERFLIBS = $(LIB_FILE) -ifeq ($(V), 2) - QUIET_STDERR = ">/dev/null" -else - QUIET_STDERR = ">/dev/null 2>&1" -endif # # Platform specific tweaks # @@ -467,19 +475,19 @@ ifeq ($(uname_S),Darwin) PTHREAD_LIBS = endif -ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) -ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) - msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static); +ifeq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) +ifneq ($(shell 
sh -c "(echo '\#include '; echo 'int main(void) { const char * version = gnu_get_libc_version(); return (long)version; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) + msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]); endif - ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) + ifneq ($(shell sh -c "(echo '\#include '; echo 'int main(void) { Elf * elf = elf_begin(0, ELF_C_READ_MMAP, 0); return (long)elf; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) BASIC_CFLAGS += -DLIBELF_NO_MMAP endif else msg := $(error No libelf.h/libelf found, please install libelf-dev/elfutils-libelf-devel and glibc-dev[el]); endif -ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) "$(QUIET_STDERR)" && echo y"), y) +ifneq ($(shell sh -c "(echo '\#include '; echo '\#include '; echo 'int main(void) { Dwarf_Debug dbg; Dwarf_Error err; Dwarf_Ranges *rng; dwarf_init(0, DW_DLC_READ, 0, 0, &dbg, &err); dwarf_get_ranges(dbg, 0, &rng, 0, 0, &err); return (long)dbg; }') | $(CC) -x c - $(ALL_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ldwarf -lelf -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) > /dev/null 2>&1 && echo y"), y) msg := $(warning No libdwarf.h found or old libdwarf.h found, disables dwarf support. 
Please install libdwarf-dev/libdwarf-devel >= 20081231); BASIC_CFLAGS += -DNO_LIBDWARF else @@ -491,25 +499,25 @@ endif ifdef NO_DEMANGLE BASIC_CFLAGS += -DNO_DEMANGLE else - has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd "$(QUIET_STDERR)" && echo y") + has_bfd := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd > /dev/null 2>&1 && echo y") ifeq ($(has_bfd),y) EXTLIBS += -lbfd else - has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty "$(QUIET_STDERR)" && echo y") + has_bfd_iberty := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty > /dev/null 2>&1 && echo y") ifeq ($(has_bfd_iberty),y) EXTLIBS += -lbfd -liberty else - has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz "$(QUIET_STDERR)" && echo y") + has_bfd_iberty_z := $(shell sh -c "(echo '\#include '; echo 'int main(void) { bfd_demangle(0, 0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -lbfd -liberty -lz > /dev/null 2>&1 && echo y") ifeq ($(has_bfd_iberty_z),y) EXTLIBS += -lbfd -liberty -lz else - has_cplus_demangle := $(shell sh -c "(echo 'extern char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty "$(QUIET_STDERR)" && echo y") + has_cplus_demangle := $(shell sh -c "(echo 'extern 
char *cplus_demangle(const char *, int);'; echo 'int main(void) { cplus_demangle(0, 0); return 0; }') | $(CC) -x c - $(ALL_CFLAGS) -o /dev/null $(ALL_LDFLAGS) $(EXTLIBS) -liberty > /dev/null 2>&1 && echo y") ifeq ($(has_cplus_demangle),y) EXTLIBS += -liberty BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, install binutils-dev[el] to gain symbol demangling) BASIC_CFLAGS += -DNO_DEMANGLE endif endif diff --git a/trunk/tools/perf/bench/bench.h b/trunk/tools/perf/bench/bench.h index 9fbd8d745fa1..f7781c6267c0 100644 --- a/trunk/tools/perf/bench/bench.h +++ b/trunk/tools/perf/bench/bench.h @@ -3,6 +3,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/trunk/tools/perf/bench/mem-memcpy.c b/trunk/tools/perf/bench/mem-memcpy.c new file mode 100644 index 000000000000..d4f4f9806ae4 --- /dev/null +++ b/trunk/tools/perf/bench/mem-memcpy.c @@ -0,0 +1,186 @@ +/* + * mem-memcpy.c + * + * memcpy: Simple memory copy in various ways + * + * Written by Hitoshi Mitake + */ +#include + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/string.h" +#include "../util/header.h" +#include "bench.h" + +#include +#include +#include +#include +#include + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int use_clock = 0; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. 
" + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_END() +}; + +struct routine { + const char *name; + const char *desc; + void * (*fn)(void *dst, const void *src, size_t len); +}; + +struct routine routines[] = { + { "default", + "Default memcpy() provided by glibc", + memcpy }, + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy ", + NULL +}; + +static int clock_fd; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __used) +{ + int i; + void *dst, *src; + size_t length; + double bps = 0.0; + struct timeval tv_start, tv_end, tv_diff; + u64 clock_start, clock_end, clock_diff; + + clock_start = clock_end = clock_diff = 0ULL; + argc = parse_options(argc, argv, options, + bench_mem_memcpy_usage, 0); + + tv_diff.tv_sec = 0; + tv_diff.tv_usec = 0; + length = (size_t)perf_atoll((char *)length_str); + if ((long long int)length <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... 
%s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + dst = calloc(length, sizeof(char)); + assert(dst); + src = calloc(length, sizeof(char)); + assert(src); + + if (bench_format == BENCH_FORMAT_DEFAULT) { + printf("# Copying %s Bytes from %p to %p ...\n\n", + length_str, src, dst); + } + + if (use_clock) { + init_clock(); + clock_start = get_clock(); + } else + BUG_ON(gettimeofday(&tv_start, NULL)); + + routines[i].fn(dst, src, length); + + if (use_clock) { + clock_end = get_clock(); + clock_diff = clock_end - clock_start; + } else { + BUG_ON(gettimeofday(&tv_end, NULL)); + timersub(&tv_end, &tv_start, &tv_diff); + bps = (double)((double)length / timeval2double(&tv_diff)); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)clock_diff / (double)length); + } else { + if (bps < K) + printf(" %14lf B/Sec\n", bps); + else if (bps < K * K) + printf(" %14lfd KB/Sec\n", bps / 1024); + else if (bps < K * K * K) + printf(" %14lf MB/Sec\n", bps / 1024 / 1024); + else { + printf(" %14lf GB/Sec\n", + bps / 1024 / 1024 / 1024); + } + } + break; + case BENCH_FORMAT_SIMPLE: + if (use_clock) { + printf("%14lf\n", + (double)clock_diff / (double)length); + } else + printf("%lf\n", bps); + break; + default: + /* reaching here is something disaster */ + fprintf(stderr, "Unknown format:%d\n", bench_format); + exit(1); + break; + } + + return 0; +} diff --git a/trunk/tools/perf/builtin-annotate.c b/trunk/tools/perf/builtin-annotate.c index 6b13a1ecf1e7..77d50a6d6802 100644 --- a/trunk/tools/perf/builtin-annotate.c +++ b/trunk/tools/perf/builtin-annotate.c @@ -33,11 +33,9 @@ static int input; static int full_paths; static int print_line; -static bool use_modules; static unsigned long page_size; static unsigned long mmap_window = 32; -const char *vmlinux_name; struct sym_hist { u64 sum; @@ -158,7 +156,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if 
(event->header.misc & PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map, symbol_filter); + sym = kernel_maps__find_symbol(ip, &map); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); } else if (event->header.misc & PERF_RECORD_MISC_USER) { @@ -638,9 +636,9 @@ static int __cmd_annotate(void) exit(0); } - if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) { - pr_err("failed to create kernel maps for symbol resolution\b"); - return -1; + if (load_kernel(symbol_filter) < 0) { + perror("failed to load kernel symbols"); + return EXIT_FAILURE; } remap: @@ -744,7 +742,7 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('m', "modules", &use_modules, + OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), diff --git a/trunk/tools/perf/builtin-bench.c b/trunk/tools/perf/builtin-bench.c index 90c39baae0de..e043eb83092a 100644 --- a/trunk/tools/perf/builtin-bench.c +++ b/trunk/tools/perf/builtin-bench.c @@ -12,6 +12,7 @@ * * Available subsystem list: * sched ... scheduler and IPC mechanism + * mem ... 
memory access performance * */ @@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = { NULL } }; +static struct bench_suite mem_suites[] = { + { "memcpy", + "Simple memory copy in various ways", + bench_mem_memcpy }, + { NULL, + NULL, + NULL } +}; + struct bench_subsys { const char *name; const char *summary; @@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = { { "sched", "scheduler and IPC mechanism", sched_suites }, + { "mem", + "memory access performance", + mem_suites }, { NULL, NULL, - NULL } + NULL } }; static void dump_suites(int subsys_index) diff --git a/trunk/tools/perf/builtin-kmem.c b/trunk/tools/perf/builtin-kmem.c deleted file mode 100644 index 173d6db42ecb..000000000000 --- a/trunk/tools/perf/builtin-kmem.c +++ /dev/null @@ -1,833 +0,0 @@ -#include "builtin.h" -#include "perf.h" - -#include "util/util.h" -#include "util/cache.h" -#include "util/symbol.h" -#include "util/thread.h" -#include "util/header.h" - -#include "util/parse-options.h" -#include "util/trace-event.h" - -#include "util/debug.h" -#include "util/data_map.h" - -#include - -struct alloc_stat; -typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); - -static char const *input_name = "perf.data"; - -static struct perf_header *header; -static u64 sample_type; - -static int alloc_flag; -static int caller_flag; - -static int alloc_lines = -1; -static int caller_lines = -1; - -static bool raw_ip; - -static char default_sort_order[] = "frag,hit,bytes"; - -static char *cwd; -static int cwdlen; - -static int *cpunode_map; -static int max_cpu_num; - -struct alloc_stat { - u64 call_site; - u64 ptr; - u64 bytes_req; - u64 bytes_alloc; - u32 hit; - u32 pingpong; - - short alloc_cpu; - - struct rb_node node; -}; - -static struct rb_root root_alloc_stat; -static struct rb_root root_alloc_sorted; -static struct rb_root root_caller_stat; -static struct rb_root root_caller_sorted; - -static unsigned long total_requested, total_allocated; -static unsigned long nr_allocs, 
nr_cross_allocs; - -struct raw_event_sample { - u32 size; - char data[0]; -}; - -#define PATH_SYS_NODE "/sys/devices/system/node" - -static void init_cpunode_map(void) -{ - FILE *fp; - int i; - - fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); - if (!fp) { - max_cpu_num = 4096; - return; - } - - if (fscanf(fp, "%d", &max_cpu_num) < 1) - die("Failed to read 'kernel_max' from sysfs"); - max_cpu_num++; - - cpunode_map = calloc(max_cpu_num, sizeof(int)); - if (!cpunode_map) - die("calloc"); - for (i = 0; i < max_cpu_num; i++) - cpunode_map[i] = -1; - fclose(fp); -} - -static void setup_cpunode_map(void) -{ - struct dirent *dent1, *dent2; - DIR *dir1, *dir2; - unsigned int cpu, mem; - char buf[PATH_MAX]; - - init_cpunode_map(); - - dir1 = opendir(PATH_SYS_NODE); - if (!dir1) - return; - - while (true) { - dent1 = readdir(dir1); - if (!dent1) - break; - - if (sscanf(dent1->d_name, "node%u", &mem) < 1) - continue; - - snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); - dir2 = opendir(buf); - if (!dir2) - continue; - while (true) { - dent2 = readdir(dir2); - if (!dent2) - break; - if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) - continue; - cpunode_map[cpu] = mem; - } - } -} - -static int -process_comm_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->comm.pid); - - dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->comm.comm, event->comm.pid); - - if (thread == NULL || - thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; - } - - return 0; -} - -static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, - int bytes_req, int bytes_alloc, int cpu) -{ - struct rb_node **node = &root_alloc_stat.rb_node; - struct rb_node *parent = NULL; - struct alloc_stat *data = NULL; - - while (*node) { - parent = *node; - data = 
rb_entry(*node, struct alloc_stat, node); - - if (ptr > data->ptr) - node = &(*node)->rb_right; - else if (ptr < data->ptr) - node = &(*node)->rb_left; - else - break; - } - - if (data && data->ptr == ptr) { - data->hit++; - data->bytes_req += bytes_req; - data->bytes_alloc += bytes_req; - } else { - data = malloc(sizeof(*data)); - if (!data) - die("malloc"); - data->ptr = ptr; - data->pingpong = 0; - data->hit = 1; - data->bytes_req = bytes_req; - data->bytes_alloc = bytes_alloc; - - rb_link_node(&data->node, parent, node); - rb_insert_color(&data->node, &root_alloc_stat); - } - data->call_site = call_site; - data->alloc_cpu = cpu; -} - -static void insert_caller_stat(unsigned long call_site, - int bytes_req, int bytes_alloc) -{ - struct rb_node **node = &root_caller_stat.rb_node; - struct rb_node *parent = NULL; - struct alloc_stat *data = NULL; - - while (*node) { - parent = *node; - data = rb_entry(*node, struct alloc_stat, node); - - if (call_site > data->call_site) - node = &(*node)->rb_right; - else if (call_site < data->call_site) - node = &(*node)->rb_left; - else - break; - } - - if (data && data->call_site == call_site) { - data->hit++; - data->bytes_req += bytes_req; - data->bytes_alloc += bytes_req; - } else { - data = malloc(sizeof(*data)); - if (!data) - die("malloc"); - data->call_site = call_site; - data->pingpong = 0; - data->hit = 1; - data->bytes_req = bytes_req; - data->bytes_alloc = bytes_alloc; - - rb_link_node(&data->node, parent, node); - rb_insert_color(&data->node, &root_caller_stat); - } -} - -static void process_alloc_event(struct raw_event_sample *raw, - struct event *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used, - int node) -{ - unsigned long call_site; - unsigned long ptr; - int bytes_req; - int bytes_alloc; - int node1, node2; - - ptr = raw_field_value(event, "ptr", raw->data); - call_site = raw_field_value(event, "call_site", raw->data); - bytes_req = raw_field_value(event, "bytes_req", raw->data); - 
bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); - - insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); - insert_caller_stat(call_site, bytes_req, bytes_alloc); - - total_requested += bytes_req; - total_allocated += bytes_alloc; - - if (node) { - node1 = cpunode_map[cpu]; - node2 = raw_field_value(event, "node", raw->data); - if (node1 != node2) - nr_cross_allocs++; - } - nr_allocs++; -} - -static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); -static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); - -static struct alloc_stat *search_alloc_stat(unsigned long ptr, - unsigned long call_site, - struct rb_root *root, - sort_fn_t sort_fn) -{ - struct rb_node *node = root->rb_node; - struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; - - while (node) { - struct alloc_stat *data; - int cmp; - - data = rb_entry(node, struct alloc_stat, node); - - cmp = sort_fn(&key, data); - if (cmp < 0) - node = node->rb_left; - else if (cmp > 0) - node = node->rb_right; - else - return data; - } - return NULL; -} - -static void process_free_event(struct raw_event_sample *raw, - struct event *event, - int cpu, - u64 timestamp __used, - struct thread *thread __used) -{ - unsigned long ptr; - struct alloc_stat *s_alloc, *s_caller; - - ptr = raw_field_value(event, "ptr", raw->data); - - s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); - if (!s_alloc) - return; - - if (cpu != s_alloc->alloc_cpu) { - s_alloc->pingpong++; - - s_caller = search_alloc_stat(0, s_alloc->call_site, - &root_caller_stat, callsite_cmp); - assert(s_caller); - s_caller->pingpong++; - } - s_alloc->alloc_cpu = -1; -} - -static void -process_raw_event(event_t *raw_event __used, void *more_data, - int cpu, u64 timestamp, struct thread *thread) -{ - struct raw_event_sample *raw = more_data; - struct event *event; - int type; - - type = trace_parse_common_type(raw->data); - event = trace_find_event(type); - - if (!strcmp(event->name, "kmalloc") || - 
!strcmp(event->name, "kmem_cache_alloc")) { - process_alloc_event(raw, event, cpu, timestamp, thread, 0); - return; - } - - if (!strcmp(event->name, "kmalloc_node") || - !strcmp(event->name, "kmem_cache_alloc_node")) { - process_alloc_event(raw, event, cpu, timestamp, thread, 1); - return; - } - - if (!strcmp(event->name, "kfree") || - !strcmp(event->name, "kmem_cache_free")) { - process_free_event(raw, event, cpu, timestamp, thread); - return; - } -} - -static int -process_sample_event(event_t *event, unsigned long offset, unsigned long head) -{ - u64 ip = event->ip.ip; - u64 timestamp = -1; - u32 cpu = -1; - u64 period = 1; - void *more_data = event->ip.__more_data; - struct thread *thread = threads__findnew(event->ip.pid); - - if (sample_type & PERF_SAMPLE_TIME) { - timestamp = *(u64 *)more_data; - more_data += sizeof(u64); - } - - if (sample_type & PERF_SAMPLE_CPU) { - cpu = *(u32 *)more_data; - more_data += sizeof(u32); - more_data += sizeof(u32); /* reserved */ - } - - if (sample_type & PERF_SAMPLE_PERIOD) { - period = *(u64 *)more_data; - more_data += sizeof(u64); - } - - dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->header.misc, - event->ip.pid, event->ip.tid, - (void *)(long)ip, - (long long)period); - - if (thread == NULL) { - pr_debug("problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } - - dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid); - - process_raw_event(event, more_data, cpu, timestamp, thread); - - return 0; -} - -static int sample_type_check(u64 type) -{ - sample_type = type; - - if (!(sample_type & PERF_SAMPLE_RAW)) { - fprintf(stderr, - "No trace sample to read. 
Did you call perf record " - "without -R?"); - return -1; - } - - return 0; -} - -static struct perf_file_handler file_handler = { - .process_sample_event = process_sample_event, - .process_comm_event = process_comm_event, - .sample_type_check = sample_type_check, -}; - -static int read_events(void) -{ - register_idle_thread(); - register_perf_file_handler(&file_handler); - - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, - &cwdlen, &cwd); -} - -static double fragmentation(unsigned long n_req, unsigned long n_alloc) -{ - if (n_alloc == 0) - return 0.0; - else - return 100.0 - (100.0 * n_req / n_alloc); -} - -static void __print_result(struct rb_root *root, int n_lines, int is_caller) -{ - struct rb_node *next; - - printf("%.102s\n", graph_dotted_line); - printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); - printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); - printf("%.102s\n", graph_dotted_line); - - next = rb_first(root); - - while (next && n_lines--) { - struct alloc_stat *data = rb_entry(next, struct alloc_stat, - node); - struct symbol *sym = NULL; - char buf[BUFSIZ]; - u64 addr; - - if (is_caller) { - addr = data->call_site; - if (!raw_ip) - sym = kernel_maps__find_symbol(addr, - NULL, NULL); - } else - addr = data->ptr; - - if (sym != NULL) - snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, - addr - sym->start); - else - snprintf(buf, sizeof(buf), "%#Lx", addr); - printf(" %-34s |", buf); - - printf(" %9llu/%-5lu | %9llu/%-5lu | %6lu | %8lu | %6.3f%%\n", - (unsigned long long)data->bytes_alloc, - (unsigned long)data->bytes_alloc / data->hit, - (unsigned long long)data->bytes_req, - (unsigned long)data->bytes_req / data->hit, - (unsigned long)data->hit, - (unsigned long)data->pingpong, - fragmentation(data->bytes_req, data->bytes_alloc)); - - next = rb_next(next); - } - - if (n_lines == -1) - printf(" ... | ... | ... | ... | ... | ... 
\n"); - - printf("%.102s\n", graph_dotted_line); -} - -static void print_summary(void) -{ - printf("\nSUMMARY\n=======\n"); - printf("Total bytes requested: %lu\n", total_requested); - printf("Total bytes allocated: %lu\n", total_allocated); - printf("Total bytes wasted on internal fragmentation: %lu\n", - total_allocated - total_requested); - printf("Internal fragmentation: %f%%\n", - fragmentation(total_requested, total_allocated)); - printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); -} - -static void print_result(void) -{ - if (caller_flag) - __print_result(&root_caller_sorted, caller_lines, 1); - if (alloc_flag) - __print_result(&root_alloc_sorted, alloc_lines, 0); - print_summary(); -} - -struct sort_dimension { - const char name[20]; - sort_fn_t cmp; - struct list_head list; -}; - -static LIST_HEAD(caller_sort); -static LIST_HEAD(alloc_sort); - -static void sort_insert(struct rb_root *root, struct alloc_stat *data, - struct list_head *sort_list) -{ - struct rb_node **new = &(root->rb_node); - struct rb_node *parent = NULL; - struct sort_dimension *sort; - - while (*new) { - struct alloc_stat *this; - int cmp = 0; - - this = rb_entry(*new, struct alloc_stat, node); - parent = *new; - - list_for_each_entry(sort, sort_list, list) { - cmp = sort->cmp(data, this); - if (cmp) - break; - } - - if (cmp > 0) - new = &((*new)->rb_left); - else - new = &((*new)->rb_right); - } - - rb_link_node(&data->node, parent, new); - rb_insert_color(&data->node, root); -} - -static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, - struct list_head *sort_list) -{ - struct rb_node *node; - struct alloc_stat *data; - - for (;;) { - node = rb_first(root); - if (!node) - break; - - rb_erase(node, root); - data = rb_entry(node, struct alloc_stat, node); - sort_insert(root_sorted, data, sort_list); - } -} - -static void sort_result(void) -{ - __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); - __sort_result(&root_caller_stat, 
&root_caller_sorted, &caller_sort); -} - -static int __cmd_kmem(void) -{ - setup_pager(); - read_events(); - sort_result(); - print_result(); - - return 0; -} - -static const char * const kmem_usage[] = { - "perf kmem [] {record}", - NULL -}; - -static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - if (l->ptr < r->ptr) - return -1; - else if (l->ptr > r->ptr) - return 1; - return 0; -} - -static struct sort_dimension ptr_sort_dimension = { - .name = "ptr", - .cmp = ptr_cmp, -}; - -static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - if (l->call_site < r->call_site) - return -1; - else if (l->call_site > r->call_site) - return 1; - return 0; -} - -static struct sort_dimension callsite_sort_dimension = { - .name = "callsite", - .cmp = callsite_cmp, -}; - -static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - if (l->hit < r->hit) - return -1; - else if (l->hit > r->hit) - return 1; - return 0; -} - -static struct sort_dimension hit_sort_dimension = { - .name = "hit", - .cmp = hit_cmp, -}; - -static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - if (l->bytes_alloc < r->bytes_alloc) - return -1; - else if (l->bytes_alloc > r->bytes_alloc) - return 1; - return 0; -} - -static struct sort_dimension bytes_sort_dimension = { - .name = "bytes", - .cmp = bytes_cmp, -}; - -static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - double x, y; - - x = fragmentation(l->bytes_req, l->bytes_alloc); - y = fragmentation(r->bytes_req, r->bytes_alloc); - - if (x < y) - return -1; - else if (x > y) - return 1; - return 0; -} - -static struct sort_dimension frag_sort_dimension = { - .name = "frag", - .cmp = frag_cmp, -}; - -static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) -{ - if (l->pingpong < r->pingpong) - return -1; - else if (l->pingpong > r->pingpong) - return 1; - return 0; -} - -static struct sort_dimension pingpong_sort_dimension = { - .name = "pingpong", - .cmp = pingpong_cmp, -}; - 
-static struct sort_dimension *avail_sorts[] = { - &ptr_sort_dimension, - &callsite_sort_dimension, - &hit_sort_dimension, - &bytes_sort_dimension, - &frag_sort_dimension, - &pingpong_sort_dimension, -}; - -#define NUM_AVAIL_SORTS \ - (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *)) - -static int sort_dimension__add(const char *tok, struct list_head *list) -{ - struct sort_dimension *sort; - int i; - - for (i = 0; i < NUM_AVAIL_SORTS; i++) { - if (!strcmp(avail_sorts[i]->name, tok)) { - sort = malloc(sizeof(*sort)); - if (!sort) - die("malloc"); - memcpy(sort, avail_sorts[i], sizeof(*sort)); - list_add_tail(&sort->list, list); - return 0; - } - } - - return -1; -} - -static int setup_sorting(struct list_head *sort_list, const char *arg) -{ - char *tok; - char *str = strdup(arg); - - if (!str) - die("strdup"); - - while (true) { - tok = strsep(&str, ","); - if (!tok) - break; - if (sort_dimension__add(tok, sort_list) < 0) { - error("Unknown --sort key: '%s'", tok); - return -1; - } - } - - free(str); - return 0; -} - -static int parse_sort_opt(const struct option *opt __used, - const char *arg, int unset __used) -{ - if (!arg) - return -1; - - if (caller_flag > alloc_flag) - return setup_sorting(&caller_sort, arg); - else - return setup_sorting(&alloc_sort, arg); - - return 0; -} - -static int parse_stat_opt(const struct option *opt __used, - const char *arg, int unset __used) -{ - if (!arg) - return -1; - - if (strcmp(arg, "alloc") == 0) - alloc_flag = (caller_flag + 1); - else if (strcmp(arg, "caller") == 0) - caller_flag = (alloc_flag + 1); - else - return -1; - return 0; -} - -static int parse_line_opt(const struct option *opt __used, - const char *arg, int unset __used) -{ - int lines; - - if (!arg) - return -1; - - lines = strtoul(arg, NULL, 10); - - if (caller_flag > alloc_flag) - caller_lines = lines; - else - alloc_lines = lines; - - return 0; -} - -static const struct option kmem_options[] = { - OPT_STRING('i', "input", &input_name, "file", - 
"input file name"), - OPT_CALLBACK(0, "stat", NULL, "|", - "stat selector, Pass 'alloc' or 'caller'.", - parse_stat_opt), - OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", - "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", - parse_sort_opt), - OPT_CALLBACK('l', "line", NULL, "num", - "show n lins", - parse_line_opt), - OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), - OPT_END() -}; - -static const char *record_args[] = { - "record", - "-a", - "-R", - "-M", - "-f", - "-c", "1", - "-e", "kmem:kmalloc", - "-e", "kmem:kmalloc_node", - "-e", "kmem:kfree", - "-e", "kmem:kmem_cache_alloc", - "-e", "kmem:kmem_cache_alloc_node", - "-e", "kmem:kmem_cache_free", -}; - -static int __cmd_record(int argc, const char **argv) -{ - unsigned int rec_argc, i, j; - const char **rec_argv; - - rec_argc = ARRAY_SIZE(record_args) + argc - 1; - rec_argv = calloc(rec_argc + 1, sizeof(char *)); - - for (i = 0; i < ARRAY_SIZE(record_args); i++) - rec_argv[i] = strdup(record_args[i]); - - for (j = 1; j < (unsigned int)argc; j++, i++) - rec_argv[i] = argv[j]; - - return cmd_record(i, rec_argv, NULL); -} - -int cmd_kmem(int argc, const char **argv, const char *prefix __used) -{ - symbol__init(0); - - argc = parse_options(argc, argv, kmem_options, kmem_usage, 0); - - if (argc && !strncmp(argv[0], "rec", 3)) - return __cmd_record(argc, argv); - else if (argc) - usage_with_options(kmem_usage, kmem_options); - - if (list_empty(&caller_sort)) - setup_sorting(&caller_sort, default_sort_order); - if (list_empty(&alloc_sort)) - setup_sorting(&alloc_sort, default_sort_order); - - setup_cpunode_map(); - - return __cmd_kmem(); -} - diff --git a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c index 0e519c667e3a..82260c56db3d 100644 --- a/trunk/tools/perf/builtin-record.c +++ b/trunk/tools/perf/builtin-record.c @@ -307,12 +307,6 @@ static void create_counter(int counter, int cpu, pid_t pid) printf("\n"); error("perfcounter syscall returned with %d 
(%s)\n", fd[nr_cpu][counter], strerror(err)); - -#if defined(__i386__) || defined(__x86_64__) - if (attr->type == PERF_TYPE_HARDWARE && err == EOPNOTSUPP) - die("No hardware sampling interrupt available. No APIC? If so then you can boot the kernel with the \"lapic\" boot parameter to force-enable it.\n"); -#endif - die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } @@ -406,7 +400,7 @@ static int __cmd_record(int argc, const char **argv) struct stat st; pid_t pid = 0; int flags; - int err; + int ret; unsigned long waking = 0; page_size = sysconf(_SC_PAGE_SIZE); @@ -440,18 +434,16 @@ static int __cmd_record(int argc, const char **argv) exit(-1); } - header = perf_header__new(); + if (!file_new) + header = perf_header__read(output); + else + header = perf_header__new(); + if (header == NULL) { pr_err("Not enough memory for reading perf file header\n"); return -1; } - if (!file_new) { - err = perf_header__read(header, output); - if (err < 0) - return err; - } - if (raw_samples) { perf_header__set_feat(header, HEADER_TRACE_INFO); } else { @@ -480,11 +472,8 @@ static int __cmd_record(int argc, const char **argv) } } - if (file_new) { - err = perf_header__write(header, output, false); - if (err < 0) - return err; - } + if (file_new) + perf_header__write(header, output, false); if (!system_wide) event__synthesize_thread(pid, process_synthesized_event); @@ -538,7 +527,7 @@ static int __cmd_record(int argc, const char **argv) if (hits == samples) { if (done) break; - err = poll(event_array, nr_poll, -1); + ret = poll(event_array, nr_poll, -1); waking++; } diff --git a/trunk/tools/perf/builtin-report.c b/trunk/tools/perf/builtin-report.c index fe474b7f8ad0..1a806d5f05cf 100644 --- a/trunk/tools/perf/builtin-report.c +++ b/trunk/tools/perf/builtin-report.c @@ -38,7 +38,6 @@ static char *dso_list_str, *comm_list_str, *sym_list_str, static struct strlist *dso_list, *comm_list, *sym_list; static int force; -static bool use_modules; static int full_paths; 
static int show_nr_samples; @@ -52,7 +51,6 @@ static char *pretty_printing_style = default_pretty_printing_style; static int exclude_other = 1; static char callchain_default_opt[] = "fractal,0.5"; -const char *vmlinux_name; static char *cwd; static int cwdlen; @@ -450,7 +448,7 @@ resolve_symbol(struct thread *thread, struct map **mapp, u64 *ipp) * trick of looking in the whole kernel symbol list. */ if ((long long)ip < 0) - return kernel_maps__find_symbol(ip, mapp, NULL); + return kernel_maps__find_symbol(ip, mapp); } dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : ""); @@ -468,7 +466,7 @@ static int call__match(struct symbol *sym) return 0; } -static struct symbol **resolve_callchain(struct thread *thread, +static struct symbol **resolve_callchain(struct thread *thread, struct map *map, struct ip_callchain *chain, struct symbol **parent) { @@ -497,10 +495,10 @@ static struct symbol **resolve_callchain(struct thread *thread, case PERF_CONTEXT_HV: break; case PERF_CONTEXT_KERNEL: - sym = kernel_maps__find_symbol(ip, NULL, NULL); + sym = kernel_maps__find_symbol(ip, &map); break; default: - sym = resolve_symbol(thread, NULL, &ip); + sym = resolve_symbol(thread, &map, &ip); break; } @@ -530,7 +528,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct hist_entry *he; if ((sort__has_parent || callchain) && chain) - syms = resolve_callchain(thread, chain, &parent); + syms = resolve_callchain(thread, map, chain, &parent); he = __hist_entry__add(thread, map, sym, parent, ip, count, level, &hit); @@ -717,7 +715,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (cpumode == PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map, NULL); + sym = kernel_maps__find_symbol(ip, &map); dump_printf(" ...... dso: %s\n", map ? 
map->dso->long_name : ""); } else if (cpumode == PERF_RECORD_MISC_USER) { @@ -926,9 +924,8 @@ static int __cmd_report(void) register_perf_file_handler(&file_handler); - ret = mmap_dispatch_perf_file(&header, input_name, vmlinux_name, - !vmlinux_name, force, - full_paths, &cwdlen, &cwd); + ret = mmap_dispatch_perf_file(&header, input_name, force, full_paths, + &cwdlen, &cwd); if (ret) return ret; @@ -1026,7 +1023,7 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), - OPT_BOOLEAN('m', "modules", &use_modules, + OPT_BOOLEAN('m', "modules", &modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('n', "show-nr-samples", &show_nr_samples, "Show a column with the number of samples"), diff --git a/trunk/tools/perf/builtin-sched.c b/trunk/tools/perf/builtin-sched.c index 260f57a72ee0..df44b756cecc 100644 --- a/trunk/tools/perf/builtin-sched.c +++ b/trunk/tools/perf/builtin-sched.c @@ -1718,8 +1718,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, - &cwdlen, &cwd); + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } static void print_bad_events(void) diff --git a/trunk/tools/perf/builtin-timechart.c b/trunk/tools/perf/builtin-timechart.c index dd4d82ac7aa4..665877e4a944 100644 --- a/trunk/tools/perf/builtin-timechart.c +++ b/trunk/tools/perf/builtin-timechart.c @@ -1093,7 +1093,7 @@ static void process_samples(void) static int __cmd_timechart(void) { - int err, rc = EXIT_FAILURE; + int ret, rc = EXIT_FAILURE; unsigned long offset = 0; unsigned long head, shift; struct stat statbuf; @@ -1111,8 +1111,8 @@ static int __cmd_timechart(void) exit(-1); } - err = fstat(input, &statbuf); - if (err < 0) { + ret = fstat(input, &statbuf); + if (ret < 0) { 
perror("failed to stat file"); exit(-1); } @@ -1122,16 +1122,7 @@ static int __cmd_timechart(void) exit(0); } - header = perf_header__new(); - if (header == NULL) - return -ENOMEM; - - err = perf_header__read(header, input); - if (err < 0) { - perf_header__delete(header); - return err; - } - + header = perf_header__read(input); head = header->data_offset; sample_type = perf_header__sample_type(header); diff --git a/trunk/tools/perf/builtin-top.c b/trunk/tools/perf/builtin-top.c index 6a5de90e9b83..07b92c378ae2 100644 --- a/trunk/tools/perf/builtin-top.c +++ b/trunk/tools/perf/builtin-top.c @@ -79,7 +79,13 @@ static int dump_symtab = 0; static bool hide_kernel_symbols = false; static bool hide_user_symbols = false; static struct winsize winsize; -const char *vmlinux_name; +static const char *graph_line = + "_____________________________________________________________________" + "_____________________________________________________________________"; +static const char *graph_dotted_line = + "---------------------------------------------------------------------" + "---------------------------------------------------------------------" + "---------------------------------------------------------------------"; /* * Source @@ -824,8 +830,6 @@ static void handle_keypress(int c) case 'q': case 'Q': printf("exiting.\n"); - if (dump_symtab) - dsos__fprintf(stderr); exit(0); case 's': prompt_symbol(&sym_filter_entry, "Enter details symbol"); @@ -942,6 +946,17 @@ static int symbol_filter(struct map *map, struct symbol *sym) return 0; } +static int parse_symbols(void) +{ + if (dsos__load_kernel(vmlinux_name, symbol_filter, 1) <= 0) + return -1; + + if (dump_symtab) + dsos__fprintf(stderr); + + return 0; +} + static void event__process_sample(const event_t *self, int counter) { u64 ip = self->ip.ip; @@ -984,7 +999,7 @@ static void event__process_sample(const event_t *self, int counter) if (hide_kernel_symbols) return; - sym = kernel_maps__find_symbol(ip, &map, symbol_filter); 
+ sym = kernel_maps__find_symbol(ip, &map); if (sym == NULL) return; break; @@ -1311,7 +1326,7 @@ static const struct option options[] = { int cmd_top(int argc, const char **argv, const char *prefix __used) { - int counter, err; + int counter; page_size = sysconf(_SC_PAGE_SIZE); @@ -1335,11 +1350,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __used) if (delay_secs < 1) delay_secs = 1; - err = kernel_maps__init(vmlinux_name, !vmlinux_name, true); - if (err < 0) - return err; + parse_symbols(); parse_source(sym_filter_entry); + /* * User specified count overrides default frequency. */ diff --git a/trunk/tools/perf/builtin-trace.c b/trunk/tools/perf/builtin-trace.c index b71198e5dc14..d042d656c561 100644 --- a/trunk/tools/perf/builtin-trace.c +++ b/trunk/tools/perf/builtin-trace.c @@ -131,8 +131,7 @@ static int __cmd_trace(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, - 0, 0, &cwdlen, &cwd); + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } static const char * const annotate_usage[] = { diff --git a/trunk/tools/perf/builtin.h b/trunk/tools/perf/builtin.h index a3d8bf65f26c..9b02d85091fe 100644 --- a/trunk/tools/perf/builtin.h +++ b/trunk/tools/perf/builtin.h @@ -28,6 +28,5 @@ extern int cmd_top(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_version(int argc, const char **argv, const char *prefix); extern int cmd_probe(int argc, const char **argv, const char *prefix); -extern int cmd_kmem(int argc, const char **argv, const char *prefix); #endif diff --git a/trunk/tools/perf/command-list.txt b/trunk/tools/perf/command-list.txt index 02b09ea17a3e..d3a6e18e4a5e 100644 --- a/trunk/tools/perf/command-list.txt +++ b/trunk/tools/perf/command-list.txt @@ -14,4 +14,3 @@ perf-timechart mainporcelain common perf-top mainporcelain common perf-trace 
mainporcelain common perf-probe mainporcelain common -perf-kmem mainporcelain common diff --git a/trunk/tools/perf/perf.c b/trunk/tools/perf/perf.c index cf64049bc9bd..89b82acac7d9 100644 --- a/trunk/tools/perf/perf.c +++ b/trunk/tools/perf/perf.c @@ -285,21 +285,20 @@ static void handle_internal_command(int argc, const char **argv) { const char *cmd = argv[0]; static struct cmd_struct commands[] = { + { "help", cmd_help, 0 }, + { "list", cmd_list, 0 }, { "buildid-list", cmd_buildid_list, 0 }, - { "help", cmd_help, 0 }, - { "list", cmd_list, 0 }, - { "record", cmd_record, 0 }, - { "report", cmd_report, 0 }, - { "bench", cmd_bench, 0 }, - { "stat", cmd_stat, 0 }, - { "timechart", cmd_timechart, 0 }, - { "top", cmd_top, 0 }, - { "annotate", cmd_annotate, 0 }, - { "version", cmd_version, 0 }, - { "trace", cmd_trace, 0 }, - { "sched", cmd_sched, 0 }, - { "probe", cmd_probe, 0 }, - { "kmem", cmd_kmem, 0 }, + { "record", cmd_record, 0 }, + { "report", cmd_report, 0 }, + { "bench", cmd_bench, 0 }, + { "stat", cmd_stat, 0 }, + { "timechart", cmd_timechart, 0 }, + { "top", cmd_top, 0 }, + { "annotate", cmd_annotate, 0 }, + { "version", cmd_version, 0 }, + { "trace", cmd_trace, 0 }, + { "sched", cmd_sched, 0 }, + { "probe", cmd_probe, 0 }, }; unsigned int i; static const char ext[] = STRIP_EXTENSION; diff --git a/trunk/tools/perf/util/ctype.c b/trunk/tools/perf/util/ctype.c index 35073621e5de..0b791bd346bc 100644 --- a/trunk/tools/perf/util/ctype.c +++ b/trunk/tools/perf/util/ctype.c @@ -29,11 +29,3 @@ unsigned char sane_ctype[256] = { A, A, A, A, A, A, A, A, A, A, A, R, R, P, P, 0, /* 112..127 */ /* Nothing in the 128.. 
range */ }; - -const char *graph_line = - "_____________________________________________________________________" - "_____________________________________________________________________"; -const char *graph_dotted_line = - "---------------------------------------------------------------------" - "---------------------------------------------------------------------" - "---------------------------------------------------------------------"; diff --git a/trunk/tools/perf/util/data_map.c b/trunk/tools/perf/util/data_map.c index f318d19b2562..14cb8465eb08 100644 --- a/trunk/tools/perf/util/data_map.c +++ b/trunk/tools/perf/util/data_map.c @@ -101,14 +101,12 @@ int perf_header__read_build_ids(int input, off_t offset, off_t size) int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, - const char *vmlinux_name, - bool try_vmlinux_path, int force, int full_paths, int *cwdlen, char **cwd) { - int err; + int ret, rc = EXIT_FAILURE; struct perf_header *header; unsigned long head, shift; unsigned long offset = 0; @@ -120,69 +118,56 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, int input; char *buf; - if (curr_handler == NULL) { - pr_debug("Forgot to register perf file handler\n"); - return -EINVAL; - } + if (!curr_handler) + die("Forgot to register perf file handler"); page_size = getpagesize(); input = open(input_name, O_RDONLY); if (input < 0) { - pr_err("Failed to open file: %s", input_name); + fprintf(stderr, " failed to open file: %s", input_name); if (!strcmp(input_name, "perf.data")) - pr_err(" (try 'perf record' first)"); - pr_err("\n"); - return -errno; + fprintf(stderr, " (try 'perf record' first)"); + fprintf(stderr, "\n"); + exit(-1); } - if (fstat(input, &input_stat) < 0) { - pr_err("failed to stat file"); - err = -errno; - goto out_close; + ret = fstat(input, &input_stat); + if (ret < 0) { + perror("failed to stat file"); + exit(-1); } - err = -EACCES; if (!force && input_stat.st_uid && (input_stat.st_uid != geteuid())) 
{ - pr_err("file: %s not owned by current user or root\n", + fprintf(stderr, "file: %s not owned by current user or root\n", input_name); - goto out_close; + exit(-1); } - if (input_stat.st_size == 0) { - pr_info("zero-sized file, nothing to do!\n"); - goto done; + if (!input_stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); } - err = -ENOMEM; - header = perf_header__new(); - if (header == NULL) - goto out_close; - - err = perf_header__read(header, input); - if (err < 0) - goto out_delete; - *pheader = header; + *pheader = perf_header__read(input); + header = *pheader; head = header->data_offset; sample_type = perf_header__sample_type(header); - err = -EINVAL; - if (curr_handler->sample_type_check && - curr_handler->sample_type_check(sample_type) < 0) - goto out_delete; + if (curr_handler->sample_type_check) + if (curr_handler->sample_type_check(sample_type) < 0) + exit(-1); - err = -ENOMEM; - if (kernel_maps__init(vmlinux_name, try_vmlinux_path, true) < 0) { - pr_err("failed to setup the kernel maps to resolve symbols\n"); - goto out_delete; + if (load_kernel(NULL) < 0) { + perror("failed to load kernel symbols"); + return EXIT_FAILURE; } if (!full_paths) { if (getcwd(__cwd, sizeof(__cwd)) == NULL) { - pr_err("failed to get the current directory\n"); - err = -errno; - goto out_delete; + perror("failed to get the current directory"); + return EXIT_FAILURE; } *cwd = __cwd; *cwdlen = strlen(*cwd); @@ -196,12 +181,11 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, head -= shift; remap: - buf = mmap(NULL, page_size * mmap_window, PROT_READ, - MAP_SHARED, input, offset); + buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); if (buf == MAP_FAILED) { - pr_err("failed to mmap file\n"); - err = -errno; - goto out_delete; + perror("failed to mmap file"); + exit(-1); } more: @@ -258,12 +242,10 @@ int mmap_dispatch_perf_file(struct perf_header **pheader, goto more; done: - err = 0; -out_close: 
+ rc = EXIT_SUCCESS; close(input); - return err; -out_delete: - perf_header__delete(header); - goto out_close; + return rc; } + + diff --git a/trunk/tools/perf/util/data_map.h b/trunk/tools/perf/util/data_map.h index 3f0d21b3819e..ae036ecd7625 100644 --- a/trunk/tools/perf/util/data_map.h +++ b/trunk/tools/perf/util/data_map.h @@ -23,8 +23,6 @@ struct perf_file_handler { void register_perf_file_handler(struct perf_file_handler *handler); int mmap_dispatch_perf_file(struct perf_header **pheader, const char *input_name, - const char *vmlinux_name, - bool try_vmlinux_path, int force, int full_paths, int *cwdlen, diff --git a/trunk/tools/perf/util/event.h b/trunk/tools/perf/util/event.h index f1e392612652..1f771ce3a957 100644 --- a/trunk/tools/perf/util/event.h +++ b/trunk/tools/perf/util/event.h @@ -69,6 +69,13 @@ struct build_id_event { char filename[]; }; +struct build_id_list { + struct build_id_event event; + struct list_head list; + const char *dso_name; + int len; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -115,13 +122,10 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); void map__init(struct map *self, u64 start, u64 end, u64 pgoff, struct dso *dso); struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen); -void map__delete(struct map *self); struct map *map__clone(struct map *self); int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *self, FILE *fp); struct symbol *map__find_symbol(struct map *self, u64 ip, symbol_filter_t filter); -void map__fixup_start(struct map *self); -void map__fixup_end(struct map *self); int event__synthesize_thread(pid_t pid, int (*process)(event_t *event)); void event__synthesize_threads(int (*process)(event_t *event)); diff --git a/trunk/tools/perf/util/header.c b/trunk/tools/perf/util/header.c index 1332f8ec04aa..b01a9537977f 100644 --- a/trunk/tools/perf/util/header.c +++ b/trunk/tools/perf/util/header.c @@ -78,24 +78,16 
@@ struct perf_header *perf_header__new(void) return self; } -void perf_header__delete(struct perf_header *self) -{ - int i; - - for (i = 0; i < self->attrs; ++i) - perf_header_attr__delete(self->attr[i]); - - free(self->attr); - free(self); -} - int perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr) { + int pos = self->attrs; + if (self->frozen) return -1; - if (self->attrs == self->size) { + self->attrs++; + if (self->attrs > self->size) { int nsize = self->size * 2; struct perf_header_attr **nattr; @@ -106,8 +98,7 @@ int perf_header__add_attr(struct perf_header *self, self->size = nsize; self->attr = nattr; } - - self->attr[self->attrs++] = attr; + self->attr[pos] = attr; return 0; } @@ -176,7 +167,7 @@ static int do_write(int fd, const void *buf, size_t size) int ret = write(fd, buf, size); if (ret < 0) - return -errno; + return -1; size -= ret; buf += ret; @@ -185,51 +176,43 @@ static int do_write(int fd, const void *buf, size_t size) return 0; } -static int dsos__write_buildid_table(int fd) +static int write_buildid_table(int fd, struct list_head *id_head) { - struct dso *pos; - - list_for_each_entry(pos, &dsos, node) { - int err; - struct build_id_event b; - size_t len; - - if (!pos->has_build_id) - continue; - len = pos->long_name_len + 1; - len = ALIGN(len, 64); - memset(&b, 0, sizeof(b)); - memcpy(&b.build_id, pos->build_id, sizeof(pos->build_id)); - b.header.size = sizeof(b) + len; - err = do_write(fd, &b, sizeof(b)); - if (err < 0) - return err; - err = do_write(fd, pos->long_name, len); - if (err < 0) - return err; + struct build_id_list *iter, *next; + + list_for_each_entry_safe(iter, next, id_head, list) { + struct build_id_event *b = &iter->event; + + if (do_write(fd, b, sizeof(*b)) < 0 || + do_write(fd, iter->dso_name, iter->len) < 0) + return -1; + list_del(&iter->list); + free(iter); } return 0; } -static int perf_header__adds_write(struct perf_header *self, int fd) +static void +perf_header__adds_write(struct 
perf_header *self, int fd) { + LIST_HEAD(id_list); int nr_sections; struct perf_file_section *feat_sec; int sec_size; u64 sec_start; - int idx = 0, err; + int idx = 0; - if (dsos__read_build_ids()) + if (fetch_build_id_table(&id_list)) perf_header__set_feat(self, HEADER_BUILD_ID); nr_sections = bitmap_weight(self->adds_features, HEADER_FEAT_BITS); if (!nr_sections) - return 0; + return; feat_sec = calloc(sizeof(*feat_sec), nr_sections); - if (feat_sec == NULL) - return -ENOMEM; + if (!feat_sec) + die("No memory"); sec_size = sizeof(*feat_sec) * nr_sections; @@ -253,37 +236,25 @@ static int perf_header__adds_write(struct perf_header *self, int fd) buildid_sec = &feat_sec[idx++]; - /* - * Read the kernel buildid nad the list of loaded modules with - * its build_ids: - */ - kernel_maps__init(NULL, false, true); - /* Write build-ids */ buildid_sec->offset = lseek(fd, 0, SEEK_CUR); - err = dsos__write_buildid_table(fd); - if (err < 0) { - pr_debug("failed to write buildid table\n"); - goto out_free; - } + if (write_buildid_table(fd, &id_list) < 0) + die("failed to write buildid table"); buildid_sec->size = lseek(fd, 0, SEEK_CUR) - buildid_sec->offset; } lseek(fd, sec_start, SEEK_SET); - err = do_write(fd, feat_sec, sec_size); - if (err < 0) - pr_debug("failed to write feature section\n"); -out_free: + if (do_write(fd, feat_sec, sec_size) < 0) + die("failed to write feature section"); free(feat_sec); - return err; } -int perf_header__write(struct perf_header *self, int fd, bool at_exit) +void perf_header__write(struct perf_header *self, int fd, bool at_exit) { struct perf_file_header f_header; struct perf_file_attr f_attr; struct perf_header_attr *attr; - int i, err; + int i; lseek(fd, sizeof(f_header), SEEK_SET); @@ -292,11 +263,8 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) attr = self->attr[i]; attr->id_offset = lseek(fd, 0, SEEK_CUR); - err = do_write(fd, attr->id, attr->ids * sizeof(u64)); - if (err < 0) { - pr_debug("failed to write 
perf header\n"); - return err; - } + if (do_write(fd, attr->id, attr->ids * sizeof(u64)) < 0) + die("failed to write perf header"); } @@ -312,30 +280,20 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) .size = attr->ids * sizeof(u64), } }; - err = do_write(fd, &f_attr, sizeof(f_attr)); - if (err < 0) { - pr_debug("failed to write perf header attribute\n"); - return err; - } + if (do_write(fd, &f_attr, sizeof(f_attr)) < 0) + die("failed to write perf header attribute"); } self->event_offset = lseek(fd, 0, SEEK_CUR); self->event_size = event_count * sizeof(struct perf_trace_event_type); - if (events) { - err = do_write(fd, events, self->event_size); - if (err < 0) { - pr_debug("failed to write perf header events\n"); - return err; - } - } + if (events) + if (do_write(fd, events, self->event_size) < 0) + die("failed to write perf header events"); self->data_offset = lseek(fd, 0, SEEK_CUR); - if (at_exit) { - err = perf_header__adds_write(self, fd); - if (err < 0) - return err; - } + if (at_exit) + perf_header__adds_write(self, fd); f_header = (struct perf_file_header){ .magic = PERF_MAGIC, @@ -358,15 +316,11 @@ int perf_header__write(struct perf_header *self, int fd, bool at_exit) memcpy(&f_header.adds_features, &self->adds_features, sizeof(self->adds_features)); lseek(fd, 0, SEEK_SET); - err = do_write(fd, &f_header, sizeof(f_header)); - if (err < 0) { - pr_debug("failed to write perf header\n"); - return err; - } + if (do_write(fd, &f_header, sizeof(f_header)) < 0) + die("failed to write perf header"); lseek(fd, self->data_offset + self->data_size, SEEK_SET); self->frozen = 1; - return 0; } static void do_read(int fd, void *buf, size_t size) @@ -476,17 +430,19 @@ static int perf_file_section__process(struct perf_file_section *self, return 0; } -int perf_header__read(struct perf_header *self, int fd) +struct perf_header *perf_header__read(int fd) { + struct perf_header *self = perf_header__new(); struct perf_file_header f_header; struct 
perf_file_attr f_attr; u64 f_id; int nr_attrs, nr_ids, i, j; - if (perf_file_header__read(&f_header, self, fd) < 0) { - pr_debug("incompatible file format\n"); - return -EINVAL; - } + if (self == NULL) + die("nomem"); + + if (perf_file_header__read(&f_header, self, fd) < 0) + die("incompatible file format"); nr_attrs = f_header.attrs.size / sizeof(f_attr); lseek(fd, f_header.attrs.offset, SEEK_SET); @@ -500,7 +456,7 @@ int perf_header__read(struct perf_header *self, int fd) attr = perf_header_attr__new(&f_attr.attr); if (attr == NULL) - return -ENOMEM; + die("nomem"); nr_ids = f_attr.ids.size / sizeof(u64); lseek(fd, f_attr.ids.offset, SEEK_SET); @@ -508,15 +464,11 @@ int perf_header__read(struct perf_header *self, int fd) for (j = 0; j < nr_ids; j++) { do_read(fd, &f_id, sizeof(f_id)); - if (perf_header_attr__add_id(attr, f_id) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; - } - } - if (perf_header__add_attr(self, attr) < 0) { - perf_header_attr__delete(attr); - return -ENOMEM; + if (perf_header_attr__add_id(attr, f_id) < 0) + die("nomem"); } + if (perf_header__add_attr(self, attr) < 0) + die("nomem"); lseek(fd, tmp, SEEK_SET); } @@ -524,8 +476,8 @@ int perf_header__read(struct perf_header *self, int fd) if (f_header.event_types.size) { lseek(fd, f_header.event_types.offset, SEEK_SET); events = malloc(f_header.event_types.size); - if (events == NULL) - return -ENOMEM; + if (!events) + die("nomem"); do_read(fd, events, f_header.event_types.size); event_count = f_header.event_types.size / sizeof(struct perf_trace_event_type); } @@ -535,7 +487,8 @@ int perf_header__read(struct perf_header *self, int fd) lseek(fd, self->data_offset, SEEK_SET); self->frozen = 1; - return 0; + + return self; } u64 perf_header__sample_type(struct perf_header *header) diff --git a/trunk/tools/perf/util/header.h b/trunk/tools/perf/util/header.h index d1dbe2b79c42..f46a94e09eea 100644 --- a/trunk/tools/perf/util/header.h +++ b/trunk/tools/perf/util/header.h @@ -55,11 +55,8 @@ 
struct perf_header { DECLARE_BITMAP(adds_features, HEADER_FEAT_BITS); }; -struct perf_header *perf_header__new(void); -void perf_header__delete(struct perf_header *self); - -int perf_header__read(struct perf_header *self, int fd); -int perf_header__write(struct perf_header *self, int fd, bool at_exit); +struct perf_header *perf_header__read(int fd); +void perf_header__write(struct perf_header *self, int fd, bool at_exit); int perf_header__add_attr(struct perf_header *self, struct perf_header_attr *attr); @@ -78,6 +75,8 @@ perf_header__find_attr(u64 id, struct perf_header *header); void perf_header__set_feat(struct perf_header *self, int feat); bool perf_header__has_feat(const struct perf_header *self, int feat); +struct perf_header *perf_header__new(void); + int perf_header__process_sections(struct perf_header *self, int fd, int (*process)(struct perf_file_section *self, int feat, int fd)); diff --git a/trunk/tools/perf/util/include/linux/bitops.h b/trunk/tools/perf/util/include/linux/bitops.h index 8d63116e9435..ace57c36d1d0 100644 --- a/trunk/tools/perf/util/include/linux/bitops.h +++ b/trunk/tools/perf/util/include/linux/bitops.h @@ -7,8 +7,6 @@ #define CONFIG_GENERIC_FIND_FIRST_BIT #include "../../../../include/linux/bitops.h" -#undef __KERNEL__ - static inline void set_bit(int nr, unsigned long *addr) { addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG); diff --git a/trunk/tools/perf/util/map.c b/trunk/tools/perf/util/map.c index 09412321a80d..94ca95073c40 100644 --- a/trunk/tools/perf/util/map.c +++ b/trunk/tools/perf/util/map.c @@ -75,29 +75,6 @@ struct map *map__new(struct mmap_event *event, char *cwd, int cwdlen) return NULL; } -void map__delete(struct map *self) -{ - free(self); -} - -void map__fixup_start(struct map *self) -{ - struct rb_node *nd = rb_first(&self->dso->syms); - if (nd != NULL) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - self->start = sym->start; - } -} - -void map__fixup_end(struct map *self) -{ - struct rb_node 
*nd = rb_last(&self->dso->syms); - if (nd != NULL) { - struct symbol *sym = rb_entry(nd, struct symbol, rb_node); - self->end = sym->end; - } -} - #define DSO__DELETED "(deleted)" struct symbol * diff --git a/trunk/tools/perf/util/parse-events.c b/trunk/tools/perf/util/parse-events.c index 070027469270..0faf4f2bb5ca 100644 --- a/trunk/tools/perf/util/parse-events.c +++ b/trunk/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ -#include "../../../include/linux/hw_breakpoint.h" + #include "util.h" #include "../perf.h" #include "parse-options.h" @@ -540,81 +540,6 @@ static enum event_result parse_tracepoint_event(const char **strp, attr, strp); } -static enum event_result -parse_breakpoint_type(const char *type, const char **strp, - struct perf_event_attr *attr) -{ - int i; - - for (i = 0; i < 3; i++) { - if (!type[i]) - break; - - switch (type[i]) { - case 'r': - attr->bp_type |= HW_BREAKPOINT_R; - break; - case 'w': - attr->bp_type |= HW_BREAKPOINT_W; - break; - case 'x': - attr->bp_type |= HW_BREAKPOINT_X; - break; - default: - return EVT_FAILED; - } - } - if (!attr->bp_type) /* Default */ - attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; - - *strp = type + i; - - return EVT_HANDLED; -} - -static enum event_result -parse_breakpoint_event(const char **strp, struct perf_event_attr *attr) -{ - const char *target; - const char *type; - char *endaddr; - u64 addr; - enum event_result err; - - target = strchr(*strp, ':'); - if (!target) - return EVT_FAILED; - - if (strncmp(*strp, "mem", target - *strp) != 0) - return EVT_FAILED; - - target++; - - addr = strtoull(target, &endaddr, 0); - if (target == endaddr) - return EVT_FAILED; - - attr->bp_addr = addr; - *strp = endaddr; - - type = strchr(target, ':'); - - /* If no type is defined, just rw as default */ - if (!type) { - attr->bp_type = HW_BREAKPOINT_R | HW_BREAKPOINT_W; - } else { - err = parse_breakpoint_type(++type, strp, attr); - if (err == EVT_FAILED) - return EVT_FAILED; - } - - /* We should find a nice way to 
override the access type */ - attr->bp_len = HW_BREAKPOINT_LEN_4; - attr->type = PERF_TYPE_BREAKPOINT; - - return EVT_HANDLED; -} - static int check_events(const char *str, unsigned int i) { int n; @@ -748,10 +673,6 @@ parse_event_symbols(const char **str, struct perf_event_attr *attr) if (ret != EVT_FAILED) goto modifier; - ret = parse_breakpoint_event(str, attr); - if (ret != EVT_FAILED) - goto modifier; - fprintf(stderr, "invalid or unsupported event: '%s'\n", *str); fprintf(stderr, "Run 'perf list' for a list of valid events\n"); return EVT_FAILED; @@ -938,9 +859,6 @@ void print_events(void) "rNNN"); printf("\n"); - printf(" %-42s [hardware breakpoint]\n", "mem:[:access]"); - printf("\n"); - print_tracepoint_events(); exit(129); diff --git a/trunk/tools/perf/util/symbol.c b/trunk/tools/perf/util/symbol.c index 44d81d5ae8cf..5cc96c86861b 100644 --- a/trunk/tools/perf/util/symbol.c +++ b/trunk/tools/perf/util/symbol.c @@ -9,13 +9,8 @@ #include #include #include -#include #include -#ifndef NT_GNU_BUILD_ID -#define NT_GNU_BUILD_ID 3 -#endif - enum dso_origin { DSO__ORIG_KERNEL = 0, DSO__ORIG_JAVA_JIT, @@ -31,11 +26,7 @@ static void dsos__add(struct dso *dso); static struct dso *dsos__find(const char *name); static struct map *map__new2(u64 start, struct dso *dso); static void kernel_maps__insert(struct map *map); -static int dso__load_kernel_sym(struct dso *self, struct map *map, - symbol_filter_t filter); unsigned int symbol__priv_size; -static int vmlinux_path__nr_entries; -static char **vmlinux_path; static struct rb_root kernel_maps; @@ -78,11 +69,11 @@ static void kernel_maps__fixup_end(void) prev->end = curr->start - 1; } - /* - * We still haven't the actual symbols, so guess the - * last map final address. 
- */ - curr->end = ~0UL; + nd = rb_last(&curr->dso->syms); + if (nd) { + struct symbol *sym = rb_entry(nd, struct symbol, rb_node); + curr->end = sym->end; + } } static struct symbol *symbol__new(u64 start, u64 len, const char *name) @@ -120,8 +111,6 @@ static size_t symbol__fprintf(struct symbol *self, FILE *fp) static void dso__set_long_name(struct dso *self, char *name) { - if (name == NULL) - return; self->long_name = name; self->long_name_len = strlen(name); } @@ -334,7 +323,7 @@ static int kernel_maps__load_all_kallsyms(void) * kernel range is broken in several maps, named [kernel].N, as we don't have * the original ELF section names vmlinux have. */ -static int kernel_maps__split_kallsyms(symbol_filter_t filter) +static int kernel_maps__split_kallsyms(symbol_filter_t filter, int use_modules) { struct map *map = kernel_map; struct symbol *pos; @@ -350,6 +339,9 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) module = strchr(pos->name, '\t'); if (module) { + if (!use_modules) + goto delete_symbol; + *module++ = '\0'; if (strcmp(map->dso->name, module)) { @@ -389,6 +381,7 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } if (filter && filter(map, pos)) { +delete_symbol: rb_erase(&pos->rb_node, &kernel_map->dso->syms); symbol__delete(pos); } else { @@ -404,18 +397,17 @@ static int kernel_maps__split_kallsyms(symbol_filter_t filter) } -static int kernel_maps__load_kallsyms(symbol_filter_t filter) +static int kernel_maps__load_kallsyms(symbol_filter_t filter, int use_modules) { if (kernel_maps__load_all_kallsyms()) return -1; dso__fixup_sym_end(kernel_map->dso); - kernel_map->dso->origin = DSO__ORIG_KERNEL; - return kernel_maps__split_kallsyms(filter); + return kernel_maps__split_kallsyms(filter, use_modules); } -size_t kernel_maps__fprintf(FILE *fp) +static size_t kernel_maps__fprintf(FILE *fp) { size_t printed = fprintf(fp, "Kernel maps:\n"); struct rb_node *nd; @@ -891,40 +883,47 @@ static int dso__load_sym(struct dso *self, 
struct map *map, const char *name, return err; } -static bool dso__build_id_equal(const struct dso *self, u8 *build_id) -{ - return memcmp(self->build_id, build_id, sizeof(self->build_id)) == 0; -} - -bool dsos__read_build_ids(void) +bool fetch_build_id_table(struct list_head *head) { - bool have_build_id = false; + bool have_buildid = false; struct dso *pos; - list_for_each_entry(pos, &dsos, node) - if (filename__read_build_id(pos->long_name, pos->build_id, - sizeof(pos->build_id)) > 0) { - have_build_id = true; - pos->has_build_id = true; - } + list_for_each_entry(pos, &dsos, node) { + struct build_id_list *new; + struct build_id_event b; + size_t len; - return have_build_id; -} + if (filename__read_build_id(pos->long_name, + &b.build_id, + sizeof(b.build_id)) < 0) + continue; + have_buildid = true; + memset(&b.header, 0, sizeof(b.header)); + len = pos->long_name_len + 1; + len = ALIGN(len, 64); + b.header.size = sizeof(b) + len; -/* - * Align offset to 4 bytes as needed for note name and descriptor data. 
- */ -#define NOTE_ALIGN(n) (((n) + 3) & -4U) + new = malloc(sizeof(*new)); + if (!new) + die("No memory\n"); + + memcpy(&new->event, &b, sizeof(b)); + new->dso_name = pos->long_name; + new->len = len; + + list_add_tail(&new->list, head); + } + + return have_buildid; +} int filename__read_build_id(const char *filename, void *bf, size_t size) { int fd, err = -1; GElf_Ehdr ehdr; GElf_Shdr shdr; - Elf_Data *data; + Elf_Data *build_id_data; Elf_Scn *sec; - Elf_Kind ek; - void *ptr; Elf *elf; if (size < BUILD_ID_SIZE) @@ -940,10 +939,6 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) goto out_close; } - ek = elf_kind(elf); - if (ek != ELF_K_ELF) - goto out_elf_end; - if (gelf_getehdr(elf, &ehdr) == NULL) { pr_err("%s: cannot get elf header.\n", __func__); goto out_elf_end; @@ -951,37 +946,14 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) sec = elf_section_by_name(elf, &ehdr, &shdr, ".note.gnu.build-id", NULL); - if (sec == NULL) { - sec = elf_section_by_name(elf, &ehdr, &shdr, - ".notes", NULL); - if (sec == NULL) - goto out_elf_end; - } - - data = elf_getdata(sec, NULL); - if (data == NULL) + if (sec == NULL) goto out_elf_end; - ptr = data->d_buf; - while (ptr < (data->d_buf + data->d_size)) { - GElf_Nhdr *nhdr = ptr; - int namesz = NOTE_ALIGN(nhdr->n_namesz), - descsz = NOTE_ALIGN(nhdr->n_descsz); - const char *name; - - ptr += sizeof(*nhdr); - name = ptr; - ptr += namesz; - if (nhdr->n_type == NT_GNU_BUILD_ID && - nhdr->n_namesz == sizeof("GNU")) { - if (memcmp(name, "GNU", sizeof("GNU")) == 0) { - memcpy(bf, ptr, BUILD_ID_SIZE); - err = BUILD_ID_SIZE; - break; - } - } - ptr += descsz; - } + build_id_data = elf_getdata(sec, NULL); + if (build_id_data == NULL) + goto out_elf_end; + memcpy(bf, build_id_data->d_buf + 16, BUILD_ID_SIZE); + err = BUILD_ID_SIZE; out_elf_end: elf_end(elf); out_close: @@ -990,48 +962,23 @@ int filename__read_build_id(const char *filename, void *bf, size_t size) return err; } -int 
sysfs__read_build_id(const char *filename, void *build_id, size_t size) +static char *dso__read_build_id(struct dso *self) { - int fd, err = -1; + int len; + char *build_id = NULL; + unsigned char rawbf[BUILD_ID_SIZE]; - if (size < BUILD_ID_SIZE) + len = filename__read_build_id(self->long_name, rawbf, sizeof(rawbf)); + if (len < 0) goto out; - fd = open(filename, O_RDONLY); - if (fd < 0) + build_id = malloc(len * 2 + 1); + if (build_id == NULL) goto out; - while (1) { - char bf[BUFSIZ]; - GElf_Nhdr nhdr; - int namesz, descsz; - - if (read(fd, &nhdr, sizeof(nhdr)) != sizeof(nhdr)) - break; - - namesz = NOTE_ALIGN(nhdr.n_namesz); - descsz = NOTE_ALIGN(nhdr.n_descsz); - if (nhdr.n_type == NT_GNU_BUILD_ID && - nhdr.n_namesz == sizeof("GNU")) { - if (read(fd, bf, namesz) != namesz) - break; - if (memcmp(bf, "GNU", sizeof("GNU")) == 0) { - if (read(fd, build_id, - BUILD_ID_SIZE) == BUILD_ID_SIZE) { - err = 0; - break; - } - } else if (read(fd, bf, descsz) != descsz) - break; - } else { - int n = namesz + descsz; - if (read(fd, bf, n) != n) - break; - } - } - close(fd); + build_id__sprintf(rawbf, len, build_id); out: - return err; + return build_id; } char dso__symtab_origin(const struct dso *self) @@ -1054,17 +1001,12 @@ char dso__symtab_origin(const struct dso *self) int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) { int size = PATH_MAX; - char *name; - u8 build_id[BUILD_ID_SIZE]; + char *name = malloc(size), *build_id = NULL; int ret = -1; int fd; self->loaded = 1; - if (self->kernel) - return dso__load_kernel_sym(self, map, filter); - - name = malloc(size); if (!name) return -1; @@ -1081,6 +1023,8 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) more: do { + int berr = 0; + self->origin++; switch (self->origin) { case DSO__ORIG_FEDORA: @@ -1092,18 +1036,12 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) self->long_name); break; case DSO__ORIG_BUILDID: - if 
(filename__read_build_id(self->long_name, build_id, - sizeof(build_id))) { - char build_id_hex[BUILD_ID_SIZE * 2 + 1]; - - build_id__sprintf(build_id, sizeof(build_id), - build_id_hex); + build_id = dso__read_build_id(self); + if (build_id != NULL) { snprintf(name, size, "/usr/lib/debug/.build-id/%.2s/%s.debug", - build_id_hex, build_id_hex + 2); - if (self->has_build_id) - goto compare_build_id; - break; + build_id, build_id + 2); + goto compare_build_id; } self->origin++; /* Fall thru */ @@ -1116,11 +1054,18 @@ int dso__load(struct dso *self, struct map *map, symbol_filter_t filter) } if (self->has_build_id) { - if (filename__read_build_id(name, build_id, - sizeof(build_id)) < 0) + bool match; + build_id = malloc(BUILD_ID_SIZE); + if (build_id == NULL) goto more; + berr = filename__read_build_id(name, build_id, + BUILD_ID_SIZE); compare_build_id: - if (!dso__build_id_equal(self, build_id)) + match = berr > 0 && memcmp(build_id, self->build_id, + sizeof(self->build_id)) == 0; + free(build_id); + build_id = NULL; + if (!match) goto more; } @@ -1155,8 +1100,7 @@ static void kernel_maps__insert(struct map *map) maps__insert(&kernel_maps, map); } -struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, - symbol_filter_t filter) +struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp) { struct map *map = maps__find(&kernel_maps, ip); @@ -1165,7 +1109,7 @@ struct symbol *kernel_maps__find_symbol(u64 ip, struct map **mapp, if (map) { ip = map->map_ip(map, ip); - return map__find_symbol(map, ip, filter); + return map->dso->find_symbol(map->dso, ip); } return NULL; @@ -1185,13 +1129,32 @@ struct map *kernel_maps__find_by_dso_name(const char *name) return NULL; } -static int dsos__set_modules_path_dir(char *dirname) +static int dso__load_module_sym(struct dso *self, struct map *map, + symbol_filter_t filter) +{ + int err = 0, fd = open(self->long_name, O_RDONLY); + + self->loaded = 1; + + if (fd < 0) { + pr_err("%s: cannot open %s\n", __func__, 
self->long_name); + return err; + } + + err = dso__load_sym(self, map, self->long_name, fd, filter, 0, 1); + close(fd); + + return err; +} + +static int dsos__load_modules_sym_dir(char *dirname, symbol_filter_t filter) { struct dirent *dent; + int nr_symbols = 0, err; DIR *dir = opendir(dirname); if (!dir) { - pr_debug("%s: cannot open %s dir\n", __func__, dirname); + pr_err("%s: cannot open %s dir\n", __func__, dirname); return -1; } @@ -1205,12 +1168,14 @@ static int dsos__set_modules_path_dir(char *dirname) snprintf(path, sizeof(path), "%s/%s", dirname, dent->d_name); - if (dsos__set_modules_path_dir(path) < 0) + err = dsos__load_modules_sym_dir(path, filter); + if (err < 0) goto failure; } else { char *dot = strrchr(dent->d_name, '.'), dso_name[PATH_MAX]; struct map *map; + struct rb_node *last; char *long_name; if (dot == NULL || strcmp(dot, ".ko")) @@ -1230,16 +1195,36 @@ static int dsos__set_modules_path_dir(char *dirname) if (long_name == NULL) goto failure; dso__set_long_name(map->dso, long_name); + dso__set_basename(map->dso); + + err = dso__load_module_sym(map->dso, map, filter); + if (err < 0) + goto failure; + last = rb_last(&map->dso->syms); + if (last) { + struct symbol *sym; + /* + * We do this here as well, even having the + * symbol size found in the symtab because + * misannotated ASM symbols may have the size + * set to zero. 
+ */ + dso__fixup_sym_end(map->dso); + + sym = rb_entry(last, struct symbol, rb_node); + map->end = map->start + sym->end; + } } + nr_symbols += err; } - return 0; + return nr_symbols; failure: closedir(dir); return -1; } -static int dsos__set_modules_path(void) +static int dsos__load_modules_sym(symbol_filter_t filter) { struct utsname uts; char modules_path[PATH_MAX]; @@ -1250,7 +1235,7 @@ static int dsos__set_modules_path(void) snprintf(modules_path, sizeof(modules_path), "/lib/modules/%s/kernel", uts.release); - return dsos__set_modules_path_dir(modules_path); + return dsos__load_modules_sym_dir(modules_path, filter); } /* @@ -1272,7 +1257,7 @@ static struct map *map__new2(u64 start, struct dso *dso) return self; } -static int kernel_maps__create_module_maps(void) +static int dsos__load_modules(void) { char *line = NULL; size_t n; @@ -1322,12 +1307,6 @@ static int kernel_maps__create_module_maps(void) goto out_delete_line; } - snprintf(name, sizeof(name), - "/sys/module/%s/notes/.note.gnu.build-id", line); - if (sysfs__read_build_id(name, dso->build_id, - sizeof(dso->build_id)) == 0) - dso->has_build_id = true; - dso->origin = DSO__ORIG_KMODULE; kernel_maps__insert(map); dsos__add(dso); @@ -1336,7 +1315,7 @@ static int kernel_maps__create_module_maps(void) free(line); fclose(file); - return dsos__set_modules_path(); + return 0; out_delete_line: free(line); @@ -1347,37 +1326,13 @@ static int kernel_maps__create_module_maps(void) static int dso__load_vmlinux(struct dso *self, struct map *map, const char *vmlinux, symbol_filter_t filter) { - int err = -1, fd; + int err, fd = open(vmlinux, O_RDONLY); - if (self->has_build_id) { - u8 build_id[BUILD_ID_SIZE]; - - if (filename__read_build_id(vmlinux, build_id, - sizeof(build_id)) < 0) { - pr_debug("No build_id in %s, ignoring it\n", vmlinux); - return -1; - } - if (!dso__build_id_equal(self, build_id)) { - char expected_build_id[BUILD_ID_SIZE * 2 + 1], - vmlinux_build_id[BUILD_ID_SIZE * 2 + 1]; - - 
build_id__sprintf(self->build_id, - sizeof(self->build_id), - expected_build_id); - build_id__sprintf(build_id, sizeof(build_id), - vmlinux_build_id); - pr_debug("build_id in %s is %s while expected is %s, " - "ignoring it\n", vmlinux, vmlinux_build_id, - expected_build_id); - return -1; - } - } + self->loaded = 1; - fd = open(vmlinux, O_RDONLY); if (fd < 0) return -1; - self->loaded = 1; err = dso__load_sym(self, map, self->long_name, fd, filter, 1, 0); close(fd); @@ -1385,55 +1340,78 @@ static int dso__load_vmlinux(struct dso *self, struct map *map, return err; } -static int dso__load_kernel_sym(struct dso *self, struct map *map, - symbol_filter_t filter) +int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, + int use_modules) { - int err; - bool is_kallsyms; - - if (vmlinux_path != NULL) { - int i; - pr_debug("Looking at the vmlinux_path (%d entries long)\n", - vmlinux_path__nr_entries); - for (i = 0; i < vmlinux_path__nr_entries; ++i) { - err = dso__load_vmlinux(self, map, vmlinux_path[i], - filter); - if (err > 0) { - pr_debug("Using %s for symbols\n", - vmlinux_path[i]); - dso__set_long_name(self, - strdup(vmlinux_path[i])); - goto out_fixup; - } - } + int err = -1; + struct dso *dso = dso__new(vmlinux); + + if (dso == NULL) + return -1; + + dso->short_name = "[kernel]"; + kernel_map = map__new2(0, dso); + if (kernel_map == NULL) + goto out_delete_dso; + + kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; + + if (use_modules && dsos__load_modules() < 0) { + pr_warning("Failed to load list of modules in use! 
" + "Continuing...\n"); + use_modules = 0; } - is_kallsyms = self->long_name[0] == '['; - if (is_kallsyms) - goto do_kallsyms; - - err = dso__load_vmlinux(self, map, self->long_name, filter); - if (err <= 0) { - pr_info("The file %s cannot be used, " - "trying to use /proc/kallsyms...", self->long_name); - sleep(2); -do_kallsyms: - err = kernel_maps__load_kallsyms(filter); - if (err > 0 && !is_kallsyms) - dso__set_long_name(self, strdup("[kernel.kallsyms]")); + if (vmlinux) { + err = dso__load_vmlinux(dso, kernel_map, vmlinux, filter); + if (err > 0 && use_modules) { + int syms = dsos__load_modules_sym(filter); + + if (syms < 0) + pr_warning("Failed to read module symbols!" + " Continuing...\n"); + else + err += syms; + } } + if (err <= 0) + err = kernel_maps__load_kallsyms(filter, use_modules); + if (err > 0) { -out_fixup: - map__fixup_start(map); - map__fixup_end(map); + struct rb_node *node = rb_first(&dso->syms); + struct symbol *sym = rb_entry(node, struct symbol, rb_node); + + kernel_map->start = sym->start; + node = rb_last(&dso->syms); + sym = rb_entry(node, struct symbol, rb_node); + kernel_map->end = sym->end; + + dso->origin = DSO__ORIG_KERNEL; + kernel_maps__insert(kernel_map); + /* + * Now that we have all sorted out, just set the ->end of all + * maps: + */ + kernel_maps__fixup_end(); + dsos__add(dso); + + if (verbose) + kernel_maps__fprintf(stderr); } return err; + +out_delete_dso: + dso__delete(dso); + return -1; } LIST_HEAD(dsos); -struct dso *vdso; +struct dso *vdso; + +const char *vmlinux_name = "vmlinux"; +int modules; static void dsos__add(struct dso *dso) { @@ -1485,116 +1463,17 @@ size_t dsos__fprintf_buildid(FILE *fp) return ret; } -static int kernel_maps__create_kernel_map(const char *vmlinux_name) +int load_kernel(symbol_filter_t filter) { - struct dso *kernel = dso__new(vmlinux_name ?: "[kernel.kallsyms]"); - - if (kernel == NULL) + if (dsos__load_kernel(vmlinux_name, filter, modules) <= 0) return -1; - kernel_map = map__new2(0, kernel); 
- if (kernel_map == NULL) - goto out_delete_kernel_dso; - - kernel_map->map_ip = kernel_map->unmap_ip = identity__map_ip; - kernel->short_name = "[kernel]"; - kernel->kernel = 1; - vdso = dso__new("[vdso]"); - if (vdso == NULL) - goto out_delete_kernel_map; - - if (sysfs__read_build_id("/sys/kernel/notes", kernel->build_id, - sizeof(kernel->build_id)) == 0) - kernel->has_build_id = true; - - kernel_maps__insert(kernel_map); - dsos__add(kernel); - dsos__add(vdso); - - return 0; - -out_delete_kernel_map: - map__delete(kernel_map); - kernel_map = NULL; -out_delete_kernel_dso: - dso__delete(kernel); - return -1; -} - -static void vmlinux_path__exit(void) -{ - while (--vmlinux_path__nr_entries >= 0) { - free(vmlinux_path[vmlinux_path__nr_entries]); - vmlinux_path[vmlinux_path__nr_entries] = NULL; - } - - free(vmlinux_path); - vmlinux_path = NULL; -} - -static int vmlinux_path__init(void) -{ - struct utsname uts; - char bf[PATH_MAX]; - - if (uname(&uts) < 0) + if (!vdso) return -1; - vmlinux_path = malloc(sizeof(char *) * 5); - if (vmlinux_path == NULL) - return -1; - - vmlinux_path[vmlinux_path__nr_entries] = strdup("vmlinux"); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - vmlinux_path[vmlinux_path__nr_entries] = strdup("/boot/vmlinux"); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/boot/vmlinux-%s", uts.release); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/lib/modules/%s/build/vmlinux", uts.release); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - snprintf(bf, sizeof(bf), "/usr/lib/debug/lib/modules/%s/vmlinux", - uts.release); - vmlinux_path[vmlinux_path__nr_entries] = strdup(bf); - 
if (vmlinux_path[vmlinux_path__nr_entries] == NULL) - goto out_fail; - ++vmlinux_path__nr_entries; - - return 0; - -out_fail: - vmlinux_path__exit(); - return -1; -} - -int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, - bool use_modules) -{ - if (try_vmlinux_path && vmlinux_path__init() < 0) - return -1; - - if (kernel_maps__create_kernel_map(vmlinux_name) < 0) { - vmlinux_path__exit(); - return -1; - } + dsos__add(vdso); - if (use_modules && kernel_maps__create_module_maps() < 0) - pr_debug("Failed to load list of modules in use, " - "continuing...\n"); - /* - * Now that we have all the maps created, just set the ->end of them: - */ - kernel_maps__fixup_end(); return 0; } diff --git a/trunk/tools/perf/util/symbol.h b/trunk/tools/perf/util/symbol.h index 8c4d026e067a..5ad1019607dd 100644 --- a/trunk/tools/perf/util/symbol.h +++ b/trunk/tools/perf/util/symbol.h @@ -64,7 +64,6 @@ struct dso { u8 slen_calculated:1; u8 loaded:1; u8 has_build_id:1; - u8 kernel:1; unsigned char origin; u8 build_id[BUILD_ID_SIZE]; u16 long_name_len; @@ -78,6 +77,7 @@ void dso__delete(struct dso *self); struct symbol *dso__find_symbol(struct dso *self, u64 ip); +int dsos__load_kernel(const char *vmlinux, symbol_filter_t filter, int modules); struct dso *dsos__findnew(const char *name); int dso__load(struct dso *self, struct map *map, symbol_filter_t filter); void dsos__fprintf(FILE *fp); @@ -89,17 +89,16 @@ char dso__symtab_origin(const struct dso *self); void dso__set_build_id(struct dso *self, void *build_id); int filename__read_build_id(const char *filename, void *bf, size_t size); -int sysfs__read_build_id(const char *filename, void *bf, size_t size); -bool dsos__read_build_ids(void); +bool fetch_build_id_table(struct list_head *head); int build_id__sprintf(u8 *self, int len, char *bf); -int kernel_maps__init(const char *vmlinux_name, bool try_vmlinux_path, - bool use_modules); -size_t kernel_maps__fprintf(FILE *fp); +int load_kernel(symbol_filter_t filter); void 
symbol__init(unsigned int priv_size); extern struct list_head dsos; extern struct map *kernel_map; extern struct dso *vdso; +extern const char *vmlinux_name; +extern int modules; #endif /* __PERF_SYMBOL */ diff --git a/trunk/tools/perf/util/thread.h b/trunk/tools/perf/util/thread.h index e4b8d437725a..53addd77ce8f 100644 --- a/trunk/tools/perf/util/thread.h +++ b/trunk/tools/perf/util/thread.h @@ -26,8 +26,7 @@ size_t threads__fprintf(FILE *fp); void maps__insert(struct rb_root *maps, struct map *map); struct map *maps__find(struct rb_root *maps, u64 ip); -struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp, - symbol_filter_t filter); +struct symbol *kernel_maps__find_symbol(const u64 ip, struct map **mapp); struct map *kernel_maps__find_by_dso_name(const char *name); static inline struct map *thread__find_map(struct thread *self, u64 ip) diff --git a/trunk/tools/perf/util/trace-event-info.c b/trunk/tools/perf/util/trace-event-info.c index cace35595530..831052d4b4fb 100644 --- a/trunk/tools/perf/util/trace-event-info.c +++ b/trunk/tools/perf/util/trace-event-info.c @@ -33,11 +33,11 @@ #include #include #include -#include #include "../perf.h" #include "trace-event.h" + #define VERSION "0.5" #define _STR(x) #x @@ -483,31 +483,23 @@ static struct tracepoint_path * get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events) { struct tracepoint_path path, *ppath = &path; - int i, nr_tracepoints = 0; + int i; for (i = 0; i < nb_events; i++) { if (pattrs[i].type != PERF_TYPE_TRACEPOINT) continue; - ++nr_tracepoints; ppath->next = tracepoint_id_to_path(pattrs[i].config); if (!ppath->next) die("%s\n", "No memory to alloc tracepoints list"); ppath = ppath->next; } - return nr_tracepoints > 0 ? 
path.next : NULL; + return path.next; } - -int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) +void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) { char buf[BUFSIZ]; - struct tracepoint_path *tps = get_tracepoints_path(pattrs, nb_events); - - /* - * What? No tracepoints? No sense writing anything here, bail out. - */ - if (tps == NULL) - return -1; + struct tracepoint_path *tps; output_fd = fd; @@ -536,11 +528,11 @@ int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events) page_size = getpagesize(); write_or_die(&page_size, 4); + tps = get_tracepoints_path(pattrs, nb_events); + read_header_files(); read_ftrace_files(tps); read_event_files(tps); read_proc_kallsyms(); read_ftrace_printk(); - - return 0; } diff --git a/trunk/tools/perf/util/trace-event-read.c b/trunk/tools/perf/util/trace-event-read.c index 342dfdd43f87..44292e06cca4 100644 --- a/trunk/tools/perf/util/trace-event-read.c +++ b/trunk/tools/perf/util/trace-event-read.c @@ -471,11 +471,11 @@ void trace_report(int fd) read_or_die(buf, 3); if (memcmp(buf, test, 3) != 0) - die("no trace data in the file"); + die("not an trace data file"); read_or_die(buf, 7); if (memcmp(buf, "tracing", 7) != 0) - die("not a trace file (missing 'tracing' tag)"); + die("not a trace file (missing tracing)"); version = read_string(); if (show_version) diff --git a/trunk/tools/perf/util/trace-event.h b/trunk/tools/perf/util/trace-event.h index dd51c6872a15..f6637c2fa1fe 100644 --- a/trunk/tools/perf/util/trace-event.h +++ b/trunk/tools/perf/util/trace-event.h @@ -248,7 +248,7 @@ unsigned long long raw_field_value(struct event *event, const char *name, void *data); void *raw_field_ptr(struct event *event, const char *name, void *data); -int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); +void read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events); /* taken from kernel/trace/trace.h */ enum trace_flag_type { diff 
--git a/trunk/tools/perf/util/util.h b/trunk/tools/perf/util/util.h index e1c623e0c99e..f2203a0946bc 100644 --- a/trunk/tools/perf/util/util.h +++ b/trunk/tools/perf/util/util.h @@ -84,9 +84,6 @@ #include #endif -extern const char *graph_line; -extern const char *graph_dotted_line; - /* On most systems would have given us this, but * not on some systems (e.g. GNU/Hurd). */