diff --git a/[refs] b/[refs]
index 5f51ab64f1a4..d7d8ce2c209b 100644
--- a/[refs]
+++ b/[refs]
@@ -1,2 +1,2 @@
 ---
-refs/heads/master: 777d0411cd1e384115985dac5ccd42031e3eee2b
+refs/heads/master: 63e0c7715aab6085faa487d498889f4361dc6542
diff --git a/trunk/arch/Kconfig b/trunk/arch/Kconfig
index acda512da2e2..f06010fb4838 100644
--- a/trunk/arch/Kconfig
+++ b/trunk/arch/Kconfig
@@ -137,17 +137,6 @@ config HAVE_HW_BREAKPOINT
 	bool
 	depends on PERF_EVENTS
 
-config HAVE_MIXED_BREAKPOINTS_REGS
-	bool
-	depends on HAVE_HW_BREAKPOINT
-	help
-	  Depending on the arch implementation of hardware breakpoints,
-	  some of them have separate registers for data and instruction
-	  breakpoints addresses, others have mixed registers to store
-	  them but define the access type in a control register.
-	  Select this option if your arch implements breakpoints under the
-	  latter fashion.
-
 config HAVE_USER_RETURN_NOTIFIER
 	bool
 
diff --git a/trunk/arch/sh/Kconfig b/trunk/arch/sh/Kconfig
index e6d8ab5cfa9d..8d90564c2bcf 100644
--- a/trunk/arch/sh/Kconfig
+++ b/trunk/arch/sh/Kconfig
@@ -44,7 +44,6 @@ config SUPERH32
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select HAVE_HW_BREAKPOINT
-	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS if HAVE_HW_BREAKPOINT
 	select ARCH_HIBERNATION_POSSIBLE if MMU
 
diff --git a/trunk/arch/sh/include/asm/hw_breakpoint.h b/trunk/arch/sh/include/asm/hw_breakpoint.h
index e14cad96798f..965dd780d51b 100644
--- a/trunk/arch/sh/include/asm/hw_breakpoint.h
+++ b/trunk/arch/sh/include/asm/hw_breakpoint.h
@@ -46,14 +46,10 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM 2
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
-
 /* arch/sh/kernel/hw_breakpoint.c */
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int arch_check_va_in_userspace(unsigned long va, u16 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
diff --git a/trunk/arch/sh/kernel/hw_breakpoint.c b/trunk/arch/sh/kernel/hw_breakpoint.c
index 1f2cf6229862..675eea7785d9 100644
--- a/trunk/arch/sh/kernel/hw_breakpoint.c
+++ b/trunk/arch/sh/kernel/hw_breakpoint.c
@@ -119,17 +119,26 @@ static int get_hbp_len(u16 hbp_len)
 	return len_in_bytes;
 }
 
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u16 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va <= TASK_SIZE - len);
+}
+
 /*
  * Check for virtual address in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 {
 	unsigned int len;
-	unsigned long va;
-	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	va = info->address;
-	len = get_hbp_len(info->len);
+	len = get_hbp_len(hbp_len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
 
@@ -217,7 +226,8 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -260,6 +270,15 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	if (info->address & align)
 		return -EINVAL;
 
+	/* Check that the virtual address is in the proper range */
+	if (tsk) {
+		if (!arch_check_va_in_userspace(info->address, info->len))
+			return -EFAULT;
+	} else {
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
+			return -EFAULT;
+	}
+
 	return 0;
 }
 
@@ -344,7 +363,8 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
 		perf_bp_event(bp, args->regs);
 
 		/* Deliver the signal to userspace */
-		if (!arch_check_bp_in_kernelspace(bp)) {
+		if (arch_check_va_in_userspace(bp->attr.bp_addr,
+					       bp->attr.bp_len)) {
 			siginfo_t info;
 
 			info.si_signo = args->signr;
diff --git a/trunk/arch/sh/kernel/ptrace_32.c b/trunk/arch/sh/kernel/ptrace_32.c
index d4104ce9fe53..7759a9a93211 100644
--- a/trunk/arch/sh/kernel/ptrace_32.c
+++ b/trunk/arch/sh/kernel/ptrace_32.c
@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 	bp = thread->ptrace_bps[0];
 	if (!bp) {
-		ptrace_breakpoint_init(&attr);
+		hw_breakpoint_init(&attr);
 
 		attr.bp_addr = addr;
 		attr.bp_len = HW_BREAKPOINT_LEN_2;
diff --git a/trunk/arch/x86/Kconfig b/trunk/arch/x86/Kconfig
index 01177dcbe261..97a95dfd1181 100644
--- a/trunk/arch/x86/Kconfig
+++ b/trunk/arch/x86/Kconfig
@@ -53,7 +53,6 @@ config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
-	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
diff --git a/trunk/arch/x86/include/asm/hw_breakpoint.h b/trunk/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6a..2a1bd8f4f23a 100644
--- a/trunk/arch/x86/include/asm/hw_breakpoint.h
+++ b/trunk/arch/x86/include/asm/hw_breakpoint.h
@@ -41,16 +41,12 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
-static inline int hw_breakpoint_slots(int type)
-{
-	return HBP_NUM;
-}
-
 struct perf_event;
 struct pmu;
 
-extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
-extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
+extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len);
+extern int arch_validate_hwbkpt_settings(struct perf_event *bp,
+					 struct task_struct *tsk);
 extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused,
 					   unsigned long val, void *data);
diff --git a/trunk/arch/x86/kernel/hw_breakpoint.c b/trunk/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2fd..d6cc065f519f 100644
--- a/trunk/arch/x86/kernel/hw_breakpoint.c
+++ b/trunk/arch/x86/kernel/hw_breakpoint.c
@@ -188,17 +188,26 @@ static int get_hbp_len(u8 hbp_len)
 	return len_in_bytes;
 }
 
+/*
+ * Check for virtual address in user space.
+ */
+int arch_check_va_in_userspace(unsigned long va, u8 hbp_len)
+{
+	unsigned int len;
+
+	len = get_hbp_len(hbp_len);
+
+	return (va <= TASK_SIZE - len);
+}
+
 /*
  * Check for virtual address in kernel space.
  */
-int arch_check_bp_in_kernelspace(struct perf_event *bp)
+static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len)
 {
 	unsigned int len;
-	unsigned long va;
-	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 
-	va = info->address;
-	len = get_hbp_len(info->len);
+	len = get_hbp_len(hbp_len);
 
 	return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE);
 }
 
@@ -291,7 +300,8 @@ static int arch_build_bp_info(struct perf_event *bp)
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
-int arch_validate_hwbkpt_settings(struct perf_event *bp)
+int arch_validate_hwbkpt_settings(struct perf_event *bp,
+				  struct task_struct *tsk)
 {
 	struct arch_hw_breakpoint *info = counter_arch_bp(bp);
 	unsigned int align;
@@ -304,6 +314,16 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 
 	ret = -EINVAL;
 
+	if (info->type == X86_BREAKPOINT_EXECUTE)
+		/*
+		 * Ptrace-refactoring code
+		 * For now, we'll allow instruction breakpoint only for user-space
+		 * addresses
+		 */
+		if ((!arch_check_va_in_userspace(info->address, info->len)) &&
+			info->len != X86_BREAKPOINT_EXECUTE)
+			return ret;
+
 	switch (info->len) {
 	case X86_BREAKPOINT_LEN_1:
 		align = 0;
@@ -330,6 +350,15 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
 	if (info->address & align)
 		return -EINVAL;
 
+	/* Check that the virtual address is in the proper range */
+	if (tsk) {
+		if (!arch_check_va_in_userspace(info->address, info->len))
+			return -EFAULT;
+	} else {
+		if (!arch_check_va_in_kernelspace(info->address, info->len))
+			return -EFAULT;
+	}
+
 	return 0;
 }
 
diff --git a/trunk/arch/x86/kernel/ptrace.c b/trunk/arch/x86/kernel/ptrace.c
index 70c4872cd8aa..055be0afd330 100644
--- a/trunk/arch/x86/kernel/ptrace.c
+++ b/trunk/arch/x86/kernel/ptrace.c
@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
-		ptrace_breakpoint_init(&attr);
+		hw_breakpoint_init(&attr);
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
diff --git a/trunk/include/linux/hw_breakpoint.h b/trunk/include/linux/hw_breakpoint.h
index a2d6ea49ec56..c70d27af03f9 100644
--- a/trunk/include/linux/hw_breakpoint.h
+++ b/trunk/include/linux/hw_breakpoint.h
@@ -9,22 +9,9 @@ enum {
 };
 
 enum {
-	HW_BREAKPOINT_EMPTY = 0,
-	HW_BREAKPOINT_R = 1,
-	HW_BREAKPOINT_W = 2,
-	HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W,
-	HW_BREAKPOINT_X = 4,
-	HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
-};
-
-enum bp_type_idx {
-	TYPE_INST = 0,
-#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
-	TYPE_DATA = 0,
-#else
-	TYPE_DATA = 1,
-#endif
-	TYPE_MAX
+	HW_BREAKPOINT_R = 1,
+	HW_BREAKPOINT_W = 2,
+	HW_BREAKPOINT_X = 4,
 };
 
 #ifdef __KERNEL__
@@ -47,12 +34,6 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 	attr->sample_period = 1;
 }
 
-static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
-{
-	hw_breakpoint_init(attr);
-	attr->exclude_kernel = 1;
-}
-
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
diff --git a/trunk/kernel/hw_breakpoint.c b/trunk/kernel/hw_breakpoint.c
index 7a56b22e0602..03808ed342a6 100644
--- a/trunk/kernel/hw_breakpoint.c
+++ b/trunk/kernel/hw_breakpoint.c
@@ -40,29 +40,23 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
-
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int *, nr_task_bp_pinned[TYPE_MAX]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
-
-static int nr_slots[TYPE_MAX];
-
-static int constraints_initialized;
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -73,29 +67,16 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
-__weak int hw_breakpoint_weight(struct perf_event *bp)
-{
-	return 1;
-}
-
-static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
-{
-	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
-		return TYPE_DATA;
-
-	return TYPE_INST;
-}
-
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
+static unsigned int max_task_bp_pinned(int cpu)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 
-	for (i = nr_slots[type] - 1; i >= 0; i--) {
+	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -103,7 +84,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+static int task_bp_pinned(struct task_struct *tsk)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -124,8 +105,7 @@ static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			if (find_slot_idx(bp) == type)
-				count += hw_breakpoint_weight(bp);
+			count++;
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -138,19 +118,18 @@ static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
-		    enum bp_type_idx type)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu, type);
+			slots->pinned += max_task_bp_pinned(cpu);
 		else
-			slots->pinned += task_bp_pinned(tsk, type);
-		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
+			slots->pinned += task_bp_pinned(tsk);
+		slots->flexible = per_cpu(nr_bp_flexible, cpu);
 
 		return;
 	}
@@ -158,66 +137,48 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
+		nr = per_cpu(nr_cpu_bp_pinned, cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu, type);
+			nr += max_task_bp_pinned(cpu);
 		else
-			nr += task_bp_pinned(tsk, type);
+			nr += task_bp_pinned(tsk);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible[type], cpu);
+		nr = per_cpu(nr_bp_flexible, cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
 	}
 }
 
-/*
- * For now, continue to consider flexible as pinned, until we can
- * ensure no flexible event can ever be scheduled before a pinned event
- * in a same cpu.
- */
-static void
-fetch_this_slot(struct bp_busy_slots *slots, int weight)
-{
-	slots->pinned += weight;
-}
-
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
-				enum bp_type_idx type, int weight)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 {
 	unsigned int *tsk_pinned;
-	int old_count = 0;
-	int old_idx = 0;
-	int idx = 0;
+	int count = 0;
 
-	old_count = task_bp_pinned(tsk, type);
-	old_idx = old_count - 1;
-	idx = old_idx + weight;
+	count = task_bp_pinned(tsk);
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
 	if (enable) {
-		tsk_pinned[idx]++;
-		if (old_count > 0)
-			tsk_pinned[old_idx]--;
+		tsk_pinned[count]++;
+		if (count > 0)
+			tsk_pinned[count-1]--;
 	} else {
-		tsk_pinned[idx]--;
-		if (old_count > 0)
-			tsk_pinned[old_idx]++;
+		tsk_pinned[count]--;
+		if (count > 0)
+			tsk_pinned[count-1]++;
 	}
 }
 
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void
-toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
-	       int weight)
+static void toggle_bp_slot(struct perf_event *bp, bool enable)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -225,20 +186,20 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
+			toggle_bp_task_slot(tsk, cpu, enable);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable, type, weight);
+			toggle_bp_task_slot(tsk, cpu, enable);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
 	else
-		per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
 }
 
 /*
@@ -285,29 +246,14 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
-	enum bp_type_idx type;
-	int weight;
 
-	/* We couldn't initialize breakpoint constraints on boot */
-	if (!constraints_initialized)
-		return -ENOMEM;
-
-	/* Basic checks */
-	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
-	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
-		return -EINVAL;
-
-	type = find_slot_idx(bp);
-	weight = hw_breakpoint_weight(bp);
-
-	fetch_bp_busy_slots(&slots, bp, type);
-	fetch_this_slot(&slots, weight);
+	fetch_bp_busy_slots(&slots, bp);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
+	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true, type, weight);
+	toggle_bp_slot(bp, true);
 
 	return 0;
 }
@@ -327,12 +273,7 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	enum bp_type_idx type;
-	int weight;
-
-	type = find_slot_idx(bp);
-	weight = hw_breakpoint_weight(bp);
-	toggle_bp_slot(bp, false, type, weight);
+	toggle_bp_slot(bp, false);
 }
 
 void release_bp_slot(struct perf_event *bp)
@@ -367,28 +308,6 @@ int dbg_release_bp_slot(struct perf_event *bp)
 	return 0;
 }
 
-static int validate_hw_breakpoint(struct perf_event *bp)
-{
-	int ret;
-
-	ret = arch_validate_hwbkpt_settings(bp);
-	if (ret)
-		return ret;
-
-	if (arch_check_bp_in_kernelspace(bp)) {
-		if (bp->attr.exclude_kernel)
-			return -EINVAL;
-		/*
-		 * Don't let unprivileged users set a breakpoint in the trap
-		 * path to avoid trap recursion attacks.
-		 */
-		if (!capable(CAP_SYS_ADMIN))
-			return -EPERM;
-	}
-
-	return 0;
-}
-
 int register_perf_hw_breakpoint(struct perf_event *bp)
 {
 	int ret;
@@ -397,7 +316,17 @@ int register_perf_hw_breakpoint(struct perf_event *bp)
 	if (ret)
 		return ret;
 
-	ret = validate_hw_breakpoint(bp);
+	/*
+	 * Ptrace breakpoints can be temporary perf events only
+	 * meant to reserve a slot. In this case, it is created disabled and
+	 * we don't want to check the params right now (as we put a null addr)
+	 * But perf tools create events as disabled and we want to check
+	 * the params for them.
+	 * This is a quick hack that will be removed soon, once we remove
+	 * the tmp breakpoints from ptrace
+	 */
+	if (!bp->attr.disabled || !bp->overflow_handler)
+		ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	/* if arch_validate_hwbkpt_settings() fails then release bp slot */
 	if (ret)
@@ -444,7 +373,7 @@ int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *att
 	if (attr->disabled)
 		goto end;
 
-	err = validate_hw_breakpoint(bp);
+	err = arch_validate_hwbkpt_settings(bp, bp->ctx->task);
 
 	if (!err)
 		perf_event_enable(bp);
@@ -551,36 +480,7 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
-	unsigned int **task_bp_pinned;
-	int cpu, err_cpu;
-	int i;
-
-	for (i = 0; i < TYPE_MAX; i++)
-		nr_slots[i] = hw_breakpoint_slots(i);
-
-	for_each_possible_cpu(cpu) {
-		for (i = 0; i < TYPE_MAX; i++) {
-			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
-			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
-						  GFP_KERNEL);
-			if (!*task_bp_pinned)
-				goto err_alloc;
-		}
-	}
-
-	constraints_initialized = 1;
-
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
-
- err_alloc:
-	for_each_possible_cpu(err_cpu) {
-		if (err_cpu == cpu)
-			break;
-		for (i = 0; i < TYPE_MAX; i++)
-			kfree(per_cpu(nr_task_bp_pinned[i], cpu));
-	}
-
-	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
diff --git a/trunk/kernel/trace/trace_ksym.c b/trunk/kernel/trace/trace_ksym.c
index 8eaf00749b65..d59cd6879477 100644
--- a/trunk/kernel/trace/trace_ksym.c
+++ b/trunk/kernel/trace/trace_ksym.c
@@ -34,6 +34,12 @@
 
 #include
 
+/*
+ * For now, let us restrict the no. of symbols traced simultaneously to number
+ * of available hardware breakpoint registers.
+ */
+#define KSYM_TRACER_MAX HBP_NUM
+
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
@@ -47,6 +53,7 @@ struct trace_ksym {
 
 static struct trace_array *ksym_trace_array;
 
+static unsigned int ksym_filter_entry_count;
 static unsigned int ksym_tracing_enabled;
 
 static HLIST_HEAD(ksym_filter_head);
@@ -174,6 +181,13 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	struct trace_ksym *entry;
 	int ret = -ENOMEM;
 
+	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
+		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
+		" new requests for tracing can be accepted now.\n",
+			KSYM_TRACER_MAX);
+		return -ENOSPC;
+	}
+
 	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
@@ -189,17 +203,13 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
-		if (ret == -ENOSPC) {
-			printk(KERN_ERR "ksym_tracer: Maximum limit reached."
-			" No new requests for tracing can be accepted now.\n");
-		} else {
-			printk(KERN_INFO "ksym_tracer request failed. Try again"
-			" later!!\n");
-		}
+		printk(KERN_INFO "ksym_tracer request failed. Try again"
+		" later!!\n");
 		goto err;
 	}
 
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
+	ksym_filter_entry_count++;
 
 	return 0;
 
@@ -255,6 +265,7 @@ static void __ksym_trace_reset(void)
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
+		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
@@ -327,6 +338,7 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 			goto out_unlock;
 	}
 	/* Error or "symbol:---" case: drop it */
+	ksym_filter_entry_count--;
 	hlist_del_rcu(&(entry->ksym_hlist));
 	synchronize_rcu();
 	kfree(entry);
diff --git a/trunk/tools/perf/Documentation/perf-trace-perl.txt b/trunk/tools/perf/Documentation/perf-trace-perl.txt
index ee6525ee6d69..d729cee8d987 100644
--- a/trunk/tools/perf/Documentation/perf-trace-perl.txt
+++ b/trunk/tools/perf/Documentation/perf-trace-perl.txt
@@ -49,10 +49,12 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -a -e sched:sched_wakeup
+ # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above option: -a to enable system-wide collection.
+the above options: -c 1 says to sample every event, -a to enable
+system-wide collection, -M to multiplex the output, and -R to collect
+raw samples.
 
 The format file for the sched_wakep event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
diff --git a/trunk/tools/perf/Documentation/perf-trace-python.txt b/trunk/tools/perf/Documentation/perf-trace-python.txt
index 16a86500dcf1..a241aca77184 100644
--- a/trunk/tools/perf/Documentation/perf-trace-python.txt
+++ b/trunk/tools/perf/Documentation/perf-trace-python.txt
@@ -93,7 +93,7 @@ don't care how it exited, so we'll use 'perf record' to record only
 the sys_enter events:
 
 ----
-# perf record -a -e raw_syscalls:sys_enter
+# perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
 
 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
@@ -359,7 +359,7 @@ your script:
 # cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
 
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_enter
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter
 ----
 
 The 'report' script is also a shell script with the same base name as
@@ -449,10 +449,12 @@ available as calls back into the perf executable (see below).
 As an example, the following perf record command can be used to record
 all sched_wakeup events in the system:
 
- # perf record -a -e sched:sched_wakeup
+ # perf record -c 1 -f -a -M -R -e sched:sched_wakeup
 
 Traces meant to be processed using a script should be recorded with
-the above option: -a to enable system-wide collection.
+the above options: -c 1 says to sample every event, -a to enable
+system-wide collection, -M to multiplex the output, and -R to collect
+raw samples.
 
 The format file for the sched_wakep event defines the following fields
 (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
diff --git a/trunk/tools/perf/builtin-record.c b/trunk/tools/perf/builtin-record.c
index ac989e9ba8fe..0ff67d1c4752 100644
--- a/trunk/tools/perf/builtin-record.c
+++ b/trunk/tools/perf/builtin-record.c
@@ -560,11 +560,12 @@ static int __cmd_record(int argc, const char **argv)
 			return err;
 	}
 
-	if (raw_samples) {
+	if (raw_samples && have_tracepoints(attrs, nr_counters)) {
 		perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 	} else {
 		for (i = 0; i < nr_counters; i++) {
-			if (attrs[i].sample_type & PERF_SAMPLE_RAW) {
+			if (attrs[i].sample_type & PERF_SAMPLE_RAW &&
+			    attrs[i].type == PERF_TYPE_TRACEPOINT) {
 				perf_header__set_feat(&session->header, HEADER_TRACE_INFO);
 				break;
 			}
@@ -662,19 +663,25 @@ static int __cmd_record(int argc, const char **argv)
 			return err;
 	}
 
-	err = event__synthesize_tracing_data(output, attrs,
-					     nr_counters,
-					     process_synthesized_event,
-					     session);
-	/*
-	 * FIXME err <= 0 here actually means that there were no tracepoints
-	 * so its not really an error, just that we don't need to synthesize
-	 * anything.
-	 * We really have to return this more properly and also propagate
-	 * errors that now are calling die()
-	 */
-	if (err > 0)
+	if (have_tracepoints(attrs, nr_counters)) {
+		/*
+		 * FIXME err <= 0 here actually means that
+		 * there were no tracepoints so its not really
+		 * an error, just that we don't need to
+		 * synthesize anything. We really have to
+		 * return this more properly and also
+		 * propagate errors that now are calling die()
+		 */
+		err = event__synthesize_tracing_data(output, attrs,
+						     nr_counters,
+						     process_synthesized_event,
+						     session);
+		if (err <= 0) {
+			pr_err("Couldn't record tracing data.\n");
+			return err;
+		}
 		advance_output(err);
+	}
 }
 
 	machine = perf_session__find_host_machine(session);
diff --git a/trunk/tools/perf/scripts/perl/bin/check-perf-trace-record b/trunk/tools/perf/scripts/perl/bin/check-perf-trace-record
index 423ad6aed056..e6cb1474f8e8 100644
--- a/trunk/tools/perf/scripts/perl/bin/check-perf-trace-record
+++ b/trunk/tools/perf/scripts/perl/bin/check-perf-trace-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
+perf record -c 1 -f -a -M -R -e kmem:kmalloc -e irq:softirq_entry -e kmem:kfree
diff --git a/trunk/tools/perf/scripts/perl/bin/failed-syscalls-record b/trunk/tools/perf/scripts/perl/bin/failed-syscalls-record
index eb5846bcb565..6ad9b8f5f009 100644
--- a/trunk/tools/perf/scripts/perl/bin/failed-syscalls-record
+++ b/trunk/tools/perf/scripts/perl/bin/failed-syscalls-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_exit $@
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
diff --git a/trunk/tools/perf/scripts/perl/bin/rw-by-file-record b/trunk/tools/perf/scripts/perl/bin/rw-by-file-record
index 5bfaae5a6cba..a828679837a8 100644
--- a/trunk/tools/perf/scripts/perl/bin/rw-by-file-record
+++ b/trunk/tools/perf/scripts/perl/bin/rw-by-file-record
@@ -1,3 +1,3 @@
 #!/bin/bash
-perf record -a -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_enter_write $@
 
diff --git a/trunk/tools/perf/scripts/perl/bin/rw-by-pid-record b/trunk/tools/perf/scripts/perl/bin/rw-by-pid-record
index 6e0b2f7755ac..63976bf11e8b 100644
--- a/trunk/tools/perf/scripts/perl/bin/rw-by-pid-record
+++ b/trunk/tools/perf/scripts/perl/bin/rw-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/trunk/tools/perf/scripts/perl/bin/rwtop-record b/trunk/tools/perf/scripts/perl/bin/rwtop-record
index 6e0b2f7755ac..63976bf11e8b 100644
--- a/trunk/tools/perf/scripts/perl/bin/rwtop-record
+++ b/trunk/tools/perf/scripts/perl/bin/rwtop-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
+perf record -c 1 -f -a -M -R -e syscalls:sys_enter_read -e syscalls:sys_exit_read -e syscalls:sys_enter_write -e syscalls:sys_exit_write $@
diff --git a/trunk/tools/perf/scripts/perl/bin/wakeup-latency-record b/trunk/tools/perf/scripts/perl/bin/wakeup-latency-record
index 9f2acaaae9f0..9c0cf588ff8c 100644
--- a/trunk/tools/perf/scripts/perl/bin/wakeup-latency-record
+++ b/trunk/tools/perf/scripts/perl/bin/wakeup-latency-record
@@ -1,5 +1,5 @@
 #!/bin/bash
-perf record -a -e sched:sched_switch -e sched:sched_wakeup $@
+perf record -c 1 -f -a -M -R -e sched:sched_switch -e sched:sched_wakeup $@
 
 
 
diff --git a/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record
index 85301f2471ff..c2a1a9421133 100644
--- a/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record
+++ b/trunk/tools/perf/scripts/perl/bin/workqueue-stats-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
+perf record -c 1 -f -a -M -R -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@
diff --git a/trunk/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record b/trunk/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
index eb5846bcb565..6ad9b8f5f009 100644
--- a/trunk/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
+++ b/trunk/tools/perf/scripts/python/bin/failed-syscalls-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_exit $@
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_exit $@
diff --git a/trunk/tools/perf/scripts/python/bin/sctop-record b/trunk/tools/perf/scripts/python/bin/sctop-record
index 1fc5998b721d..27ccffa26ab4 100644
--- a/trunk/tools/perf/scripts/python/bin/sctop-record
+++ b/trunk/tools/perf/scripts/python/bin/sctop-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_enter $@
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/trunk/tools/perf/scripts/python/bin/syscall-counts-by-pid-record b/trunk/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
index 1fc5998b721d..27ccffa26ab4 100644
--- a/trunk/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
+++ b/trunk/tools/perf/scripts/python/bin/syscall-counts-by-pid-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_enter $@
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/trunk/tools/perf/scripts/python/bin/syscall-counts-record b/trunk/tools/perf/scripts/python/bin/syscall-counts-record
index 1fc5998b721d..27ccffa26ab4 100644
--- a/trunk/tools/perf/scripts/python/bin/syscall-counts-record
+++ b/trunk/tools/perf/scripts/python/bin/syscall-counts-record
@@ -1,2 +1,2 @@
 #!/bin/bash
-perf record -a -e raw_syscalls:sys_enter $@
+perf record -c 1 -f -a -M -R -e raw_syscalls:sys_enter $@
diff --git a/trunk/tools/perf/util/header.c b/trunk/tools/perf/util/header.c
index 79da0e50ef8f..2b9f898efea6 100644
--- a/trunk/tools/perf/util/header.c
+++ b/trunk/tools/perf/util/header.c
@@ -436,7 +436,6 @@ static int perf_header__adds_write(struct perf_header *self, int fd)
 		trace_sec->size = lseek(fd, 0, SEEK_CUR) - trace_sec->offset;
 	}
 
-
 	if (perf_header__has_feat(self, HEADER_BUILD_ID)) {
 		struct perf_file_section *buildid_sec;
diff --git a/trunk/tools/perf/util/parse-events.h b/trunk/tools/perf/util/parse-events.h
index b8c1f64bc935..fc4ab3fe877a 100644
--- a/trunk/tools/perf/util/parse-events.h
+++ b/trunk/tools/perf/util/parse-events.h
@@ -13,6 +13,7 @@ struct tracepoint_path {
 };
 
 extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
+extern bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events);
 
 extern int nr_counters;
diff --git a/trunk/tools/perf/util/trace-event-info.c b/trunk/tools/perf/util/trace-event-info.c
index 30cd9b575953..0a1fb9d4f3b6 100644
--- a/trunk/tools/perf/util/trace-event-info.c
+++ b/trunk/tools/perf/util/trace-event-info.c
@@ -487,6 +487,11 @@ get_tracepoints_path(struct perf_event_attr *pattrs, int nb_events)
 	return nr_tracepoints > 0 ? path.next : NULL;
 }
 
+bool have_tracepoints(struct perf_event_attr *pattrs, int nb_events)
+{
+	return get_tracepoints_path(pattrs, nb_events) ? true : false;
+}
+
 int read_tracing_data(int fd, struct perf_event_attr *pattrs, int nb_events)
 {
 	char buf[BUFSIZ];
diff --git a/trunk/tools/perf/util/trace-event-parse.c b/trunk/tools/perf/util/trace-event-parse.c
index 069f261b225c..d6ef414075a6 100644
--- a/trunk/tools/perf/util/trace-event-parse.c
+++ b/trunk/tools/perf/util/trace-event-parse.c
@@ -691,6 +691,11 @@ static int __read_expected(enum event_type expect, const char *str,
 	return ret;
 }
 
+static int read_expected_warn(enum event_type expect, const char *str, bool warn)
+{
+	return __read_expected(expect, str, 1, warn);
+}
+
 static int read_expected(enum event_type expect, const char *str)
 {
 	return __read_expected(expect, str, 1, true);
@@ -3099,6 +3104,90 @@ static void print_args(struct print_arg *args)
 	}
 }
 
+static void parse_header_field(const char *field,
+			       int *offset, int *size, bool warn)
+{
+	char *token;
+	int type;
+
+	if (read_expected(EVENT_ITEM, "field") < 0)
+		return;
+	if (read_expected(EVENT_OP, ":") < 0)
+		return;
+
+	/* type */
+	if (read_expect_type(EVENT_ITEM, &token) < 0)
+		goto fail;
+	free_token(token);
+
+	if (read_expected_warn(EVENT_ITEM, field, warn) < 0)
+		return;
+	if (read_expected(EVENT_OP, ";") < 0)
+		return;
+	if (read_expected(EVENT_ITEM, "offset") < 0)
+		return;
+	if (read_expected(EVENT_OP, ":") < 0)
+		return;
+	if (read_expect_type(EVENT_ITEM, &token) < 0)
+		goto fail;
+	*offset = atoi(token);
+	free_token(token);
+	if (read_expected(EVENT_OP, ";") < 0)
+		return;
+	if (read_expected(EVENT_ITEM, "size") < 0)
+		return;
+	if (read_expected(EVENT_OP, ":") < 0)
+		return;
+	if (read_expect_type(EVENT_ITEM, &token) < 0)
+		goto fail;
+	*size = atoi(token);
+	free_token(token);
+	if (read_expected(EVENT_OP, ";") < 0)
+		return;
+	type = read_token(&token);
+	if (type != EVENT_NEWLINE) {
+		/* newer versions of the kernel have a "signed" type */
+		if (type != EVENT_ITEM)
+			goto fail;
+
+		if (strcmp(token, "signed") != 0)
+			goto fail;
+
+		free_token(token);
+
+		if (read_expected(EVENT_OP, ":") < 0)
+			return;
+
+		if (read_expect_type(EVENT_ITEM, &token))
+			goto fail;
+
+		free_token(token);
+		if (read_expected(EVENT_OP, ";") < 0)
+			return;
+
+		if (read_expect_type(EVENT_NEWLINE, &token))
+			goto fail;
+	}
+ fail:
+	free_token(token);
+}
+
+int parse_header_page(char *buf, unsigned long size)
+{
+	init_input_buf(buf, size);
+
+	parse_header_field("timestamp", &header_page_ts_offset,
+			   &header_page_ts_size, true);
+	parse_header_field("commit", &header_page_size_offset,
+			   &header_page_size_size, true);
+	parse_header_field("overwrite", &header_page_overwrite_offset,
+			   &header_page_overwrite_size, false);
+	parse_header_field("data", &header_page_data_offset,
+			   &header_page_data_size, true);
+
+	return 0;
+}
+
 int parse_ftrace_file(char *buf, unsigned long size)
 {
 	struct format_field *field;
diff --git a/trunk/tools/perf/util/trace-event-read.c b/trunk/tools/perf/util/trace-event-read.c
index cb54cd002f49..43f19c1fed3a 100644
--- a/trunk/tools/perf/util/trace-event-read.c
+++ b/trunk/tools/perf/util/trace-event-read.c
@@ -53,12 +53,6 @@ static unsigned long page_size;
 static ssize_t calc_data_size;
 static bool repipe;
 
-/* If it fails, the next read will report it */
-static void skip(int size)
-{
-	lseek(input_fd, size, SEEK_CUR);
-}
-
 static int do_read(int fd, void *buf, int size)
 {
 	int rsize = size;
@@ -190,6 +184,7 @@ static void read_ftrace_printk(void)
 static void read_header_files(void)
 {
 	unsigned long long size;
+	char *header_page;
 	char *header_event;
 	char buf[BUFSIZ];
 
@@ -199,7 +194,10 @@ static void read_header_files(void)
 		die("did not read header page");
 
 	size = read8();
-	skip(size);
+	header_page = malloc_or_die(size);
+	read_or_die(header_page, size);
+	parse_header_page(header_page, size);
+	free(header_page);
 
 	/*
 	 * The size field in the page is of type long,
diff --git a/trunk/tools/perf/util/trace-event.h b/trunk/tools/perf/util/trace-event.h
index 406d452956db..ebfee80e4a07 100644
--- a/trunk/tools/perf/util/trace-event.h
+++ b/trunk/tools/perf/util/trace-event.h
@@ -244,6 +244,7 @@ extern int header_page_data_size;
 
 extern bool latency_format;
 
+int parse_header_page(char *buf, unsigned long size);
 int trace_parse_common_type(void *data);
 int trace_parse_common_pid(void *data);
 int parse_common_pc(void *data);
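---

Not part of the patch above, but useful context for review: after this change, arch_validate_hwbkpt_settings() takes the target task and performs the address-range check itself (a task-bound event must satisfy va <= TASK_SIZE - len, a kernel-side event must sit entirely above TASK_SIZE). The following is a minimal user-space sketch of the interface being exercised; the file name is illustrative, the syscall wrapper is hand-rolled because glibc ships no perf_event_open() wrapper, and it assumes an x86 kernel built with CONFIG_PERF_EVENTS and hardware breakpoint support.

/* watchpoint-demo.c - build with: gcc -Wall -o watchpoint-demo watchpoint-demo.c */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static volatile int watched;			/* user-space address to watch */

int main(void)
{
	struct perf_event_attr attr;
	long long count = 0;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_BREAKPOINT;	/* same type hw_breakpoint_init() sets */
	attr.size = sizeof(attr);
	attr.bp_type = HW_BREAKPOINT_W;		/* count writes */
	attr.bp_addr = (unsigned long)&watched;	/* va <= TASK_SIZE - len holds here */
	attr.bp_len = HW_BREAKPOINT_LEN_4;
	attr.exclude_kernel = 1;		/* the bit ptrace_breakpoint_init() used to set */

	/* task-bound event: tsk != NULL, so the user-space range check applies */
	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	watched = 42;				/* one counted write */

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("watchpoint fired %lld time(s)\n", count);
	close(fd);
	return 0;
}

With the validation restored above, handing this task-bound event a kernel address in bp_addr would fail arch_check_va_in_userspace() and come back as -EFAULT, rather than going through the generic validate_hw_breakpoint() helper that this series removes.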