From f3c7ac40a99f4044b843e6e2c4f46ab2d354c563 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: [PATCH 01/10] ftrace: remove condition from ftrace_record_ip Impact: let module functions be recorded when dyn ftrace not enabled When dynamic ftrace had a daemon and a hash to record the locations of mcount callers at run time, the recording needed to stop when ftrace was disabled. But now that the recording is done at compile time and the ftrace_record_ip is only called at boot up and when a module is loaded, we no longer need to check if ftrace_enabled is set. In fact, this breaks module load if it is not set because we skip over module functions. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 54cb9a7d15e5b..3160254f6c7e2 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -334,7 +334,7 @@ ftrace_record_ip(unsigned long ip) { struct dyn_ftrace *rec; - if (!ftrace_enabled || ftrace_disabled) + if (ftrace_disabled) return NULL; rec = ftrace_alloc_dyn_node(ip); From b17e8a37a13d0e87165054714434534bb7e69f2d Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: [PATCH 02/10] ftrace: disable ftrace on anomalies in trace start and stop Impact: robust feature to disable ftrace on start or stop tracing on error Currently only the initial conversion to nops will disable ftrace on an anomaly. But if an anomaly happens on start or stopping of the tracer, it will silently fail. This patch adds a check there too, to disable ftrace and warn if the conversion fails. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 81 +++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3160254f6c7e2..d5bd21f39524b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -348,6 +348,47 @@ ftrace_record_ip(unsigned long ip) return rec; } +static void print_ip_ins(const char *fmt, unsigned char *p) +{ + int i; + + printk(KERN_CONT "%s", fmt); + + for (i = 0; i < MCOUNT_INSN_SIZE; i++) + printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); +} + +static void ftrace_bug(int failed, unsigned long ip, + unsigned char *expected, + unsigned char *replace) +{ + switch (failed) { + case -EFAULT: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on modifying "); + print_ip_sym(ip); + break; + case -EINVAL: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace failed to modify "); + print_ip_sym(ip); + print_ip_ins(" expected: ", expected); + print_ip_ins(" actual: ", (unsigned char *)ip); + print_ip_ins(" replace: ", replace); + printk(KERN_CONT "\n"); + break; + case -EPERM: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on writing "); + print_ip_sym(ip); + break; + default: + FTRACE_WARN_ON_ONCE(1); + pr_info("ftrace faulted on unknown error "); + print_ip_sym(ip); + } +} + #define FTRACE_ADDR ((long)(ftrace_caller)) static int @@ -465,22 +506,13 @@ static void ftrace_replace_code(int enable) if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { ftrace_free_rec(rec); - } + } else + ftrace_bug(failed, rec->ip, old, new); } } } } -static void print_ip_ins(const char *fmt, unsigned char *p) -{ - int i; - - printk(KERN_CONT "%s", fmt); - - for (i = 0; i < MCOUNT_INSN_SIZE; i++) - printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); -} - static int ftrace_code_disable(struct dyn_ftrace *rec) { @@ -495,32 +527,7 @@ ftrace_code_disable(struct dyn_ftrace *rec) ret = ftrace_modify_code(ip, call, nop); if (ret) { - switch (ret) { - case -EFAULT: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); - break; - case -EINVAL: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace failed to modify "); - print_ip_sym(ip); - print_ip_ins(" expected: ", call); - print_ip_ins(" actual: ", (unsigned char *)ip); - print_ip_ins(" replace: ", nop); - printk(KERN_CONT "\n"); - break; - case -EPERM: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on writing "); - print_ip_sym(ip); - break; - default: - FTRACE_WARN_ON_ONCE(1); - pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); - } - + ftrace_bug(ret, ip, call, nop); rec->flags |= FTRACE_FL_FAILED; return 0; } From 918c115410c6cc57033835b6a401e57697f9ea4f Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: [PATCH 03/10] ftrace: do not process freed records Impact: keep from converting freed records When the tracer is started or stopped, it converts all code pointed to by the saved records into callers to ftrace or nops. When modules are unloaded, their records are freed, but they still exist within the record pages. This patch changes the code to skip over freed records. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index d5bd21f39524b..3940c71ac2a24 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -488,8 +488,12 @@ static void ftrace_replace_code(int enable) for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; - /* don't modify code that has already faulted */ - if (rec->flags & FTRACE_FL_FAILED) + /* + * Skip over free records and records that have + * failed. + */ + if (rec->flags & FTRACE_FL_FREE || + rec->flags & FTRACE_FL_FAILED) continue; /* ignore updates to this record's mcount site */ From d51ad7ac48f991c4a8834485727efa99a691cb87 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 15:48:29 -0500 Subject: [PATCH 04/10] ftrace: replace raw_local_irq_save with local_irq_save Impact: fix lockdep disabling itself when function tracing is enabled The raw_local_irq_saves used in ftrace is causing problems with lockdep. (it thinks the irq flags are out of sync and disables itself with a warning) The raw ops here are not needed, and the normal local_irq_save is fine. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 4 ++-- kernel/trace/trace_selftest.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 4a904623e05d1..dff4bee591b95 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1051,7 +1051,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) * Need to use raw, since this must be called before the * recursive protection is performed. */ - raw_local_irq_save(flags); + local_irq_save(flags); cpu = raw_smp_processor_id(); data = tr->data[cpu]; disabled = atomic_inc_return(&data->disabled); @@ -1062,7 +1062,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip) } atomic_dec(&data->disabled); - raw_local_irq_restore(flags); + local_irq_restore(flags); } #ifdef CONFIG_FUNCTION_RET_TRACER diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 24e6e075e6d65..5cb64ea061b51 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -52,7 +52,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) int cpu, ret = 0; /* Don't allow flipping of max traces now */ - raw_local_irq_save(flags); + local_irq_save(flags); __raw_spin_lock(&ftrace_max_lock); cnt = ring_buffer_entries(tr->buffer); @@ -63,7 +63,7 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) break; } __raw_spin_unlock(&ftrace_max_lock); - raw_local_irq_restore(flags); + local_irq_restore(flags); if (count) *count = cnt; From 31e889098a80ceb3e9e3c555d522b2686a6663c6 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: [PATCH 05/10] ftrace: pass module struct to arch dynamic ftrace functions Impact: allow archs more flexibility on dynamic ftrace implementations Dynamic ftrace has largly been developed on x86. Since x86 does not have the same limitations as other architectures, the ftrace interaction between the generic code and the architecture specific code was not flexible enough to handle some of the issues that other architectures have. Most notably, module trampolines. Due to the limited branch distance that archs make in calling kernel core code from modules, the module load code must create a trampoline to jump to what will make the larger jump into core kernel code. The problem arises when this happens to a call to mcount. Ftrace checks all code before modifying it and makes sure the current code is what it expects. Right now, there is not enough information to handle modifying module trampolines. This patch changes the API between generic dynamic ftrace code and the arch dependent code. There is now two functions for modifying code: ftrace_make_nop(mod, rec, addr) - convert the code at rec->ip into a nop, where the original text is calling addr. (mod is the module struct if called by module init) ftrace_make_caller(rec, addr) - convert the code rec->ip that should be a nop into a caller to addr. The record "rec" now has a new field called "arch" where the architecture can add any special attributes to each call site record. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ftrace.h | 8 +++++ arch/x86/kernel/ftrace.c | 29 ++++++++++++++-- include/linux/ftrace.h | 53 +++++++++++++++++++++++------- kernel/module.c | 2 +- kernel/trace/ftrace.c | 62 ++++++++++++++--------------------- 5 files changed, 100 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9b6a1fa19e700..2bb43b433e076 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -17,6 +17,14 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) */ return addr - 1; } + +#ifdef CONFIG_DYNAMIC_FTRACE + +struct dyn_arch_ftrace { + /* No extra data needed for x86 */ +}; + +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index fe832738e1e27..762222ad13878 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -166,7 +166,7 @@ static int ftrace_calc_offset(long ip, long addr) return (int)(addr - ip); } -unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) { static union ftrace_code_union calc; @@ -311,12 +311,12 @@ do_ftrace_mod_code(unsigned long ip, void *new_code) static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; -unsigned char *ftrace_nop_replace(void) +static unsigned char *ftrace_nop_replace(void) { return ftrace_nop; } -int +static int ftrace_modify_code(unsigned long ip, unsigned char *old_code, unsigned char *new_code) { @@ -349,6 +349,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, return 0; } +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_call_replace(ip, addr); + new = ftrace_nop_replace(); + + return ftrace_modify_code(rec->ip, old, new); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char *new, *old; + unsigned long ip = rec->ip; + + old = ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + + return ftrace_modify_code(rec->ip, old, new); +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 4fbc4a8b86a57..166a2070ef65b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -74,6 +74,9 @@ static inline void ftrace_start(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_DYNAMIC_FTRACE +/* asm/ftrace.h must be defined for archs supporting dynamic ftrace */ +#include + enum { FTRACE_FL_FREE = (1 << 0), FTRACE_FL_FAILED = (1 << 1), @@ -88,6 +91,7 @@ struct dyn_ftrace { struct list_head list; unsigned long ip; /* address of mcount call-site */ unsigned long flags; + struct dyn_arch_ftrace arch; }; int ftrace_force_update(void); @@ -95,22 +99,40 @@ void ftrace_set_filter(unsigned char *buf, int len, int reset); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); -extern unsigned char *ftrace_nop_replace(void); -extern unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr); extern int ftrace_dyn_arch_init(void *data); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); extern void ftrace_call(void); extern void mcount_call(void); -/* May be defined in arch */ -extern int ftrace_arch_read_dyn_info(char *buf, int size); +/** + * ftrace_make_nop - convert code into top + * @mod: module structure if called by module load initialization + * @rec: the mcount call site record + * @addr: the address that the call site should be calling + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should be a caller to @addr + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +extern int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr); /** - * ftrace_modify_code - modify code segment - * @ip: the address of the code segment - * @old_code: the contents of what is expected to be there - * @new_code: the code to patch in + * ftrace_make_call - convert a nop call site into a call to addr + * @rec: the mcount call site record + * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs * to be taken by the arch. The operation should carefully @@ -118,6 +140,8 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * what we expect it to be, and then on success of the compare, * it should write to the location. * + * The code segment at @rec->ip should be a nop + * * Return must be: * 0 on success * -EFAULT on error reading the location @@ -125,8 +149,11 @@ extern int ftrace_arch_read_dyn_info(char *buf, int size); * -EPERM on error writing to the location * Any other value will be considered a failure. */ -extern int ftrace_modify_code(unsigned long ip, unsigned char *old_code, - unsigned char *new_code); +extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); + + +/* May be defined in arch */ +extern int ftrace_arch_read_dyn_info(char *buf, int size); extern int skip_trace(unsigned long ip); @@ -259,11 +286,13 @@ static inline void ftrace_dump(void) { } #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); -extern void ftrace_init_module(unsigned long *start, unsigned long *end); +extern void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end); #else static inline void ftrace_init(void) { } static inline void -ftrace_init_module(unsigned long *start, unsigned long *end) { } +ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { } #endif diff --git a/kernel/module.c b/kernel/module.c index 1f4cc00e0c200..69791274e8998 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2201,7 +2201,7 @@ static noinline struct module *load_module(void __user *umod, /* sechdrs[0].sh_size is always zero */ mseg = section_objs(hdr, sechdrs, secstrings, "__mcount_loc", sizeof(*mseg), &num_mcount); - ftrace_init_module(mseg, mseg + num_mcount); + ftrace_init_module(mod, mseg, mseg + num_mcount); err = module_finalize(hdr, sechdrs, mod); if (err < 0) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3940c71ac2a24..e9a5fbfce08e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -358,9 +358,7 @@ static void print_ip_ins(const char *fmt, unsigned char *p) printk(KERN_CONT "%s%02x", i ? ":" : "", p[i]); } -static void ftrace_bug(int failed, unsigned long ip, - unsigned char *expected, - unsigned char *replace) +static void ftrace_bug(int failed, unsigned long ip) { switch (failed) { case -EFAULT: @@ -372,9 +370,7 @@ static void ftrace_bug(int failed, unsigned long ip, FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); print_ip_sym(ip); - print_ip_ins(" expected: ", expected); print_ip_ins(" actual: ", (unsigned char *)ip); - print_ip_ins(" replace: ", replace); printk(KERN_CONT "\n"); break; case -EPERM: @@ -392,8 +388,7 @@ static void ftrace_bug(int failed, unsigned long ip, #define FTRACE_ADDR ((long)(ftrace_caller)) static int -__ftrace_replace_code(struct dyn_ftrace *rec, - unsigned char *old, unsigned char *new, int enable) +__ftrace_replace_code(struct dyn_ftrace *rec, int enable) { unsigned long ip, fl; @@ -435,12 +430,10 @@ __ftrace_replace_code(struct dyn_ftrace *rec, * otherwise enable it! */ if (fl & FTRACE_FL_ENABLED) { - /* swap new and old */ - new = old; - old = ftrace_call_replace(ip, FTRACE_ADDR); + enable = 0; rec->flags &= ~FTRACE_FL_ENABLED; } else { - new = ftrace_call_replace(ip, FTRACE_ADDR); + enable = 1; rec->flags |= FTRACE_FL_ENABLED; } } else { @@ -453,10 +446,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); if (fl == FTRACE_FL_NOTRACE) return 0; - - new = ftrace_call_replace(ip, FTRACE_ADDR); - } else - old = ftrace_call_replace(ip, FTRACE_ADDR); + } if (enable) { if (rec->flags & FTRACE_FL_ENABLED) @@ -469,21 +459,18 @@ __ftrace_replace_code(struct dyn_ftrace *rec, } } - return ftrace_modify_code(ip, old, new); + if (enable) + return ftrace_make_call(rec, FTRACE_ADDR); + else + return ftrace_make_nop(NULL, rec, FTRACE_ADDR); } static void ftrace_replace_code(int enable) { int i, failed; - unsigned char *new = NULL, *old = NULL; struct dyn_ftrace *rec; struct ftrace_page *pg; - if (enable) - old = ftrace_nop_replace(); - else - new = ftrace_nop_replace(); - for (pg = ftrace_pages_start; pg; pg = pg->next) { for (i = 0; i < pg->index; i++) { rec = &pg->records[i]; @@ -504,34 +491,30 @@ static void ftrace_replace_code(int enable) unfreeze_record(rec); } - failed = __ftrace_replace_code(rec, old, new, enable); + failed = __ftrace_replace_code(rec, enable); if (failed && (rec->flags & FTRACE_FL_CONVERTED)) { rec->flags |= FTRACE_FL_FAILED; if ((system_state == SYSTEM_BOOTING) || !core_kernel_text(rec->ip)) { ftrace_free_rec(rec); } else - ftrace_bug(failed, rec->ip, old, new); + ftrace_bug(failed, rec->ip); } } } } static int -ftrace_code_disable(struct dyn_ftrace *rec) +ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) { unsigned long ip; - unsigned char *nop, *call; int ret; ip = rec->ip; - nop = ftrace_nop_replace(); - call = ftrace_call_replace(ip, mcount_addr); - - ret = ftrace_modify_code(ip, call, nop); + ret = ftrace_make_nop(mod, rec, mcount_addr); if (ret) { - ftrace_bug(ret, ip, call, nop); + ftrace_bug(ret, ip); rec->flags |= FTRACE_FL_FAILED; return 0; } @@ -650,7 +633,7 @@ static cycle_t ftrace_update_time; static unsigned long ftrace_update_cnt; unsigned long ftrace_update_tot_cnt; -static int ftrace_update_code(void) +static int ftrace_update_code(struct module *mod) { struct dyn_ftrace *p, *t; cycle_t start, stop; @@ -667,7 +650,7 @@ static int ftrace_update_code(void) list_del_init(&p->list); /* convert record (i.e, patch mcount-call with NOP) */ - if (ftrace_code_disable(p)) { + if (ftrace_code_disable(mod, p)) { p->flags |= FTRACE_FL_CONVERTED; ftrace_update_cnt++; } else @@ -1309,7 +1292,8 @@ static __init int ftrace_init_debugfs(void) fs_initcall(ftrace_init_debugfs); -static int ftrace_convert_nops(unsigned long *start, +static int ftrace_convert_nops(struct module *mod, + unsigned long *start, unsigned long *end) { unsigned long *p; @@ -1325,18 +1309,19 @@ static int ftrace_convert_nops(unsigned long *start, /* disable interrupts to prevent kstop machine */ local_irq_save(flags); - ftrace_update_code(); + ftrace_update_code(mod); local_irq_restore(flags); mutex_unlock(&ftrace_start_lock); return 0; } -void ftrace_init_module(unsigned long *start, unsigned long *end) +void ftrace_init_module(struct module *mod, + unsigned long *start, unsigned long *end) { if (ftrace_disabled || start == end) return; - ftrace_convert_nops(start, end); + ftrace_convert_nops(mod, start, end); } extern unsigned long __start_mcount_loc[]; @@ -1366,7 +1351,8 @@ void __init ftrace_init(void) last_ftrace_enabled = ftrace_enabled = 1; - ret = ftrace_convert_nops(__start_mcount_loc, + ret = ftrace_convert_nops(NULL, + __start_mcount_loc, __stop_mcount_loc); return; From 20e5227e9f55ae1969934821ccbf581563785bbe Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 14 Nov 2008 16:21:19 -0800 Subject: [PATCH 06/10] ftrace: allow NULL pointers in mcount_loc Impact: make ftrace_convert_nops() more permissive Due to the way different architecture linkers combine the data sections of the mcount_loc (the section that lists all the locations that call mcount), there may be zeros added in that section. This is usually due to strange alignments that the linker performs, that pads in zeros. This patch makes the conversion code to nops skip any pointer in the mcount_loc section that is NULL. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e9a5fbfce08e5..cc4219135dc97 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1304,6 +1304,14 @@ static int ftrace_convert_nops(struct module *mod, p = start; while (p < end) { addr = ftrace_call_adjust(*p++); + /* + * Some architecture linkers will pad between + * the different mcount_loc sections of different + * object files to satisfy alignments. + * Skip any NULL pointers. + */ + if (!addr) + continue; ftrace_record_ip(addr); } From 982c350b9ec4b3564d67f3627a274ae61bbc7e95 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 16:31:41 -0500 Subject: [PATCH 07/10] ftrace: fix dyn ftrace filter Impact: correct implementation of dyn ftrace filter The old decisions made by the filter algorithm was complex and incorrect. This lead to inconsistent enabling or disabling of functions when the filter was used. This patch simplifies that code and in doing so, corrects the usage of the filters. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 83 +++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 47 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index cc4219135dc97..b9f2e22faf2ea 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -394,72 +394,62 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) ip = rec->ip; - if (ftrace_filtered && enable) { + /* + * If this record is not to be traced and + * it is not enabled then do nothing. + * + * If this record is not to be traced and + * it is enabled then disabled it. + * + */ + if (rec->flags & FTRACE_FL_NOTRACE) { + if (rec->flags & FTRACE_FL_ENABLED) + rec->flags &= ~FTRACE_FL_ENABLED; + else + return 0; + + } else if (ftrace_filtered && enable) { /* - * If filtering is on: - * - * If this record is set to be filtered and - * is enabled then do nothing. - * - * If this record is set to be filtered and - * it is not enabled, enable it. - * - * If this record is not set to be filtered - * and it is not enabled do nothing. - * - * If this record is set not to trace then - * do nothing. - * - * If this record is set not to trace and - * it is enabled then disable it. - * - * If this record is not set to be filtered and - * it is enabled, disable it. + * Filtering is on: */ - fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE | - FTRACE_FL_ENABLED); + fl = rec->flags & (FTRACE_FL_FILTER | FTRACE_FL_ENABLED); - if ((fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) || - (fl == (FTRACE_FL_FILTER | FTRACE_FL_NOTRACE)) || - !fl || (fl == FTRACE_FL_NOTRACE)) + /* Record is filtered and enabled, do nothing */ + if (fl == (FTRACE_FL_FILTER | FTRACE_FL_ENABLED)) return 0; - /* - * If it is enabled disable it, - * otherwise enable it! - */ - if (fl & FTRACE_FL_ENABLED) { - enable = 0; + /* Record is not filtered and is not enabled do nothing */ + if (!fl) + return 0; + + /* Record is not filtered but enabled, disable it */ + if (fl == FTRACE_FL_ENABLED) rec->flags &= ~FTRACE_FL_ENABLED; - } else { - enable = 1; + else + /* Otherwise record is filtered but not enabled, enable it */ rec->flags |= FTRACE_FL_ENABLED; - } } else { + /* Disable or not filtered */ if (enable) { - /* - * If this record is set not to trace and is - * not enabled, do nothing. - */ - fl = rec->flags & (FTRACE_FL_NOTRACE | FTRACE_FL_ENABLED); - if (fl == FTRACE_FL_NOTRACE) - return 0; - } - - if (enable) { + /* if record is enabled, do nothing */ if (rec->flags & FTRACE_FL_ENABLED) return 0; + rec->flags |= FTRACE_FL_ENABLED; + } else { + + /* if record is not enabled do nothing */ if (!(rec->flags & FTRACE_FL_ENABLED)) return 0; + rec->flags &= ~FTRACE_FL_ENABLED; } } - if (enable) + if (rec->flags & FTRACE_FL_ENABLED) return ftrace_make_call(rec, FTRACE_ADDR); else return ftrace_make_nop(NULL, rec, FTRACE_ADDR); @@ -554,8 +544,7 @@ static void ftrace_startup(void) mutex_lock(&ftrace_start_lock); ftrace_start_up++; - if (ftrace_start_up == 1) - command |= FTRACE_ENABLE_CALLS; + command |= FTRACE_ENABLE_CALLS; if (saved_ftrace_func != ftrace_trace_function) { saved_ftrace_func = ftrace_trace_function; From ee02a2e5c88ca2e4d6921f08d037b46d5bf82641 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 15 Nov 2008 16:31:41 -0500 Subject: [PATCH 08/10] ftrace: make filtered functions effective on setting Impact: set filtered functions at time the filter is set It can be confusing when the set_filter_functions is set (or cleared) and the functions being recorded by the dynamic tracer does not match. This patch causes the code to be updated if the function tracer is enabled and the filter is changed. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b9f2e22faf2ea..b42ec1de546bd 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1194,7 +1194,7 @@ ftrace_regex_release(struct inode *inode, struct file *file, int enable) mutex_lock(&ftrace_sysctl_lock); mutex_lock(&ftrace_start_lock); - if (iter->filtered && ftrace_start_up && ftrace_enabled) + if (ftrace_start_up && ftrace_enabled) ftrace_run_update_code(FTRACE_ENABLE_CALLS); mutex_unlock(&ftrace_start_lock); mutex_unlock(&ftrace_sysctl_lock); From e6e7a65aabdb696cf05a56cfd495c49a11fd4cde Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:53:19 +0100 Subject: [PATCH 09/10] tracing/ftrace: fix unexpected -EINVAL when longest tracer name is set Impact: fix confusing write() -EINVAL when changing the tracer The following commit d9e540762f5cdd89f24e518ad1fd31142d0b9726 remade alive the bug which made the set of a new tracer returning -EINVAL if this is the longest name of tracer. This patch corrects it. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dff4bee591b95..80898f4870ccd 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2655,6 +2655,9 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, char buf[max_tracer_type_len+1]; int i; size_t ret; + int err; + + ret = cnt; if (cnt > max_tracer_type_len) cnt = max_tracer_type_len; @@ -2668,12 +2671,11 @@ tracing_set_trace_write(struct file *filp, const char __user *ubuf, for (i = cnt - 1; i > 0 && isspace(buf[i]); i--) buf[i] = 0; - ret = tracing_set_tracer(buf); - if (!ret) - ret = cnt; + err = tracing_set_tracer(buf); + if (err) + return err; - if (ret > 0) - filp->f_pos += ret; + filp->f_pos += ret; return ret; } From 1c80025a49855b12fa09bb6db71820e3367b1369 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Nov 2008 05:57:26 +0100 Subject: [PATCH 10/10] tracing/ftrace: change the type of the init() callback Impact: extend the ->init() method with the ability to fail This bring a way to know if the initialization of a tracer successed. A tracer must return 0 on success and a traditional error (ie: -ENOMEM) if it fails. If a tracer fails to init, it is free to print a detailed warn. The tracing api will not and switch to a new tracer will just return the error from the init callback. Note: this will be used for the return tracer. Signed-off-by: Frederic Weisbecker Signed-off-by: Ingo Molnar --- kernel/trace/trace.c | 7 ++- kernel/trace/trace.h | 3 +- kernel/trace/trace_boot.c | 3 +- kernel/trace/trace_branch.c | 3 +- kernel/trace/trace_functions.c | 3 +- kernel/trace/trace_functions_return.c | 3 +- kernel/trace/trace_irqsoff.c | 9 ++-- kernel/trace/trace_mmiotrace.c | 3 +- kernel/trace/trace_nop.c | 3 +- kernel/trace/trace_sched_switch.c | 3 +- kernel/trace/trace_sched_wakeup.c | 3 +- kernel/trace/trace_selftest.c | 66 +++++++++++++++++++++++---- kernel/trace/trace_sysprof.c | 3 +- 13 files changed, 88 insertions(+), 24 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 80898f4870ccd..396fda034e3f7 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2638,8 +2638,11 @@ static int tracing_set_tracer(char *buf) current_trace->reset(tr); current_trace = t; - if (t->init) - t->init(tr); + if (t->init) { + ret = t->init(tr); + if (ret) + goto out; + } trace_branch_enable(tr); out: diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 790ea8c0e1f3b..cdbd5cc22be8e 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -264,7 +264,8 @@ enum print_line_t { */ struct tracer { const char *name; - void (*init)(struct trace_array *tr); + /* Your tracer should raise a warning if init fails */ + int (*init)(struct trace_array *tr); void (*reset)(struct trace_array *tr); void (*start)(struct trace_array *tr); void (*stop)(struct trace_array *tr); diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c index cb333b7fd1139..a4fa2c57e34e3 100644 --- a/kernel/trace/trace_boot.c +++ b/kernel/trace/trace_boot.c @@ -47,7 +47,7 @@ static void reset_boot_trace(struct trace_array *tr) tracing_reset(tr, cpu); } -static void boot_trace_init(struct trace_array *tr) +static int boot_trace_init(struct trace_array *tr) { int cpu; boot_trace = tr; @@ -56,6 +56,7 @@ static void boot_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); tracing_sched_switch_assign_trace(tr); + return 0; } static enum print_line_t diff --git a/kernel/trace/trace_branch.c b/kernel/trace/trace_branch.c index 85265553918f2..44bd39539d61a 100644 --- a/kernel/trace/trace_branch.c +++ b/kernel/trace/trace_branch.c @@ -125,7 +125,7 @@ static void stop_branch_trace(struct trace_array *tr) disable_branch_tracing(); } -static void branch_trace_init(struct trace_array *tr) +static int branch_trace_init(struct trace_array *tr) { int cpu; @@ -133,6 +133,7 @@ static void branch_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_branch_trace(tr); + return 0; } static void branch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c index 8693b7a0a5b2d..e74f6d0a32166 100644 --- a/kernel/trace/trace_functions.c +++ b/kernel/trace/trace_functions.c @@ -42,9 +42,10 @@ static void stop_function_trace(struct trace_array *tr) tracing_stop_cmdline_record(); } -static void function_trace_init(struct trace_array *tr) +static int function_trace_init(struct trace_array *tr) { start_function_trace(tr); + return 0; } static void function_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c index 7680b21537dd2..61185f756a13e 100644 --- a/kernel/trace/trace_functions_return.c +++ b/kernel/trace/trace_functions_return.c @@ -24,13 +24,14 @@ static void stop_return_trace(struct trace_array *tr) unregister_ftrace_return(); } -static void return_trace_init(struct trace_array *tr) +static int return_trace_init(struct trace_array *tr) { int cpu; for_each_online_cpu(cpu) tracing_reset(tr, cpu); start_return_trace(tr); + return 0; } static void return_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c index d919d4eaa7cce..7c2e326bbc8b1 100644 --- a/kernel/trace/trace_irqsoff.c +++ b/kernel/trace/trace_irqsoff.c @@ -416,11 +416,12 @@ static void irqsoff_tracer_close(struct trace_iterator *iter) } #ifdef CONFIG_IRQSOFF_TRACER -static void irqsoff_tracer_init(struct trace_array *tr) +static int irqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer irqsoff_tracer __read_mostly = { @@ -442,11 +443,12 @@ static struct tracer irqsoff_tracer __read_mostly = #endif #ifdef CONFIG_PREEMPT_TRACER -static void preemptoff_tracer_init(struct trace_array *tr) +static int preemptoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptoff_tracer __read_mostly = @@ -471,11 +473,12 @@ static struct tracer preemptoff_tracer __read_mostly = #if defined(CONFIG_IRQSOFF_TRACER) && \ defined(CONFIG_PREEMPT_TRACER) -static void preemptirqsoff_tracer_init(struct trace_array *tr) +static int preemptirqsoff_tracer_init(struct trace_array *tr) { trace_type = TRACER_IRQS_OFF | TRACER_PREEMPT_OFF; __irqsoff_tracer_init(tr); + return 0; } static struct tracer preemptirqsoff_tracer __read_mostly = diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c index 51bcf370215e3..433d650eda9f8 100644 --- a/kernel/trace/trace_mmiotrace.c +++ b/kernel/trace/trace_mmiotrace.c @@ -30,13 +30,14 @@ static void mmio_reset_data(struct trace_array *tr) tracing_reset(tr, cpu); } -static void mmio_trace_init(struct trace_array *tr) +static int mmio_trace_init(struct trace_array *tr) { pr_debug("in %s\n", __func__); mmio_trace_array = tr; mmio_reset_data(tr); enable_mmiotrace(); + return 0; } static void mmio_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_nop.c b/kernel/trace/trace_nop.c index 2ef1d227e7d8c..0e77415caed3e 100644 --- a/kernel/trace/trace_nop.c +++ b/kernel/trace/trace_nop.c @@ -24,7 +24,7 @@ static void stop_nop_trace(struct trace_array *tr) /* Nothing to do! */ } -static void nop_trace_init(struct trace_array *tr) +static int nop_trace_init(struct trace_array *tr) { int cpu; ctx_trace = tr; @@ -33,6 +33,7 @@ static void nop_trace_init(struct trace_array *tr) tracing_reset(tr, cpu); start_nop_trace(tr); + return 0; } static void nop_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_switch.c b/kernel/trace/trace_sched_switch.c index be35bdfe2e38d..863390557b445 100644 --- a/kernel/trace/trace_sched_switch.c +++ b/kernel/trace/trace_sched_switch.c @@ -206,10 +206,11 @@ static void stop_sched_trace(struct trace_array *tr) tracing_stop_sched_switch_record(); } -static void sched_switch_trace_init(struct trace_array *tr) +static int sched_switch_trace_init(struct trace_array *tr) { ctx_trace = tr; start_sched_trace(tr); + return 0; } static void sched_switch_trace_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c index 983f2b1478c96..0067b49746c1d 100644 --- a/kernel/trace/trace_sched_wakeup.c +++ b/kernel/trace/trace_sched_wakeup.c @@ -331,10 +331,11 @@ static void stop_wakeup_tracer(struct trace_array *tr) unregister_trace_sched_wakeup(probe_wakeup); } -static void wakeup_tracer_init(struct trace_array *tr) +static int wakeup_tracer_init(struct trace_array *tr) { wakeup_trace = tr; start_wakeup_tracer(tr); + return 0; } static void wakeup_tracer_reset(struct trace_array *tr) diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 5cb64ea061b51..88c8eb70f54ae 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -71,6 +71,11 @@ static int trace_test_buffer(struct trace_array *tr, unsigned long *count) return ret; } +static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret) +{ + printk(KERN_WARNING "Failed to init %s tracer, init returned %d\n", + trace->name, init_ret); +} #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -111,7 +116,11 @@ int trace_selftest_startup_dynamic_tracing(struct tracer *trace, ftrace_set_filter(func_name, strlen(func_name), 1); /* enable tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* Sleep for a 1/10 of a second */ msleep(100); @@ -181,7 +190,12 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr) ftrace_enabled = 1; tracer_enabled = 1; - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -223,7 +237,12 @@ trace_selftest_startup_irqsoff(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable interrupts for a bit */ @@ -272,7 +291,12 @@ trace_selftest_startup_preemptoff(struct tracer *trace, struct trace_array *tr) } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; /* disable preemption for a bit */ @@ -321,7 +345,11 @@ trace_selftest_startup_preemptirqsoff(struct tracer *trace, struct trace_array * } /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + goto out; + } /* reset the max latency */ tracing_max_latency = 0; @@ -449,7 +477,12 @@ trace_selftest_startup_wakeup(struct tracer *trace, struct trace_array *tr) wait_for_completion(&isrt); /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* reset the max latency */ tracing_max_latency = 0; @@ -505,7 +538,12 @@ trace_selftest_startup_sched_switch(struct tracer *trace, struct trace_array *tr int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -532,7 +570,12 @@ trace_selftest_startup_sysprof(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return 0; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ @@ -554,7 +597,12 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr) int ret; /* start the tracing */ - trace->init(tr); + ret = trace->init(tr); + if (ret) { + warn_failed_init_tracer(trace, ret); + return ret; + } + /* Sleep for a 1/10 of a second */ msleep(100); /* stop the tracing. */ diff --git a/kernel/trace/trace_sysprof.c b/kernel/trace/trace_sysprof.c index 05f753422aeaf..54960edb96d07 100644 --- a/kernel/trace/trace_sysprof.c +++ b/kernel/trace/trace_sysprof.c @@ -261,11 +261,12 @@ static void stop_stack_trace(struct trace_array *tr) mutex_unlock(&sample_timer_lock); } -static void stack_trace_init(struct trace_array *tr) +static int stack_trace_init(struct trace_array *tr) { sysprof_trace = tr; start_stack_trace(tr); + return 0; } static void stack_trace_reset(struct trace_array *tr)