From 937fbf111ac17c456cfcbf18412a1417ff3b69ed Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sat, 8 Feb 2025 10:30:17 -0500 Subject: [PATCH 01/18] tracing: Add traceoff_after_boot option Sometimes tracing is used to debug issues during the boot process. Since the trace buffer has a limited amount of storage, it may be prudent to disable tracing after the boot is finished, otherwise the critical information may be overwritten. With this option, the main tracing buffer will be turned off at the end of the boot process. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Borislav Petkov Link: https://lore.kernel.org/20250208103017.48a7ec83@batman.local.home Signed-off-by: Steven Rostedt (Google) --- Documentation/admin-guide/kernel-parameters.txt | 9 +++++++++ kernel/trace/trace.c | 11 +++++++++++ 2 files changed, 20 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index fb8752b42ec8..e8ca4eabd152 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -7279,6 +7279,15 @@ See also "Event triggers" in Documentation/trace/events.rst + traceoff_after_boot + [FTRACE] Sometimes tracing is used to debug issues + during the boot process. Since the trace buffer has a + limited amount of storage, it may be prudent to + disable tracing after the boot is finished, otherwise + the critical information may be overwritten. With this + option, the main tracing buffer will be turned off at + the end of the boot process. + traceoff_on_warning [FTRACE] enable this option to disable tracing when a warning is hit. This turns off "tracing_on". Tracing can diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0e6d517e74e0..61458d8c3a61 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -87,6 +87,7 @@ void __init disable_tracing_selftest(const char *reason) static struct trace_iterator *tracepoint_print_iter; int tracepoint_printk; static bool tracepoint_printk_stop_on_boot __initdata; +static bool traceoff_after_boot __initdata; static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key); /* For tracers that don't implement custom flags */ @@ -330,6 +331,13 @@ static int __init set_tracepoint_printk_stop(char *str) } __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop); +static int __init set_traceoff_after_boot(char *str) +{ + traceoff_after_boot = true; + return 1; +} +__setup("traceoff_after_boot", set_traceoff_after_boot); + unsigned long long ns2usecs(u64 nsec) { nsec += 500; @@ -10704,6 +10712,9 @@ __init static int late_trace_init(void) tracepoint_printk = 0; } + if (traceoff_after_boot) + tracing_off(); + tracing_set_default_clock(); clear_boot_tracer(); return 0; From 3ca4d7af35093fa5c698e1e10e5fc48156c376fe Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Sat, 18 Jan 2025 01:23:52 +0000 Subject: [PATCH 02/18] ring-buffer: Fix typo in comment about header page pointer Fix typo in comment about header page pointer in function rb_get_reader_page. Link: https://lore.kernel.org/20250118012352.3430519-1-zhouzhouyi@gmail.com Signed-off-by: Zhouyi Zhou Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index bb6089c2951e..9d4d951090d3 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5318,7 +5318,7 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer) * moving it. The page before the header page has the * flag bit '1' set if it is pointing to the page we want. * but if the writer is in the process of moving it - * than it will be '2' or already moved '0'. + * then it will be '2' or already moved '0'. */ ret = rb_head_page_replace(reader, cpu_buffer->reader_page); From 35b98180ec989db5dfdd73c49a15b5047f243edb Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Thu, 13 Feb 2025 11:39:51 +0000 Subject: [PATCH 03/18] tracing: Remove orphaned event_trace_printk The event_trace_printk macro has no callers since commit b8e65554d80b ("tracing: remove deprecated TRACE_FORMAT"). So drop it. Link: https://lore.kernel.org/20250213113951.813258-1-hengqi.chen@gmail.com Signed-off-by: Hengqi Chen Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 5caea596fef0..fa9cf4292dff 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -859,24 +859,6 @@ int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set); int trace_set_clr_event(const char *system, const char *event, int set); int trace_array_set_clr_event(struct trace_array *tr, const char *system, const char *event, bool enable); -/* - * The double __builtin_constant_p is because gcc will give us an error - * if we try to allocate the static variable to fmt if it is not a - * constant. Even with the outer if statement optimizing out. - */ -#define event_trace_printk(ip, fmt, args...) \ -do { \ - __trace_printk_check_format(fmt, ##args); \ - tracing_record_cmdline(current); \ - if (__builtin_constant_p(fmt)) { \ - static const char *trace_printk_fmt \ - __section("__trace_printk_fmt") = \ - __builtin_constant_p(fmt) ? fmt : NULL; \ - \ - __trace_bprintk(ip, trace_printk_fmt, ##args); \ - } else \ - __trace_printk(ip, fmt, ##args); \ -} while (0) #ifdef CONFIG_PERF_EVENTS struct perf_event; From ca29a0bf1221451f566f19999f2eda8adf116ff9 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 25 Feb 2025 13:56:11 -0500 Subject: [PATCH 04/18] tracing: gfp: Remove duplication of recording GFP flags The gfp_flags when recorded in the trace require being converted from their numbers to values. Various macros are used to help facilitate this, but there's two sets of macros that need to keep track of the same GFP flags to stay in sync. Commit 60295b944ff68 ("tracing: gfp: Fix the GFP enum values shown for user space tracing tools") added a TRACE_GFP_FLAGS macro that holds the enum ___GFP_*_BIT defined bits, and creates the TRACE_DEFINE_ENUM() wrapper around them. The __def_gfpflag_names() macro creates the mapping of various flags or multiple flags to give them human readable names via the __print_flags() tracing helper macro. As the TRACE_GFP_FLAGS is a subset of the __def_gfpflags_names(), it can be used to cover the individual bit names, by redefining the internal macro TRACE_GFP_EM(): #undef TRACE_GFP_EM #define TRACE_GFP_EM(a) gfpflag_string(__GFP_##a), This will remove the bits that are duplicate between the two macros. If a new bit is created, only the TRACE_GFP_FLAGS needs to be updated and that will also update the __def_gfpflags_names() macro. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Michael Petlan Cc: Veronika Molnarova Cc: Suren Baghdasaryan Cc: Linus Torvalds Link: https://lore.kernel.org/20250225135611.1942b65c@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- include/trace/events/mmflags.h | 41 +++++++++------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 72fbfe3caeaf..82371177ef79 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -78,6 +78,13 @@ TRACE_DEFINE_ENUM(___GFP_LAST_BIT); #define gfpflag_string(flag) {(__force unsigned long)flag, #flag} +/* + * For the values that match the bits, use the TRACE_GFP_FLAGS + * which will allow any updates to be included automatically. + */ +#undef TRACE_GFP_EM +#define TRACE_GFP_EM(a) gfpflag_string(__GFP_##a), + #define __def_gfpflag_names \ gfpflag_string(GFP_TRANSHUGE), \ gfpflag_string(GFP_TRANSHUGE_LIGHT), \ @@ -91,41 +98,13 @@ TRACE_DEFINE_ENUM(___GFP_LAST_BIT); gfpflag_string(GFP_NOIO), \ gfpflag_string(GFP_NOWAIT), \ gfpflag_string(GFP_DMA), \ - gfpflag_string(__GFP_HIGHMEM), \ gfpflag_string(GFP_DMA32), \ - gfpflag_string(__GFP_HIGH), \ - gfpflag_string(__GFP_IO), \ - gfpflag_string(__GFP_FS), \ - gfpflag_string(__GFP_NOWARN), \ - gfpflag_string(__GFP_RETRY_MAYFAIL), \ - gfpflag_string(__GFP_NOFAIL), \ - gfpflag_string(__GFP_NORETRY), \ - gfpflag_string(__GFP_COMP), \ - gfpflag_string(__GFP_ZERO), \ - gfpflag_string(__GFP_NOMEMALLOC), \ - gfpflag_string(__GFP_MEMALLOC), \ - gfpflag_string(__GFP_HARDWALL), \ - gfpflag_string(__GFP_THISNODE), \ - gfpflag_string(__GFP_RECLAIMABLE), \ - gfpflag_string(__GFP_MOVABLE), \ - gfpflag_string(__GFP_ACCOUNT), \ - gfpflag_string(__GFP_WRITE), \ gfpflag_string(__GFP_RECLAIM), \ - gfpflag_string(__GFP_DIRECT_RECLAIM), \ - gfpflag_string(__GFP_KSWAPD_RECLAIM), \ - gfpflag_string(__GFP_ZEROTAGS) - -#ifdef CONFIG_KASAN_HW_TAGS -#define __def_gfpflag_names_kasan , \ - gfpflag_string(__GFP_SKIP_ZERO), \ - gfpflag_string(__GFP_SKIP_KASAN) -#else -#define __def_gfpflag_names_kasan -#endif + TRACE_GFP_FLAGS \ + { 0, "none" } #define show_gfp_flags(flags) \ - (flags) ? __print_flags(flags, "|", \ - __def_gfpflag_names __def_gfpflag_names_kasan \ + (flags) ? __print_flags(flags, "|", __def_gfpflag_names \ ) : "none" #ifdef CONFIG_MMU From effd1059c448e2b982f4de5d3b03deca2714b2a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Mon, 17 Feb 2025 14:16:12 +0100 Subject: [PATCH 05/18] tracing/user_events: Don't use %pK through printk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restricted pointers ("%pK") are not meant to be used through printk(). It can unintentionally expose security sensitive, raw pointer values. Use regular pointer formatting instead. Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250217-restricted-pointers-trace-v1-1-bbe9ea279848@linutronix.de Link: https://lore.kernel.org/lkml/20250113171731-dc10e3c1-da64-4af0-b767-7c7070468023@linutronix.de/ Signed-off-by: Thomas Weißschuh Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 97325fbd6283..3effc6fce20e 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -455,7 +455,7 @@ static void user_event_enabler_fault_fixup(struct work_struct *work) if (ret && ret != -ENOENT) { struct user_event *user = enabler->event; - pr_warn("user_events: Fault for mm: 0x%pK @ 0x%llx event: %s\n", + pr_warn("user_events: Fault for mm: 0x%p @ 0x%llx event: %s\n", mm->mm, (unsigned long long)uaddr, EVENT_NAME(user)); } From 06889030f585a3daa34f1aa43bc3a70cb9aba8a1 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Wed, 19 Feb 2025 16:43:33 +0100 Subject: [PATCH 06/18] tracing/user_events: Slightly simplify user_seq_show() 2 seq_puts() calls can be merged. It saves a few lines of code and a few cycles, should it matter. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/845caa94b74cea8d72c158bf1994fe250beee28c.1739979791.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 3effc6fce20e..af42aaa3d172 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -2793,11 +2793,8 @@ static int user_seq_show(struct seq_file *m, void *p) seq_printf(m, "%s", EVENT_TP_NAME(user)); - if (status != 0) - seq_puts(m, " #"); - if (status != 0) { - seq_puts(m, " Used by"); + seq_puts(m, " # Used by"); if (status & EVENT_STATUS_FTRACE) seq_puts(m, " ftrace"); if (status & EVENT_STATUS_PERF) From d4ae5070733b114b9431a49c9640437f5e8b2ec5 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 4 Mar 2025 10:37:24 -0500 Subject: [PATCH 07/18] tracing: Update MAINTAINERS file to include tracepoint.c Updates to the tracepoint.c file should not only be Cc'd to LKML, but also to linux-trace-kernel@vger.kernel.org. But because it is not listed in the MAINTAINERS file, it does not get added. Cc: Mathieu Desnoyers Cc: Masami Hiramatsu Link: https://lore.kernel.org/20250304103724.174ecb36@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 4ff26fa94895..c203aa4225b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -24063,6 +24063,7 @@ F: fs/tracefs/ F: include/linux/trace*.h F: include/trace/ F: kernel/trace/ +F: kernel/tracepoint.c F: scripts/tracing/ F: tools/testing/selftests/ftrace/ From a926d15a799acc6935820340b5a1428754f8ab45 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Fri, 7 Mar 2025 10:39:41 -0500 Subject: [PATCH 08/18] scripts/tracing: Remove scripts/tracing/draw_functrace.py The draw_functrace.py hasn't worked in years. There's better ways to accomplish the same thing (via libtracefs). Remove it. Link: https://lore.kernel.org/linux-trace-kernel/20250210-debuginfo-v1-1-368feb58292a@purestorage.com/ Cc: Masami Hiramatsu Cc: Mark Rutland Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Uday Shankar Cc: Masahiro Yamada Link: https://lore.kernel.org/20250307103941.070654e7@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- scripts/tracing/draw_functrace.py | 129 ------------------------------ 1 file changed, 129 deletions(-) delete mode 100755 scripts/tracing/draw_functrace.py diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py deleted file mode 100755 index 42fa87300941..000000000000 --- a/scripts/tracing/draw_functrace.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python -# SPDX-License-Identifier: GPL-2.0-only - -""" -Copyright 2008 (c) Frederic Weisbecker - -This script parses a trace provided by the function tracer in -kernel/trace/trace_functions.c -The resulted trace is processed into a tree to produce a more human -view of the call stack by drawing textual but hierarchical tree of -calls. Only the functions's names and the call time are provided. - -Usage: - Be sure that you have CONFIG_FUNCTION_TRACER - # mount -t tracefs nodev /sys/kernel/tracing - # echo function > /sys/kernel/tracing/current_tracer - $ cat /sys/kernel/tracing/trace_pipe > ~/raw_trace_func - Wait some times but not too much, the script is a bit slow. - Break the pipe (Ctrl + Z) - $ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace - Then you have your drawn trace in draw_functrace -""" - - -import sys, re - -class CallTree: - """ This class provides a tree representation of the functions - call stack. If a function has no parent in the kernel (interrupt, - syscall, kernel thread...) then it is attached to a virtual parent - called ROOT. - """ - ROOT = None - - def __init__(self, func, time = None, parent = None): - self._func = func - self._time = time - if parent is None: - self._parent = CallTree.ROOT - else: - self._parent = parent - self._children = [] - - def calls(self, func, calltime): - """ If a function calls another one, call this method to insert it - into the tree at the appropriate place. - @return: A reference to the newly created child node. - """ - child = CallTree(func, calltime, self) - self._children.append(child) - return child - - def getParent(self, func): - """ Retrieve the last parent of the current node that - has the name given by func. If this function is not - on a parent, then create it as new child of root - @return: A reference to the parent. - """ - tree = self - while tree != CallTree.ROOT and tree._func != func: - tree = tree._parent - if tree == CallTree.ROOT: - child = CallTree.ROOT.calls(func, None) - return child - return tree - - def __repr__(self): - return self.__toString("", True) - - def __toString(self, branch, lastChild): - if self._time is not None: - s = "%s----%s (%s)\n" % (branch, self._func, self._time) - else: - s = "%s----%s\n" % (branch, self._func) - - i = 0 - if lastChild: - branch = branch[:-1] + " " - while i < len(self._children): - if i != len(self._children) - 1: - s += "%s" % self._children[i].__toString(branch +\ - " |", False) - else: - s += "%s" % self._children[i].__toString(branch +\ - " |", True) - i += 1 - return s - -class BrokenLineException(Exception): - """If the last line is not complete because of the pipe breakage, - we want to stop the processing and ignore this line. - """ - pass - -class CommentLineException(Exception): - """ If the line is a comment (as in the beginning of the trace file), - just ignore it. - """ - pass - - -def parseLine(line): - line = line.strip() - if line.startswith("#"): - raise CommentLineException - m = re.match("[^]]+?\\] +([a-z.]+) +([0-9.]+): (\\w+) <-(\\w+)", line) - if m is None: - raise BrokenLineException - return (m.group(2), m.group(3), m.group(4)) - - -def main(): - CallTree.ROOT = CallTree("Root (Nowhere)", None, None) - tree = CallTree.ROOT - - for line in sys.stdin: - try: - calltime, callee, caller = parseLine(line) - except BrokenLineException: - break - except CommentLineException: - continue - tree = tree.getParent(caller) - tree = tree.calls(callee, calltime) - - print(CallTree.ROOT) - -if __name__ == "__main__": - main() From 502d2e71a89fa706843fa9277f4d6de1947072c8 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 9 Mar 2025 12:56:06 +0100 Subject: [PATCH 09/18] tracing: Constify struct event_trigger_ops 'event_trigger_ops mwifiex_if_ops' are not modified in these drivers. Constifying these structures moves some data to a read-only section, so increase overall security, especially when the structure holds some function pointers. On a x86_64, with allmodconfig, as an example: Before: ====== text data bss dec hex filename 31368 9024 6200 46592 b600 kernel/trace/trace_events_trigger.o After: ===== text data bss dec hex filename 31752 8608 6200 46560 b5e0 kernel/trace/trace_events_trigger.o Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/66e8f990e649678e4be37d4d1a19158ca0dea2f4.1741521295.git.christophe.jaillet@wanadoo.fr Signed-off-by: Christophe JAILLET Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 4 +-- kernel/trace/trace_eprobe.c | 6 ++--- kernel/trace/trace_events_hist.c | 20 +++++++-------- kernel/trace/trace_events_trigger.c | 38 ++++++++++++++--------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 9c21ba45b7af..880cf9abc055 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1714,7 +1714,7 @@ struct event_trigger_data { unsigned long count; int ref; int flags; - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; struct event_command *cmd_ops; struct event_filter __rcu *filter; char *filter_str; @@ -1959,7 +1959,7 @@ struct event_command { int (*set_filter)(char *filter_str, struct event_trigger_data *data, struct trace_event_file *file); - struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); + const struct event_trigger_ops *(*get_trigger_ops)(char *cmd, char *param); }; /** diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 82fd637cfc19..c1cc2fe54887 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -478,7 +478,7 @@ static void eprobe_trigger_func(struct event_trigger_data *data, __eprobe_trace_func(edata, rec); } -static struct event_trigger_ops eprobe_trigger_ops = { +static const struct event_trigger_ops eprobe_trigger_ops = { .trigger = eprobe_trigger_func, .print = eprobe_trigger_print, .init = eprobe_trigger_init, @@ -507,8 +507,8 @@ static void eprobe_trigger_unreg_func(char *glob, } -static struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd, - char *param) +static const struct event_trigger_ops *eprobe_trigger_get_ops(char *cmd, + char *param) { return &eprobe_trigger_ops; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 261163b00137..d3c4880fa8b1 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -6191,7 +6191,7 @@ static void event_hist_trigger_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_hist_trigger_ops = { +static const struct event_trigger_ops event_hist_trigger_ops = { .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_init, @@ -6223,15 +6223,15 @@ static void event_hist_trigger_named_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_hist_trigger_named_ops = { +static const struct event_trigger_ops event_hist_trigger_named_ops = { .trigger = event_hist_trigger, .print = event_hist_trigger_print, .init = event_hist_trigger_named_init, .free = event_hist_trigger_named_free, }; -static struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd, - char *param) +static const struct event_trigger_ops *event_hist_get_trigger_ops(char *cmd, + char *param) { return &event_hist_trigger_ops; } @@ -6826,38 +6826,38 @@ hist_enable_count_trigger(struct event_trigger_data *data, hist_enable_trigger(data, buffer, rec, event); } -static struct event_trigger_ops hist_enable_trigger_ops = { +static const struct event_trigger_ops hist_enable_trigger_ops = { .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_enable_count_trigger_ops = { +static const struct event_trigger_ops hist_enable_count_trigger_ops = { .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_disable_trigger_ops = { +static const struct event_trigger_ops hist_disable_trigger_ops = { .trigger = hist_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops hist_disable_count_trigger_ops = { +static const struct event_trigger_ops hist_disable_count_trigger_ops = { .trigger = hist_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * hist_enable_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; bool enable; enable = (strcmp(cmd, ENABLE_HIST_STR) == 0); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index d45448947094..b66b6d235d91 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -825,7 +825,7 @@ struct event_trigger_data *event_trigger_alloc(struct event_command *cmd_ops, void *private_data) { struct event_trigger_data *trigger_data; - struct event_trigger_ops *trigger_ops; + const struct event_trigger_ops *trigger_ops; trigger_ops = cmd_ops->get_trigger_ops(cmd, param); @@ -1367,38 +1367,38 @@ traceoff_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops traceon_trigger_ops = { +static const struct event_trigger_ops traceon_trigger_ops = { .trigger = traceon_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceon_count_trigger_ops = { +static const struct event_trigger_ops traceon_count_trigger_ops = { .trigger = traceon_count_trigger, .print = traceon_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceoff_trigger_ops = { +static const struct event_trigger_ops traceoff_trigger_ops = { .trigger = traceoff_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops traceoff_count_trigger_ops = { +static const struct event_trigger_ops traceoff_count_trigger_ops = { .trigger = traceoff_count_trigger, .print = traceoff_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * onoff_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; /* we register both traceon and traceoff to this callback */ if (strcmp(cmd, "traceon") == 0) @@ -1491,21 +1491,21 @@ snapshot_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops snapshot_trigger_ops = { +static const struct event_trigger_ops snapshot_trigger_ops = { .trigger = snapshot_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops snapshot_count_trigger_ops = { +static const struct event_trigger_ops snapshot_count_trigger_ops = { .trigger = snapshot_count_trigger, .print = snapshot_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * snapshot_get_trigger_ops(char *cmd, char *param) { return param ? &snapshot_count_trigger_ops : &snapshot_trigger_ops; @@ -1586,21 +1586,21 @@ stacktrace_trigger_print(struct seq_file *m, struct event_trigger_data *data) data->filter_str); } -static struct event_trigger_ops stacktrace_trigger_ops = { +static const struct event_trigger_ops stacktrace_trigger_ops = { .trigger = stacktrace_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops stacktrace_count_trigger_ops = { +static const struct event_trigger_ops stacktrace_count_trigger_ops = { .trigger = stacktrace_count_trigger, .print = stacktrace_trigger_print, .init = event_trigger_init, .free = event_trigger_free, }; -static struct event_trigger_ops * +static const struct event_trigger_ops * stacktrace_get_trigger_ops(char *cmd, char *param) { return param ? &stacktrace_count_trigger_ops : &stacktrace_trigger_ops; @@ -1711,28 +1711,28 @@ void event_enable_trigger_free(struct event_trigger_data *data) } } -static struct event_trigger_ops event_enable_trigger_ops = { +static const struct event_trigger_ops event_enable_trigger_ops = { .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_enable_count_trigger_ops = { +static const struct event_trigger_ops event_enable_count_trigger_ops = { .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_disable_trigger_ops = { +static const struct event_trigger_ops event_disable_trigger_ops = { .trigger = event_enable_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, .free = event_enable_trigger_free, }; -static struct event_trigger_ops event_disable_count_trigger_ops = { +static const struct event_trigger_ops event_disable_count_trigger_ops = { .trigger = event_enable_count_trigger, .print = event_enable_trigger_print, .init = event_trigger_init, @@ -1916,10 +1916,10 @@ void event_enable_unregister_trigger(char *glob, data->ops->free(data); } -static struct event_trigger_ops * +static const struct event_trigger_ops * event_enable_get_trigger_ops(char *cmd, char *param) { - struct event_trigger_ops *ops; + const struct event_trigger_ops *ops; bool enable; #ifdef CONFIG_HIST_TRIGGERS From 30c94bbceeda7c1312e568ae17b5244a0148cb2c Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 7 Mar 2025 11:38:58 +0800 Subject: [PATCH 10/18] tracepoint: Print the function symbol when tracepoint_debug is set When tracepoint_debug is set, we may get the output in kernel log: [ 380.013843] Probe 0 : 00000000f0d68cda It is not readable, so change to print the function symbol. After this patch, the output may becomes: [ 55.225555] Probe 0 : perf_trace_sched_wakeup_template+0x0/0x20 Link: https://lore.kernel.org/20250307033858.4134-1-shijie@os.amperecomputing.com Signed-off-by: Huang Shijie Signed-off-by: Steven Rostedt (Google) --- kernel/tracepoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1848ce7e2976..62719d2941c9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -127,7 +127,7 @@ static void debug_print_probes(struct tracepoint_func *funcs) return; for (i = 0; funcs[i].func; i++) - printk(KERN_DEBUG "Probe %d : %p\n", i, funcs[i].func); + printk(KERN_DEBUG "Probe %d : %pSb\n", i, funcs[i].func); } static struct tracepoint_func * From c1657640a8b3f7023675511d4857aff4f32f3a51 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Fri, 14 Mar 2025 15:13:11 +0100 Subject: [PATCH 11/18] tracing: gfp: vsprintf: Do not print "none" when using %pGg printf format The commit ca29a0bf122145 ("tracing: gfp: Remove duplication of recording GFP flags") caused the following regression in printf_test selftest: [ 46.208199] test_printf: kvasprintf(..., "%pGg", ...) returned 'none|0xfc000000', expected '0xfc000000' [ 46.208209] test_printf: kvasprintf(..., "%pGg", ...) returned '__GFP_HIGH|none|0xfc000000', expected '__GFP_HIGH|0xfc000000' The problem is the new '{ 0, "none" }' entry in __def_gfpflag_names macro and the following code: char *format_flags(char *buf, char *end, unsigned long flags, const struct trace_print_flags *names) { [...] if ((flags & mask) != mask) continue; [...] } The purpose of the code is to print the name of a mask instead of bits, for example, printk "GFP_ZONEMASK", instead of "__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE". Unfortunately, the mask "0" pass this check and "none" is always printed. A solution would be to move TRACE_GFP_FLAGS up so that it is not the last entry. But it breaks the rule that named masks must be defined before names of single bytes. Otherwise, it would print the names of the bytes instead of the mask. Instead, replace '{ 0, "none" }' with '{ 0, NULL }'. It works because __def_gfpflag_names defines a standalone array and this is the standard trailing entry. The code processing these arrays always ends the cycle when flag->name == NULL. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Andrew Morton Cc: Michael Petlan Cc: Veronika Molnarova Cc: Suren Baghdasaryan Cc: Rasmus Villemoes Cc: Andy Shevchenko Cc: Tamir Duberstein Cc: Linus Torvalds Link: https://lore.kernel.org/Z9Q5d11ZbA3CNMZm@pathway.suse.cz Fixes: ca29a0bf122145 ("tracing: gfp: Remove duplication of recording GFP flags") Signed-off-by: Petr Mladek Signed-off-by: Steven Rostedt (Google) --- include/trace/events/mmflags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index 82371177ef79..15aae955a10b 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -101,7 +101,7 @@ TRACE_DEFINE_ENUM(___GFP_LAST_BIT); gfpflag_string(GFP_DMA32), \ gfpflag_string(__GFP_RECLAIM), \ TRACE_GFP_FLAGS \ - { 0, "none" } + { 0, NULL } #define show_gfp_flags(flags) \ (flags) ? __print_flags(flags, "|", __def_gfpflag_names \ From 81c7a515b0f1e1254edd8adcfa1b780c31833565 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Wed, 19 Mar 2025 21:50:27 +0000 Subject: [PATCH 12/18] tracing: Align synth event print fmt The vast majority of ftrace event print fmt consist of a space-separated field=value pair. Synthetic event currently use a comma-separated field=value pair, which sticks out from events created via more classical means. Align the format of synth events so they look just like any other event, for better consistency and less headache when doing crude text-based data processing. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Link: https://lore.kernel.org/20250319215028.1680278-1-douglas.raillard@arm.com Signed-off-by: Douglas Raillard Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index e3f7d09e5512..07ff8be8267e 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -612,7 +612,7 @@ static int __set_synth_event_print_fmt(struct synth_event *event, fmt = synth_field_fmt(event->fields[i]->type); pos += snprintf(buf + pos, LEN_OR_ZERO, "%s=%s%s", event->fields[i]->name, fmt, - i == event->n_fields - 1 ? "" : ", "); + i == event->n_fields - 1 ? "" : " "); } pos += snprintf(buf + pos, LEN_OR_ZERO, "\""); From 7e6b3fcc9c5294aeafed0dbe1a09a1bc899bd0f2 Mon Sep 17 00:00:00 2001 From: Ran Xiaokai Date: Fri, 21 Mar 2025 09:52:49 +0000 Subject: [PATCH 13/18] tracing/osnoise: Fix possible recursive locking for cpus_read_lock() Lockdep reports this deadlock log: osnoise: could not start sampling thread ============================================ WARNING: possible recursive locking detected -------------------------------------------- CPU0 ---- lock(cpu_hotplug_lock); lock(cpu_hotplug_lock); Call Trace: print_deadlock_bug+0x282/0x3c0 __lock_acquire+0x1610/0x29a0 lock_acquire+0xcb/0x2d0 cpus_read_lock+0x49/0x120 stop_per_cpu_kthreads+0x7/0x60 start_kthread+0x103/0x120 osnoise_hotplug_workfn+0x5e/0x90 process_one_work+0x44f/0xb30 worker_thread+0x33e/0x5e0 kthread+0x206/0x3b0 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x11/0x20 This is the deadlock scenario: osnoise_hotplug_workfn() guard(cpus_read_lock)(); // first lock call start_kthread(cpu) if (IS_ERR(kthread)) { stop_per_cpu_kthreads(); { cpus_read_lock(); // second lock call. Cause the AA deadlock } } It is not necessary to call stop_per_cpu_kthreads() which stops osnoise kthread for every other CPUs in the system if a failure occurs during hotplug of a certain CPU. For start_per_cpu_kthreads(), if the start_kthread() call fails, this function calls stop_per_cpu_kthreads() to handle the error. Therefore, similarly, there is no need to call stop_per_cpu_kthreads() again within start_kthread(). So just remove stop_per_cpu_kthreads() from start_kthread to solve this issue. Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250321095249.2739397-1-ranxiaokai627@163.com Fixes: c8895e271f79 ("trace/osnoise: Support hotplug operations") Signed-off-by: Ran Xiaokai Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index f3a2722ee4c0..c83a51218ee5 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -2032,7 +2032,6 @@ static int start_kthread(unsigned int cpu) if (IS_ERR(kthread)) { pr_err(BANNER "could not start sampling thread\n"); - stop_per_cpu_kthreads(); return -ENOMEM; } From 0c588ac0ca6c22b774d9ad4a6594681fdfa57d9d Mon Sep 17 00:00:00 2001 From: Gabriele Paoloni Date: Fri, 21 Mar 2025 18:08:21 +0100 Subject: [PATCH 14/18] tracing: fix return value in __ftrace_event_enable_disable for TRACE_REG_UNREGISTER When __ftrace_event_enable_disable invokes the class callback to unregister the event, the return value is not reported up to the caller, hence leading to event unregister failures being silently ignored. This patch assigns the ret variable to the invocation of the event unregister callback, so that its return value is stored and reported to the caller, and it raises a warning in case of error. Link: https://lore.kernel.org/20250321170821.101403-1-gpaoloni@redhat.com Signed-off-by: Gabriele Paoloni Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 513de9ceb80e..8e7603acca21 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -790,7 +790,9 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file, clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags); } - call->class->reg(call, TRACE_REG_UNREGISTER, file); + ret = call->class->reg(call, TRACE_REG_UNREGISTER, file); + + WARN_ON_ONCE(ret); } /* If in SOFT_MODE, just set the SOFT_DISABLE_BIT, else clear it */ if (file->flags & EVENT_FILE_FL_SOFT_MODE) From 21581dd4e7ff6c07d0ab577e3c32b13a74b31522 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 18 Mar 2025 18:09:05 +0000 Subject: [PATCH 15/18] tracing: Ensure module defining synth event cannot be unloaded while tracing Currently, using synth_event_delete() will fail if the event is being used (tracing in progress), but that is normally done in the module exit function. At that stage, failing is problematic as returning a non-zero status means the module will become locked (impossible to unload or reload again). Instead, ensure the module exit function does not get called in the first place by increasing the module refcnt when the event is enabled. Cc: stable@vger.kernel.org Cc: Mathieu Desnoyers Fixes: 35ca5207c2d11 ("tracing: Add synthetic event command generation functions") Link: https://lore.kernel.org/20250318180906.226841-1-douglas.raillard@arm.com Signed-off-by: Douglas Raillard Acked-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 07ff8be8267e..463b0073629a 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -852,6 +852,34 @@ static struct trace_event_fields synth_event_fields_array[] = { {} }; +static int synth_event_reg(struct trace_event_call *call, + enum trace_reg type, void *data) +{ + struct synth_event *event = container_of(call, struct synth_event, call); + + switch (type) { + case TRACE_REG_REGISTER: + case TRACE_REG_PERF_REGISTER: + if (!try_module_get(event->mod)) + return -EBUSY; + break; + default: + break; + } + + int ret = trace_event_reg(call, type, data); + + switch (type) { + case TRACE_REG_UNREGISTER: + case TRACE_REG_PERF_UNREGISTER: + module_put(event->mod); + break; + default: + break; + } + return ret; +} + static int register_synth_event(struct synth_event *event) { struct trace_event_call *call = &event->call; @@ -881,7 +909,7 @@ static int register_synth_event(struct synth_event *event) goto out; } call->flags = TRACE_EVENT_FL_TRACEPOINT; - call->class->reg = trace_event_reg; + call->class->reg = synth_event_reg; call->class->probe = trace_event_raw_event_synth; call->data = event; call->tp = event->tp; From 8eb1518642738c6892bd629b46043513a3bf1a6a Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Sun, 23 Mar 2025 15:21:51 -0400 Subject: [PATCH 16/18] tracing: Do not use PERF enums when perf is not defined An update was made to up the module ref count when a synthetic event is registered for both trace and perf events. But if perf is not configured in, the perf enums used will cause the kernel to fail to build. Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Cc: Douglas Raillard Link: https://lore.kernel.org/20250323152151.528b5ced@batman.local.home Fixes: 21581dd4e7ff ("tracing: Ensure module defining synth event cannot be unloaded while tracing") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202503232230.TeREVy8R-lkp@intel.com/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 463b0073629a..a5c5f34c207a 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -858,8 +858,10 @@ static int synth_event_reg(struct trace_event_call *call, struct synth_event *event = container_of(call, struct synth_event, call); switch (type) { - case TRACE_REG_REGISTER: +#ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: +#endif + case TRACE_REG_REGISTER: if (!try_module_get(event->mod)) return -EBUSY; break; @@ -870,8 +872,10 @@ static int synth_event_reg(struct trace_event_call *call, int ret = trace_event_reg(call, type, data); switch (type) { - case TRACE_REG_UNREGISTER: +#ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_UNREGISTER: +#endif + case TRACE_REG_UNREGISTER: module_put(event->mod); break; default: From 4d38328eb442dc06aec4350fd9594ffa6488af02 Mon Sep 17 00:00:00 2001 From: Douglas Raillard Date: Tue, 25 Mar 2025 16:52:02 +0000 Subject: [PATCH 17/18] tracing: Fix synth event printk format for str fields The printk format for synth event uses "%.*s" to print string fields, but then only passes the pointer part as var arg. Replace %.*s with %s as the C string is guaranteed to be null-terminated. The output in print fmt should never have been updated as __get_str() handles the string limit because it can access the length of the string in the string meta data that is saved in the ring buffer. Cc: stable@vger.kernel.org Cc: Masami Hiramatsu Cc: Mathieu Desnoyers Fixes: 8db4d6bfbbf92 ("tracing: Change synthetic event string format to limit printed length") Link: https://lore.kernel.org/20250325165202.541088-1-douglas.raillard@arm.com Signed-off-by: Douglas Raillard Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index a5c5f34c207a..6d592cbc38e4 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -305,7 +305,7 @@ static const char *synth_field_fmt(char *type) else if (strcmp(type, "gfp_t") == 0) fmt = "%x"; else if (synth_field_is_string(type)) - fmt = "%.*s"; + fmt = "%s"; else if (synth_field_is_stack(type)) fmt = "%s"; From e0344f9564f5847dc20e245fbea67a4b262ee659 Mon Sep 17 00:00:00 2001 From: Siddarth G Date: Tue, 25 Mar 2025 23:42:32 +0530 Subject: [PATCH 18/18] tracing: Replace strncpy with memcpy for fixed-length substring copy checkpatch.pl reports the following warning: WARNING: Prefer strscpy, strscpy_pad, or __nonstring over strncpy (see: https://github.com/KSPP/linux/issues/90) In synth_field_string_size(), replace strncpy() with memcpy() to copy 'len' characters from 'start' to 'buf'. The code manually adds a NUL terminator after the copy, making memcpy safe here. Link: https://lore.kernel.org/20250325181232.38284-1-siddarthsgml@gmail.com Signed-off-by: Siddarth G Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 6d592cbc38e4..969f48742d72 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -207,7 +207,7 @@ static int synth_field_string_size(char *type) if (len == 0) return 0; /* variable-length string */ - strncpy(buf, start, len); + memcpy(buf, start, len); buf[len] = '\0'; err = kstrtouint(buf, 0, &size);