From 94c255ac676fa922df3498608ead42b0a0c85122 Mon Sep 17 00:00:00 2001 From: Xiang wangx Date: Mon, 6 Jun 2022 10:30:07 +0800 Subject: [PATCH 01/50] tracing/user_events: Fix syntax errors in comments Delete the redundant word 'have'. Link: https://lkml.kernel.org/r/20220606023007.23377-1-wangxiang@cdjrlc.com Signed-off-by: Xiang wangx Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 706e1686b5eb9..a6621c52ce454 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -567,7 +567,7 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) * to allow user_event files to be less locked down. The extreme case * being "other" has read/write access to user_events_data/status. * - * When not locked down, processes may not have have permissions to + * When not locked down, processes may not have permissions to * add/remove calls themselves to tracefs. We need to temporarily * switch to root file permission to allow for this scenario. */ From fb991f1942334b0cbf6aa6a88faa586ba22d3550 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Thu, 30 Jun 2022 09:31:52 +0800 Subject: [PATCH 02/50] tracing/histograms: Simplify create_hist_fields() When I look into implements of create_hist_fields(), I think there can be following two simplifications: 1. If something wrong happened in parse_var_defs(), free_var_defs() would have been called in it, so no need goto free again after calling it; 2. After calling create_key_fields(), regardless of the value of 'ret', it then always runs into 'out: ', so the judge of 'ret' is redundant. Link: https://lkml.kernel.org/r/20220630013152.164871-1-zhengyejian1@huawei.com Signed-off-by: Zheng Yejian Reviewed-by: Tom Rix Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index e87a46794079d..fdf784620c283 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -4455,7 +4455,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, ret = parse_var_defs(hist_data); if (ret) - goto out; + return ret; ret = create_val_fields(hist_data, file); if (ret) @@ -4466,8 +4466,7 @@ static int create_hist_fields(struct hist_trigger_data *hist_data, goto out; ret = create_key_fields(hist_data, file); - if (ret) - goto out; + out: free_var_defs(hist_data); From 2a04b8d846dca196342862caecadd2a78460d8dc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Jul 2022 18:58:20 -0400 Subject: [PATCH 03/50] tracing: devlink: Use static array for string in devlink_trap_report event The trace event devlink_trap_report uses the __dynamic_array() macro to determine the size of the input_dev_name field. This is because it needs to test the dev field for NULL, and will use "NULL" if it is. But it also has the size of the dynamic array as a fixed IFNAMSIZ bytes. This defeats the purpose of the dynamic array, as this will reserve that amount of bytes on the ring buffer, and to make matters worse, it will even save that size in the event as the event expects it to be dynamic (for which it is not). Since IFNAMSIZ is just 16 bytes, just make it a static array and this will remove the meta data from the event that records the size. Link: https://lkml.kernel.org/r/20220712185820.002d9fb5@gandalf.local.home Cc: Leon Romanovsky Cc: Jiri Pirko Cc: "David S. Miller" Cc: Eric Dumazet Cc: Paolo Abeni Cc: netdev@vger.kernel.org Reviewed-by: Ido Schimmel Tested-by: Ido Schimmel Acked-by: Jakub Kicinski Signed-off-by: Steven Rostedt (Google) --- include/trace/events/devlink.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/include/trace/events/devlink.h b/include/trace/events/devlink.h index 2814f188d98c3..24969184c5348 100644 --- a/include/trace/events/devlink.h +++ b/include/trace/events/devlink.h @@ -186,7 +186,7 @@ TRACE_EVENT(devlink_trap_report, __string(driver_name, devlink_to_dev(devlink)->driver->name) __string(trap_name, metadata->trap_name) __string(trap_group_name, metadata->trap_group_name) - __dynamic_array(char, input_dev_name, IFNAMSIZ) + __array(char, input_dev_name, IFNAMSIZ) ), TP_fast_assign( @@ -197,15 +197,14 @@ TRACE_EVENT(devlink_trap_report, __assign_str(driver_name, devlink_to_dev(devlink)->driver->name); __assign_str(trap_name, metadata->trap_name); __assign_str(trap_group_name, metadata->trap_group_name); - __assign_str(input_dev_name, - (input_dev ? input_dev->name : "NULL")); + strscpy(__entry->input_dev_name, input_dev ? input_dev->name : "NULL", IFNAMSIZ); ), TP_printk("bus_name=%s dev_name=%s driver_name=%s trap_name=%s " "trap_group_name=%s input_dev_name=%s", __get_str(bus_name), __get_str(dev_name), __get_str(driver_name), __get_str(trap_name), __get_str(trap_group_name), - __get_str(input_dev_name)) + __entry->input_dev_name) ); #endif /* _TRACE_DEVLINK_H */ From fca8300f68fe3fbb2fcda862f0f114011715b3bc Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 4 Jul 2022 09:14:36 -0400 Subject: [PATCH 04/50] tracing/ipv4/ipv6: Use static array for name field in fib*_lookup_table event The fib_lookup_table and fib6_lookup_table events declare name as a dynamic_array, but also give it a fixed size, which defeats the purpose of the dynamic array, especially since the dynamic array also includes meta data in the event to specify its size. Since the size of the name is at most 16 bytes (defined by IFNAMSIZ), it is not worth spending the effort to determine the size of the string. Just use a fixed size array and copy into it. This will save 4 bytes that are used for the meta data that saves the size and position of a dynamic array, and even slightly speed up the event processing. Link: https://lkml.kernel.org/r/20220704091436.3705edbf@rorschach.local.home Cc: David S. Miller Cc: netdev@vger.kernel.org Acked-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: Steven Rostedt (Google) --- include/trace/events/fib.h | 6 +++--- include/trace/events/fib6.h | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 6f2a4dc35e37e..c2300c407f583 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -32,7 +32,7 @@ TRACE_EVENT(fib_table_lookup, __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) - __dynamic_array(char, name, IFNAMSIZ ) + __array(char, name, IFNAMSIZ ) ), TP_fast_assign( @@ -66,7 +66,7 @@ TRACE_EVENT(fib_table_lookup, } dev = nhc ? nhc->nhc_dev : NULL; - __assign_str(name, dev ? dev->name : "-"); + strlcpy(__entry->name, dev ? dev->name : "-", IFNAMSIZ); if (nhc) { if (nhc->nhc_gw_family == AF_INET) { @@ -95,7 +95,7 @@ TRACE_EVENT(fib_table_lookup, __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw4, __entry->gw6, __entry->err) + __entry->name, __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h index c6abdcc77c127..6e821eb794503 100644 --- a/include/trace/events/fib6.h +++ b/include/trace/events/fib6.h @@ -31,7 +31,7 @@ TRACE_EVENT(fib6_table_lookup, __field( u16, dport ) __field( u8, proto ) __field( u8, rt_type ) - __dynamic_array( char, name, IFNAMSIZ ) + __array( char, name, IFNAMSIZ ) __array( __u8, gw, 16 ) ), @@ -63,9 +63,9 @@ TRACE_EVENT(fib6_table_lookup, } if (res->nh && res->nh->fib_nh_dev) { - __assign_str(name, res->nh->fib_nh_dev); + strlcpy(__entry->name, res->nh->fib_nh_dev->name, IFNAMSIZ); } else { - __assign_str(name, "-"); + strcpy(__entry->name, "-"); } if (res->f6i == net->ipv6.fib6_null_entry) { struct in6_addr in6_zero = {}; @@ -83,7 +83,7 @@ TRACE_EVENT(fib6_table_lookup, __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw, __entry->err) + __entry->name, __entry->gw, __entry->err) ); #endif /* _TRACE_FIB6_H */ From 43b2aef3735e0ddb4fc4e6401cd5dc1bf205dead Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:37:41 -0400 Subject: [PATCH 05/50] neighbor: tracing: Have neigh_create event use __string() The dev field of the neigh_create event uses __dynamic_array() with a fixed size, which defeats the purpose of __dynamic_array(). Looking at the logic, as it already uses __assign_str(), just use the same logic in __string to create the size needed. It appears that because "dev" can be NULL, it needs the check. But __string() can have the same checks as __assign_str() so use them there too. Link: https://lkml.kernel.org/r/20220705183741.35387e3f@rorschach.local.home Cc: David Ahern Cc: David S. Miller Cc: netdev@vger.kernel.org Acked-by: Jakub Kicinski Reviewed-by: David Ahern Signed-off-by: Steven Rostedt (Google) --- include/trace/events/neigh.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h index 62bb17516713f..5eaa1fa991715 100644 --- a/include/trace/events/neigh.h +++ b/include/trace/events/neigh.h @@ -30,7 +30,7 @@ TRACE_EVENT(neigh_create, TP_STRUCT__entry( __field(u32, family) - __dynamic_array(char, dev, IFNAMSIZ ) + __string(dev, dev ? dev->name : "NULL") __field(int, entries) __field(u8, created) __field(u8, gc_exempt) From 0563231f93c6d1f582b168a47753b345c1e20d81 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:54 -0400 Subject: [PATCH 06/50] tracing/events: Add __vstring() and __assign_vstr() helper macros There's several places that open code the following logic: TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)), TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);) To load a string created by variable array va_list. The main issue with this approach is that "MSG_MAX" usage in the __dynamic_array() portion. That actually just reserves the MSG_MAX in the event, and even wastes space because there's dynamic meta data also saved in the event to denote the offset and size of the dynamic array. It would have been better to just use a static __array() field. Instead, create __vstring() and __assign_vstr() that work like __string and __assign_str() but instead of taking a destination string to copy, take a format string and a va_list pointer and fill in the values. It uses the helper: #define __trace_event_vstr_len(fmt, va) \ ({ \ va_list __ap; \ int __ret; \ \ va_copy(__ap, *(va)); \ __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ va_end(__ap); \ \ min(__ret, TRACE_EVENT_STR_MAX); \ }) To figure out the length to store the string. It may be slightly slower as it needs to run the vsnprintf() twice, but it now saves space on the ring buffer. Link: https://lkml.kernel.org/r/20220705224749.053570613@goodmis.org Cc: Dennis Dalessandro Cc: Ingo Molnar Cc: Andrew Morton Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: Arend van Spriel Cc: Franky Lin Cc: Hante Meuleman Cc: Gregory Greenman Cc: Peter Chen Cc: Greg Kroah-Hartman Cc: Mathias Nyman Cc: Chunfeng Yun Cc: Bin Liu Cc: Marek Lindner Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: Sven Eckelmann Cc: Johannes Berg Cc: Jim Cromie Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_events.h | 18 ++++++++++++++++++ include/trace/stages/stage1_struct_define.h | 3 +++ include/trace/stages/stage2_data_offsets.h | 3 +++ include/trace/stages/stage4_event_fields.h | 3 +++ include/trace/stages/stage5_get_offsets.h | 4 ++++ include/trace/stages/stage6_event_callback.h | 7 +++++++ 6 files changed, 38 insertions(+) diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index e6e95a9f07a52..b18759a673c66 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, #endif +#define TRACE_EVENT_STR_MAX 512 + +/* + * gcc warns that you can not use a va_list in an inlined + * function. But lets me make it into a macro :-/ + */ +#define __trace_event_vstr_len(fmt, va) \ +({ \ + va_list __ap; \ + int __ret; \ + \ + va_copy(__ap, *(va)); \ + __ret = vsnprintf(NULL, 0, fmt, __ap) + 1; \ + va_end(__ap); \ + \ + min(__ret, TRACE_EVENT_STR_MAX); \ +}) + #endif /* _LINUX_TRACE_EVENT_H */ /* diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h index a16783419687e..1b7bab60434c1 100644 --- a/include/trace/stages/stage1_struct_define.h +++ b/include/trace/stages/stage1_struct_define.h @@ -26,6 +26,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1) diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h index 42fd1e8813ecf..1b7a8f764fddd 100644 --- a/include/trace/stages/stage2_data_offsets.h +++ b/include/trace/stages/stage2_data_offsets.h @@ -32,6 +32,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index e80cdc397a436..c3790ec7a4530 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -38,6 +38,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h index 7ee5931300e6d..fba4c24ed9e60 100644 --- a/include/trace/stages/stage5_get_offsets.h +++ b/include/trace/stages/stage5_get_offsets.h @@ -39,6 +39,10 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, \ + __trace_event_vstr_len(fmt, ap)) + #undef __rel_dynamic_array #define __rel_dynamic_array(type, item, len) \ __item_length = (len) * sizeof(type); \ diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index e1724f73594be..0f51f6b3ab70c 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -24,6 +24,9 @@ #undef __string_len #define __string_len(item, src, len) __dynamic_array(char, item, -1) +#undef __vstring +#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1) + #undef __assign_str #define __assign_str(dst, src) \ strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)"); @@ -35,6 +38,10 @@ __get_str(dst)[len] = '\0'; \ } while(0) +#undef __assign_vstr +#define __assign_vstr(dst, fmt, va) \ + vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va)) + #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) From 8d7f5df0fb4eaaef85c6b80caeafd5bd544159a5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:55 -0400 Subject: [PATCH 07/50] tracing/IB/hfi1: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224749.239494531@goodmis.org Cc: Dennis Dalessandro Cc: Ingo Molnar Cc: Andrew Morton Cc: Jason Gunthorpe Cc: Leon Romanovsky Cc: linux-rdma@vger.kernel.org Signed-off-by: Steven Rostedt (Google) --- drivers/infiniband/hw/hfi1/trace_dbg.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/infiniband/hw/hfi1/trace_dbg.h b/drivers/infiniband/hw/hfi1/trace_dbg.h index 707f1053f0b70..582b6f68df3dd 100644 --- a/drivers/infiniband/hw/hfi1/trace_dbg.h +++ b/drivers/infiniband/hw/hfi1/trace_dbg.h @@ -26,14 +26,10 @@ DECLARE_EVENT_CLASS(hfi1_trace_template, TP_PROTO(const char *function, struct va_format *vaf), TP_ARGS(function, vaf), TP_STRUCT__entry(__string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign(__assign_str(function, function); - WARN_ON_ONCE(vsnprintf - (__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= - MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("(%s) %s", __get_str(function), From c01406f8972154be6236d89f7f7f056ee289b9cf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:56 -0400 Subject: [PATCH 08/50] tracing/ath: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224749.430339634@goodmis.org Cc: Kalle Valo Cc: Ingo Molnar Cc: Andrew Morton Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: ath10k@lists.infradead.org Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Cc: ath11k@lists.infradead.org Acked-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) --- drivers/net/wireless/ath/ath10k/trace.h | 14 ++++---------- drivers/net/wireless/ath/ath11k/trace.h | 7 ++----- drivers/net/wireless/ath/ath6kl/trace.h | 14 ++++---------- drivers/net/wireless/ath/trace.h | 7 ++----- drivers/net/wireless/ath/wil6210/trace.h | 7 ++----- 5 files changed, 14 insertions(+), 35 deletions(-) diff --git a/drivers/net/wireless/ath/ath10k/trace.h b/drivers/net/wireless/ath/ath10k/trace.h index 4714c86bb5016..64e7a767d9634 100644 --- a/drivers/net/wireless/ath/ath10k/trace.h +++ b/drivers/net/wireless/ath/ath10k/trace.h @@ -52,15 +52,12 @@ DECLARE_EVENT_CLASS(ath10k_log_event, TP_STRUCT__entry( __string(device, dev_name(ar->dev)) __string(driver, dev_driver_string(ar->dev)) - __dynamic_array(char, msg, ATH10K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ar->dev)); __assign_str(driver, dev_driver_string(ar->dev)); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH10K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH10K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", @@ -92,16 +89,13 @@ TRACE_EVENT(ath10k_log_dbg, __string(device, dev_name(ar->dev)) __string(driver, dev_driver_string(ar->dev)) __field(unsigned int, level) - __dynamic_array(char, msg, ATH10K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ar->dev)); __assign_str(driver, dev_driver_string(ar->dev)); __entry->level = level; - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH10K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH10K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", diff --git a/drivers/net/wireless/ath/ath11k/trace.h b/drivers/net/wireless/ath/ath11k/trace.h index a02e54735e889..76560587bea06 100644 --- a/drivers/net/wireless/ath/ath11k/trace.h +++ b/drivers/net/wireless/ath/ath11k/trace.h @@ -126,15 +126,12 @@ DECLARE_EVENT_CLASS(ath11k_log_event, TP_STRUCT__entry( __string(device, dev_name(ab->dev)) __string(driver, dev_driver_string(ab->dev)) - __dynamic_array(char, msg, ATH11K_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, dev_name(ab->dev)); __assign_str(driver, dev_driver_string(ab->dev)); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH11K_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH11K_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( "%s %s %s", diff --git a/drivers/net/wireless/ath/ath6kl/trace.h b/drivers/net/wireless/ath/ath6kl/trace.h index a3d3740419eb7..231a94769ddb9 100644 --- a/drivers/net/wireless/ath/ath6kl/trace.h +++ b/drivers/net/wireless/ath/ath6kl/trace.h @@ -253,13 +253,10 @@ DECLARE_EVENT_CLASS(ath6kl_log_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, ATH6KL_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH6KL_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH6KL_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -284,14 +281,11 @@ TRACE_EVENT(ath6kl_log_dbg, TP_ARGS(level, vaf), TP_STRUCT__entry( __field(unsigned int, level) - __dynamic_array(char, msg, ATH6KL_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH6KL_MSG_MAX, - vaf->fmt, - *vaf->va) >= ATH6KL_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); diff --git a/drivers/net/wireless/ath/trace.h b/drivers/net/wireless/ath/trace.h index ba711644d27ee..9935cf475b6d3 100644 --- a/drivers/net/wireless/ath/trace.h +++ b/drivers/net/wireless/ath/trace.h @@ -40,16 +40,13 @@ TRACE_EVENT(ath_log, TP_STRUCT__entry( __string(device, wiphy_name(wiphy)) __string(driver, KBUILD_MODNAME) - __dynamic_array(char, msg, ATH_DBG_MAX_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, wiphy_name(wiphy)); __assign_str(driver, KBUILD_MODNAME); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - ATH_DBG_MAX_LEN, - vaf->fmt, - *vaf->va) >= ATH_DBG_MAX_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( diff --git a/drivers/net/wireless/ath/wil6210/trace.h b/drivers/net/wireless/ath/wil6210/trace.h index 11c989e958809..201f44612c310 100644 --- a/drivers/net/wireless/ath/wil6210/trace.h +++ b/drivers/net/wireless/ath/wil6210/trace.h @@ -70,13 +70,10 @@ DECLARE_EVENT_CLASS(wil6210_log_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, WIL6210_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - WIL6210_MSG_MAX, - vaf->fmt, - *vaf->va) >= WIL6210_MSG_MAX); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); From b6d18ab34220565177d1f2efb84ff40abb195457 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:57 -0400 Subject: [PATCH 09/50] tracing/brcm: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224749.622796175@goodmis.org Cc: Arend van Spriel Cc: Ingo Molnar Cc: Andrew Morton Cc: Franky Lin Cc: Hante Meuleman Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-wireless@vger.kernel.org Cc: brcm80211-dev-list.pdl@broadcom.com Cc: SHA-cyfmac-dev-list@infineon.com Cc: netdev@vger.kernel.org Acked-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) --- .../broadcom/brcm80211/brcmfmac/tracepoint.h | 12 ++++-------- .../brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h | 12 ++++-------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h index 338c66d0c5f83..5a139d7ed47aa 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.h @@ -33,13 +33,11 @@ TRACE_EVENT(brcmf_err, TP_ARGS(func, vaf), TP_STRUCT__entry( __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); @@ -50,14 +48,12 @@ TRACE_EVENT(brcmf_dbg, TP_STRUCT__entry( __field(u32, level) __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h index 0e8a69ab909f2..4884564203530 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/brcms_trace_brcmsmac_msg.h @@ -28,12 +28,10 @@ DECLARE_EVENT_CLASS(brcms_msg_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -64,14 +62,12 @@ TRACE_EVENT(brcms_dbg, TP_STRUCT__entry( __field(u32, level) __string(func, func) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(func, func); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(func), __get_str(msg)) ); From c7c37bb87590886e08d24dec53089f74b89f5fbb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:58 -0400 Subject: [PATCH 10/50] tracing/iwlwifi: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224749.806599472@goodmis.org Cc: Gregory Greenman Cc: Ingo Molnar Cc: Andrew Morton Cc: Kalle Valo Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Acked-by: Kalle Valo Signed-off-by: Steven Rostedt (Google) --- .../net/wireless/intel/iwlwifi/iwl-devtrace-msg.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h index 7dd70011fd1ef..1d6c292cf5456 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace-msg.h @@ -18,12 +18,10 @@ DECLARE_EVENT_CLASS(iwlwifi_msg_event, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); @@ -55,14 +53,12 @@ TRACE_EVENT(iwlwifi_dbg, TP_STRUCT__entry( __field(u32, level) __string(function, function) - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __entry->level = level; __assign_str(function, function); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); From 1b756b372fbf6ae5bcbbb4a5b7aa271483902fa5 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:44:59 -0400 Subject: [PATCH 11/50] usb: chipidea: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224749.991587733@goodmis.org Cc: Peter Chen Cc: Ingo Molnar Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Steven Rostedt (Google) --- drivers/usb/chipidea/trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/chipidea/trace.h b/drivers/usb/chipidea/trace.h index 1601fd86c4c10..ca0e65b48f0ae 100644 --- a/drivers/usb/chipidea/trace.h +++ b/drivers/usb/chipidea/trace.h @@ -28,11 +28,11 @@ TRACE_EVENT(ci_log, TP_ARGS(ci, vaf), TP_STRUCT__entry( __string(name, dev_name(ci->dev)) - __dynamic_array(char, msg, CHIPIDEA_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(ci->dev)); - vsnprintf(__get_str(msg), CHIPIDEA_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); From 0ba4c9dede106c3e7ad2bb7e23eaa1393adc43a0 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:45:00 -0400 Subject: [PATCH 12/50] xhci: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224750.172301548@goodmis.org Cc: Mathias Nyman Cc: Ingo Molnar Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Steven Rostedt (Google) --- drivers/usb/host/xhci-trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h index a5da020772977..61e93a3540a7c 100644 --- a/drivers/usb/host/xhci-trace.h +++ b/drivers/usb/host/xhci-trace.h @@ -28,9 +28,9 @@ DECLARE_EVENT_CLASS(xhci_log_msg, TP_PROTO(struct va_format *vaf), TP_ARGS(vaf), - TP_STRUCT__entry(__dynamic_array(char, msg, XHCI_MSG_MAX)), + TP_STRUCT__entry(__vstring(msg, vaf->fmt, vaf->va)), TP_fast_assign( - vsnprintf(__get_str(msg), XHCI_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) ); From 84149fc768bc82922ef72e56c0eef9512417980c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:45:02 -0400 Subject: [PATCH 13/50] usb: musb: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224750.532345354@goodmis.org Cc: Bin Liu Cc: Ingo Molnar Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: linux-usb@vger.kernel.org Signed-off-by: Steven Rostedt (Google) --- drivers/usb/musb/musb_trace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/musb/musb_trace.h b/drivers/usb/musb/musb_trace.h index ec28b57167961..f246b14394c4a 100644 --- a/drivers/usb/musb/musb_trace.h +++ b/drivers/usb/musb/musb_trace.h @@ -28,11 +28,11 @@ TRACE_EVENT(musb_log, TP_ARGS(musb, vaf), TP_STRUCT__entry( __string(name, dev_name(musb->controller)) - __dynamic_array(char, msg, MUSB_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(musb->controller)); - vsnprintf(__get_str(msg), MUSB_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); From 5409b8053511f9a32ee08c3d16827632a5b17e3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:45:03 -0400 Subject: [PATCH 14/50] scsi: iscsi: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224750.715763972@goodmis.org Cc: Fred Herard Cc: Ingo Molnar Cc: Andrew Morton Cc: Martin K. Petersen Signed-off-by: Steven Rostedt (Google) --- include/trace/events/iscsi.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/iscsi.h b/include/trace/events/iscsi.h index 87408faf6e4ea..8ff2a3ca5d75e 100644 --- a/include/trace/events/iscsi.h +++ b/include/trace/events/iscsi.h @@ -26,12 +26,12 @@ DECLARE_EVENT_CLASS(iscsi_log_msg, TP_STRUCT__entry( __string(dname, dev_name(dev) ) - __dynamic_array(char, msg, ISCSI_MSG_MAX ) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(dname, dev_name(dev)); - vsnprintf(__get_str(msg), ISCSI_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s",__get_str(dname), __get_str(msg) From 74003fc4ae7619ac5a7bec8748a14f3a8655f3ea Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:45:04 -0400 Subject: [PATCH 15/50] scsi: qla2xxx: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224750.896553364@goodmis.org Cc: Bart Van Assche Cc: Ingo Molnar Cc: Andrew Morton Cc: Martin K. Petersen Signed-off-by: Steven Rostedt (Google) --- include/trace/events/qla.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/trace/events/qla.h b/include/trace/events/qla.h index 5857cf682ee74..e7fd55e7dc3d5 100644 --- a/include/trace/events/qla.h +++ b/include/trace/events/qla.h @@ -22,11 +22,11 @@ DECLARE_EVENT_CLASS(qla_log_event, TP_STRUCT__entry( __string(buf, buf) - __dynamic_array(char, msg, QLA_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(buf, buf); - vsnprintf(__get_str(msg), QLA_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s %s", __get_str(buf), __get_str(msg)) From ded4a2f1ae608772dd95e819f04bbaba1f0e78b1 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 5 Jul 2022 18:45:06 -0400 Subject: [PATCH 16/50] mac80211: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220705224751.271015450@goodmis.org Cc: Johannes Berg Cc: Ingo Molnar Cc: Andrew Morton Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: linux-wireless@vger.kernel.org Cc: netdev@vger.kernel.org Signed-off-by: Steven Rostedt (Google) --- net/mac80211/trace_msg.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 40141df09f255..c9dbe9aab7bdc 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -24,13 +24,11 @@ DECLARE_EVENT_CLASS(mac80211_msg_event, TP_ARGS(vaf), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s", __get_str(msg)) From b774926c733850037b15c50f893383aa71bd8695 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 27 Jun 2022 10:19:05 +0800 Subject: [PATCH 17/50] tracing: eprobe: Add missing log index Add trace_probe_log_set_index(1) to allow report correct error if user input wrong SYSTEM.EVENT format. Link: https://lore.kernel.org/all/1656296348-16111-2-git-send-email-quic_linyyuan@quicinc.com/ Reviewed-by: Tom Zanussi Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 7d4478525c669..b805b570305fc 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -881,6 +881,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (!is_good_name(event) || !is_good_name(group)) goto parse_error; + trace_probe_log_set_index(1); sys_event = argv[1]; ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, sys_event - argv[1]); From f360ea5641dc9473ad485e882c8ac3b1aa2672ff Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 27 Jun 2022 10:19:06 +0800 Subject: [PATCH 18/50] tracing: eprobe: Remove duplicate is_good_name() operation traceprobe_parse_event_name() already validate SYSTEM and EVENT name, there is no need to call is_good_name() after it. Link: https://lore.kernel.org/all/1656296348-16111-3-git-send-email-quic_linyyuan@quicinc.com/ Acked-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index b805b570305fc..8979cb9ec37a5 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -887,8 +887,6 @@ static int __trace_eprobe_create(int argc, const char *argv[]) sys_event - argv[1]); if (ret || !sys_name) goto parse_error; - if (!is_good_name(sys_event) || !is_good_name(sys_name)) - goto parse_error; mutex_lock(&event_mutex); event_call = find_and_get_event(sys_name, sys_event); From 95c104c378dc7d4cb3fb9f289dc5354bfc285fe0 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 27 Jun 2022 10:19:07 +0800 Subject: [PATCH 19/50] tracing: Auto generate event name when creating a group of events Currently when creating a specific group of trace events, take kprobe event as example, the user must use the following format: p:GRP/EVENT [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS], which means user must enter EVENT name, one example is: echo 'p:usb_gadget/config_usb_cfg_link config_usb_cfg_link $arg1' >> kprobe_events It is not simple if there are too many entries because the event name is the same as symbol name. This change allows user to specify no EVENT name, format changed as: p:GRP/ [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] It will generate event name automatically and one example is: echo 'p:usb_gadget/ config_usb_cfg_link $arg1' >> kprobe_events. Link: https://lore.kernel.org/all/1656296348-16111-4-git-send-email-quic_linyyuan@quicinc.com/ Acked-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/kprobetrace.rst | 8 ++++---- Documentation/trace/uprobetracer.rst | 8 ++++---- kernel/trace/trace.c | 8 ++++---- kernel/trace/trace_dynevent.c | 2 +- kernel/trace/trace_eprobe.c | 25 +++++++++++++------------ kernel/trace/trace_kprobe.c | 16 ++++++++++------ kernel/trace/trace_probe.c | 4 ++++ kernel/trace/trace_uprobe.c | 12 ++++++++---- 8 files changed, 48 insertions(+), 35 deletions(-) diff --git a/Documentation/trace/kprobetrace.rst b/Documentation/trace/kprobetrace.rst index b175d88f31ebb..4274cc6a2f94f 100644 --- a/Documentation/trace/kprobetrace.rst +++ b/Documentation/trace/kprobetrace.rst @@ -28,10 +28,10 @@ Synopsis of kprobe_events ------------------------- :: - p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe - r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe - p:[GRP/]EVENT] [MOD:]SYM[+0]%return [FETCHARGS] : Set a return probe - -:[GRP/]EVENT : Clear a probe + p[:[GRP/][EVENT]] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe + r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe + p[:[GRP/][EVENT]] [MOD:]SYM[+0]%return [FETCHARGS] : Set a return probe + -:[GRP/][EVENT] : Clear a probe GRP : Group name. If omitted, use "kprobes" for it. EVENT : Event name. If omitted, the event name is generated diff --git a/Documentation/trace/uprobetracer.rst b/Documentation/trace/uprobetracer.rst index a8e5938f609e2..3a1797d707f4c 100644 --- a/Documentation/trace/uprobetracer.rst +++ b/Documentation/trace/uprobetracer.rst @@ -26,10 +26,10 @@ Synopsis of uprobe_tracer ------------------------- :: - p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a uprobe - r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) - p[:[GRP/]EVENT] PATH:OFFSET%return [FETCHARGS] : Set a return uprobe (uretprobe) - -:[GRP/]EVENT : Clear uprobe or uretprobe event + p[:[GRP/][EVENT]] PATH:OFFSET [FETCHARGS] : Set a uprobe + r[:[GRP/][EVENT]] PATH:OFFSET [FETCHARGS] : Set a return uprobe (uretprobe) + p[:[GRP/][EVENT]] PATH:OFFSET%return [FETCHARGS] : Set a return uprobe (uretprobe) + -:[GRP/][EVENT] : Clear uprobe or uretprobe event GRP : Group name. If omitted, "uprobes" is the default value. EVENT : Event name. If omitted, the event name is generated based diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b8dd546270750..7eb5bce625006 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5569,13 +5569,13 @@ static const char readme_msg[] = #endif #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) "\t accepts: event-definitions (one definition per line)\n" - "\t Format: p[:[/]] []\n" - "\t r[maxactive][:[/]] []\n" + "\t Format: p[:[/][]] []\n" + "\t r[maxactive][:[/][]] []\n" #ifdef CONFIG_HIST_TRIGGERS "\t s:[synthetic/] []\n" #endif - "\t e[:[/]] . []\n" - "\t -:[/]\n" + "\t e[:[/][]] . []\n" + "\t -:[/][]\n" #ifdef CONFIG_KPROBE_EVENTS "\t place: [:][+]|\n" "place (kretprobe): [:][+]%return|\n" diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index 076b447a1b889..154996684fb54 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -101,7 +101,7 @@ int dyn_event_release(const char *raw_command, struct dyn_event_operations *type event = p + 1; *p = '\0'; } - if (event[0] == '\0') { + if (!system && event[0] == '\0') { ret = -EINVAL; goto out; } diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index 8979cb9ec37a5..a30f21499e812 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -125,6 +125,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event, * We match the following: * event only - match all eprobes with event name * system and event only - match all system/event probes + * system only - match all system probes * * The below has the above satisfied with more arguments: * @@ -143,7 +144,7 @@ static bool eprobe_dyn_event_match(const char *system, const char *event, return false; /* Must match the event name */ - if (strcmp(trace_probe_name(&ep->tp), event) != 0) + if (event[0] != '\0' && strcmp(trace_probe_name(&ep->tp), event) != 0) return false; /* No arguments match all */ @@ -848,7 +849,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) { /* * Argument syntax: - * e[:[GRP/]ENAME] SYSTEM.EVENT [FETCHARGS] + * e[:[GRP/][ENAME]] SYSTEM.EVENT [FETCHARGS] * Fetch args: * =$[:TYPE] */ @@ -858,6 +859,7 @@ static int __trace_eprobe_create(int argc, const char *argv[]) struct trace_eprobe *ep = NULL; char buf1[MAX_EVENT_NAME_LEN]; char buf2[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; int ret = 0; int i; @@ -869,25 +871,24 @@ static int __trace_eprobe_create(int argc, const char *argv[]) event = strchr(&argv[0][1], ':'); if (event) { event++; - ret = traceprobe_parse_event_name(&event, &group, buf1, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto parse_error; - } else { - strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN); - sanitize_event_name(buf1); - event = buf1; } - if (!is_good_name(event) || !is_good_name(group)) - goto parse_error; trace_probe_log_set_index(1); sys_event = argv[1]; - ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, - sys_event - argv[1]); - if (ret || !sys_name) + ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); + if (!sys_event || !sys_name) goto parse_error; + if (!event) { + strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN); + sanitize_event_name(buf1); + event = buf1; + } + mutex_lock(&event_mutex); event_call = find_and_get_event(sys_name, sys_event); ep = alloc_event_probe(group, event, event_call, argc - 2); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index a245ea673715d..23f7f0ec4f4cf 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -163,7 +163,8 @@ static bool trace_kprobe_match(const char *system, const char *event, { struct trace_kprobe *tk = to_trace_kprobe(ev); - return strcmp(trace_probe_name(&tk->tp), event) == 0 && + return (event[0] == '\0' || + strcmp(trace_probe_name(&tk->tp), event) == 0) && (!system || strcmp(trace_probe_group_name(&tk->tp), system) == 0) && trace_kprobe_match_command_head(tk, argc, argv); } @@ -708,11 +709,11 @@ static int __trace_kprobe_create(int argc, const char *argv[]) /* * Argument syntax: * - Add kprobe: - * p[:[GRP/]EVENT] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] + * p[:[GRP/][EVENT]] [MOD:]KSYM[+OFFS]|KADDR [FETCHARGS] * - Add kretprobe: - * r[MAXACTIVE][:[GRP/]EVENT] [MOD:]KSYM[+0] [FETCHARGS] + * r[MAXACTIVE][:[GRP/][EVENT]] [MOD:]KSYM[+0] [FETCHARGS] * Or - * p:[GRP/]EVENT] [MOD:]KSYM[+0]%return [FETCHARGS] + * p[:[GRP/][EVENT]] [MOD:]KSYM[+0]%return [FETCHARGS] * * Fetch args: * $retval : fetch return value @@ -739,6 +740,7 @@ static int __trace_kprobe_create(int argc, const char *argv[]) long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; unsigned int flags = TPARG_FL_KERNEL; switch (argv[0][0]) { @@ -833,11 +835,13 @@ static int __trace_kprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto parse_error; - } else { + } + + if (!event) { /* Make a new event name */ if (symbol) snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_%ld", diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c index 80863c6508e5e..850a88abd33ba 100644 --- a/kernel/trace/trace_probe.c +++ b/kernel/trace/trace_probe.c @@ -257,6 +257,10 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup, } len = strlen(event); if (len == 0) { + if (slash) { + *pevent = NULL; + return 0; + } trace_probe_log_err(offset, NO_EVENT_NAME); return -EINVAL; } else if (len > MAX_EVENT_NAME_LEN) { diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index c3dc4f859a6bc..a3fec28961d62 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -312,7 +312,8 @@ static bool trace_uprobe_match(const char *system, const char *event, { struct trace_uprobe *tu = to_trace_uprobe(ev); - return strcmp(trace_probe_name(&tu->tp), event) == 0 && + return (event[0] == '\0' || + strcmp(trace_probe_name(&tu->tp), event) == 0) && (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) && trace_uprobe_match_command_head(tu, argc, argv); } @@ -532,7 +533,7 @@ static int register_trace_uprobe(struct trace_uprobe *tu) /* * Argument syntax: - * - Add uprobe: p|r[:[GRP/]EVENT] PATH:OFFSET[%return][(REF)] [FETCHARGS] + * - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ static int __trace_uprobe_create(int argc, const char **argv) { @@ -540,6 +541,7 @@ static int __trace_uprobe_create(int argc, const char **argv) const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; char *arg, *filename, *rctr, *rctr_end, *tmp; char buf[MAX_EVENT_NAME_LEN]; + char gbuf[MAX_EVENT_NAME_LEN]; enum probe_print_type ptype; struct path path; unsigned long offset, ref_ctr_offset; @@ -644,11 +646,13 @@ static int __trace_uprobe_create(int argc, const char **argv) /* setup a probe */ trace_probe_log_set_index(0); if (event) { - ret = traceprobe_parse_event_name(&event, &group, buf, + ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto fail_address_parse; - } else { + } + + if (!event) { char *tail; char *ptr; From 5db19792f0660ad1ece247829bddd24bb2f8db25 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 27 Jun 2022 10:19:08 +0800 Subject: [PATCH 20/50] selftests/ftrace: Add test case for GRP/ only input Add kprobe and eprobe event test for new GRP/ only format. Link: https://lore.kernel.org/all/1656296348-16111-5-git-send-email-quic_linyyuan@quicinc.com/ Acked-by: Masami Hiramatsu (Google) Reviewed-by: Tom Zanussi Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/dynevent/add_remove_eprobe.tc | 9 ++++++++- .../ftrace/test.d/dynevent/add_remove_kprobe.tc | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc index 60c02b482be83..c300eb0202620 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Generic dynamic event - add/remove eprobe events -# requires: dynamic_events events/syscalls/sys_enter_openat "e[:[/]] . []":README +# requires: dynamic_events events/syscalls/sys_enter_openat ". []":README echo 0 > events/enable @@ -87,4 +87,11 @@ echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events ! grep -q "$EPROBE" dynamic_events ! test -d events/eprobes/$EPROBE +if grep -q "e\[:\[/]\[]]" README; then + echo "e:mygroup/ $SYSTEM/$EVENT $OPTIONS" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index b4da41d126d58..13d43f40a6fc6 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -23,4 +23,11 @@ grep -q myevent1 dynamic_events echo > dynamic_events +if grep -q "p\[:\[/]\[]]" README; then + echo "p:mygroup/ $PLACE" >> dynamic_events + test -d events/mygroup + echo "-:mygroup/" >> dynamic_events + ! test -d events/mygroup +fi + clear_trace From f5eab65ff2b76449286d18efc7fee3e0b72f7d9b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 12 Jul 2022 16:17:07 -0400 Subject: [PATCH 21/50] selftests/kprobe: Do not test for GRP/ without event failures A new feature is added where kprobes (and other probes) do not need to explicitly state the event name when creating a probe. The event name will come from what is being attached. That is: # echo 'p:foo/ vfs_read' > kprobe_events Will no longer error, but instead create an event: # cat kprobe_events p:foo/p_vfs_read_0 vfs_read This should not be tested as an error case anymore. Remove it from the selftest as now this feature "breaks" the selftest as it no longer fails as expected. Link: https://lore.kernel.org/all/1656296348-16111-1-git-send-email-quic_linyyuan@quicinc.com/ Link: https://lkml.kernel.org/r/20220712161707.6dc08a14@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index fa928b431555c..7c02509c71d0a 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -21,7 +21,6 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME -check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME From fea6ac554d9dea849e2517284b17f99fb9be423a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Fri, 15 Jul 2022 17:55:55 -0400 Subject: [PATCH 22/50] tracing: Add example and documentation for new __vstring() macro Update the sample trace events to include an example that uses the new __vstring() helpers for TRACE_EVENTS. Link: https://lkml.kernel.org/r/20220715175555.16375a3b@gandalf.local.home Signed-off-by: Steven Rostedt (Google) --- samples/trace_events/trace-events-sample.c | 14 ++++++++-- samples/trace_events/trace-events-sample.h | 32 +++++++++++++++++++--- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c index 4d34dc0b0fee7..608c4ae3b08a3 100644 --- a/samples/trace_events/trace-events-sample.c +++ b/samples/trace_events/trace-events-sample.c @@ -19,9 +19,10 @@ static const char *random_strings[] = { "One ring to rule them all" }; -static void simple_thread_func(int cnt) +static void do_simple_thread_func(int cnt, const char *fmt, ...) { unsigned long bitmask[1] = {0xdeadbeefUL}; + va_list va; int array[6]; int len = cnt % 5; int i; @@ -33,9 +34,13 @@ static void simple_thread_func(int cnt) array[i] = i + 1; array[i] = 0; + va_start(va, fmt); + /* Silly tracepoints */ trace_foo_bar("hello", cnt, array, random_strings[len], - current->cpus_ptr); + current->cpus_ptr, fmt, &va); + + va_end(va); trace_foo_with_template_simple("HELLO", cnt); @@ -48,6 +53,11 @@ static void simple_thread_func(int cnt) trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask); } +static void simple_thread_func(int cnt) +{ + do_simple_thread_func(cnt, "iter=%d", cnt); +} + static int simple_thread(void *arg) { int cnt = 0; diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h index cbbbb83beced0..1a92226202fc5 100644 --- a/samples/trace_events/trace-events-sample.h +++ b/samples/trace_events/trace-events-sample.h @@ -141,6 +141,27 @@ * In most cases, the __assign_str() macro will take the same * parameters as the __string() macro had to declare the string. * + * __vstring: This is similar to __string() but instead of taking a + * dynamic length, it takes a variable list va_list 'va' variable. + * Some event callers already have a message from parameters saved + * in a va_list. Passing in the format and the va_list variable + * will save just enough on the ring buffer for that string. + * Note, the va variable used is a pointer to a va_list, not + * to the va_list directly. + * + * (va_list *va) + * + * __vstring(foo, fmt, va) is similar to: vsnprintf(foo, fmt, va) + * + * To assign the string, use the helper macro __assign_vstr(). + * + * __assign_vstr(foo, fmt, va); + * + * In most cases, the __assign_vstr() macro will take the same + * parameters as the __vstring() macro had to declare the string. + * Use __get_str() to retrieve the __vstring() just like it would for + * __string(). + * * __string_len: This is a helper to a __dynamic_array, but it understands * that the array has characters in it, and with the combined * use of __assign_str_len(), it will allocate 'len' + 1 bytes @@ -256,9 +277,10 @@ TRACE_DEFINE_ENUM(TRACE_SAMPLE_ZOO); TRACE_EVENT(foo_bar, TP_PROTO(const char *foo, int bar, const int *lst, - const char *string, const struct cpumask *mask), + const char *string, const struct cpumask *mask, + const char *fmt, va_list *va), - TP_ARGS(foo, bar, lst, string, mask), + TP_ARGS(foo, bar, lst, string, mask, fmt, va), TP_STRUCT__entry( __array( char, foo, 10 ) @@ -266,6 +288,7 @@ TRACE_EVENT(foo_bar, __dynamic_array(int, list, __length_of(lst)) __string( str, string ) __bitmask( cpus, num_possible_cpus() ) + __vstring( vstr, fmt, va ) ), TP_fast_assign( @@ -274,10 +297,11 @@ TRACE_EVENT(foo_bar, memcpy(__get_dynamic_array(list), lst, __length_of(lst) * sizeof(int)); __assign_str(str, string); + __assign_vstr(vstr, fmt, va); __assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus()); ), - TP_printk("foo %s %d %s %s %s %s (%s)", __entry->foo, __entry->bar, + TP_printk("foo %s %d %s %s %s %s (%s) %s", __entry->foo, __entry->bar, /* * Notice here the use of some helper functions. This includes: @@ -321,7 +345,7 @@ TRACE_EVENT(foo_bar, __print_array(__get_dynamic_array(list), __get_dynamic_array_len(list) / sizeof(int), sizeof(int)), - __get_str(str), __get_bitmask(cpus)) + __get_str(str), __get_bitmask(cpus), __get_str(vstr)) ); /* From f71f3ba9b42381c2e52c079a0104e11c30ca4cc2 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 18 Jul 2022 16:05:10 +0900 Subject: [PATCH 23/50] selftests/kprobe: Update test for no event name syntax error The commit 208003254c32 ("selftests/kprobe: Do not test for GRP/ without event failures") removed a syntax which is no more cause a syntax error (NO_EVENT_NAME error with GRP/). However, there are another case (NO_EVENT_NAME error without GRP/) which causes a same error. This adds a test for that case. Link: https://lkml.kernel.org/r/165812790993.1377963.9762767354560397298.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- .../selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 7c02509c71d0a..9e85d3019ff0c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -21,6 +21,7 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME +check_error 'p:^ vfs_read' # NO_EVENT_NAME check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME From 730dbb8ddac6b92573658c3cb03555df1bdf95aa Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 19 Jul 2022 11:27:19 -0400 Subject: [PATCH 24/50] USB: mtu3: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220719112719.17e796c6@gandalf.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: linux-usb@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Acked-by: Greg Kroah-Hartman Tested-by: Chunfeng Yun Signed-off-by: Steven Rostedt (Google) --- drivers/usb/mtu3/mtu3_trace.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/usb/mtu3/mtu3_trace.h b/drivers/usb/mtu3/mtu3_trace.h index 1b897636daf24..a98fa012b7291 100644 --- a/drivers/usb/mtu3/mtu3_trace.h +++ b/drivers/usb/mtu3/mtu3_trace.h @@ -18,18 +18,16 @@ #include "mtu3.h" -#define MTU3_MSG_MAX 256 - TRACE_EVENT(mtu3_log, TP_PROTO(struct device *dev, struct va_format *vaf), TP_ARGS(dev, vaf), TP_STRUCT__entry( __string(name, dev_name(dev)) - __dynamic_array(char, msg, MTU3_MSG_MAX) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(name, dev_name(dev)); - vsnprintf(__get_str(msg), MTU3_MSG_MAX, vaf->fmt, *vaf->va); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk("%s: %s", __get_str(name), __get_str(msg)) ); From 9abc291812d784bd4a26c01af4ebdbf9f2dbf0bb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 24 Jul 2022 19:16:50 -0400 Subject: [PATCH 25/50] batman-adv: tracing: Use the new __vstring() helper Instead of open coding a __dynamic_array() with a fixed length (which defeats the purpose of the dynamic array in the first place). Use the new __vstring() helper that will use a va_list and only write enough of the string into the ring buffer that is needed. Link: https://lkml.kernel.org/r/20220724191650.236b1355@rorschach.local.home Cc: Marek Lindner Cc: Ingo Molnar Cc: Andrew Morton Cc: Simon Wunderlich Cc: Antonio Quartulli Cc: "David S. Miller" Cc: Eric Dumazet Cc: Jakub Kicinski Cc: Paolo Abeni Cc: b.a.t.m.a.n@lists.open-mesh.org Cc: netdev@vger.kernel.org Acked-by: Sven Eckelmann Signed-off-by: Steven Rostedt (Google) --- net/batman-adv/trace.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index d673ebdd04267..31c8f922651d5 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -28,8 +28,6 @@ #endif /* CONFIG_BATMAN_ADV_TRACING */ -#define BATADV_MAX_MSG_LEN 256 - TRACE_EVENT(batadv_dbg, TP_PROTO(struct batadv_priv *bat_priv, @@ -40,16 +38,13 @@ TRACE_EVENT(batadv_dbg, TP_STRUCT__entry( __string(device, bat_priv->soft_iface->name) __string(driver, KBUILD_MODNAME) - __dynamic_array(char, msg, BATADV_MAX_MSG_LEN) + __vstring(msg, vaf->fmt, vaf->va) ), TP_fast_assign( __assign_str(device, bat_priv->soft_iface->name); __assign_str(driver, KBUILD_MODNAME); - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - BATADV_MAX_MSG_LEN, - vaf->fmt, - *vaf->va) >= BATADV_MAX_MSG_LEN); + __assign_vstr(msg, vaf->fmt, vaf->va); ), TP_printk( From 3a2dcbaf4d31023106975d6ae75b6df080c454cb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 19 Jul 2022 18:20:04 -0400 Subject: [PATCH 26/50] tracing: Use a copy of the va_list for __assign_vstr() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If an instance of tracing enables the same trace event as another instance, or the top level instance, or even perf, then the va_list passed into some tracepoints can be used more than once. As va_list can only be traversed once, this can cause issues: # cat /sys/kernel/tracing/instances/qla2xxx/trace cat-56106 [012] ..... 2419873.470098: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14: Entered (null). cat-56106 [012] ..... 2419873.470101: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14: Entered ×+<96>²Ü<98>^H. cat-56106 [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14: Prepare to issue mbox cmd=0xde589000. # cat /sys/kernel/tracing/trace cat-56106 [012] ..... 2419873.470097: ql_dbg_log: qla2xxx [0000:05:00.0]-1054:14: Entered qla2x00_get_firmware_state. cat-56106 [012] ..... 2419873.470100: ql_dbg_log: qla2xxx [0000:05:00.0]-1000:14: Entered qla2x00_mailbox_command. cat-56106 [012] ..... 2419873.470102: ql_dbg_log: qla2xxx [0000:05:00.0]-1006:14: Prepare to issue mbox cmd=0x69. The instance version is corrupted because the top level instance iterated the va_list first. Use va_copy() in the __assign_vstr() macro to make sure that each trace event for each use case gets a fresh va_list. Link: https://lore.kernel.org/all/259d53a5-958e-6508-4e45-74dba2821242@marvell.com/ Link: https://lkml.kernel.org/r/20220719182004.21daa83e@gandalf.local.home Fixes: 0563231f93c6d ("tracing/events: Add __vstring() and __assign_vstr() helper macros") Reported-by: Arun Easi Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage6_event_callback.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 0f51f6b3ab70c..3c554a5853204 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -40,7 +40,12 @@ #undef __assign_vstr #define __assign_vstr(dst, fmt, va) \ - vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va)) + do { \ + va_list __cp_va; \ + va_copy(__cp_va, *(va)); \ + vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, __cp_va); \ + va_end(__cp_va); \ + } while (0) #undef __bitmask #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1) From ac6c1b2ca77e722a1e5d651f12f437f2f237e658 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 26 Jul 2022 10:18:51 -0400 Subject: [PATCH 27/50] ftrace/x86: Add back ftrace_expected assignment When a ftrace_bug happens (where ftrace fails to modify a location) it is helpful to have what was at that location as well as what was expected to be there. But with the conversion to text_poke() the variable that assigns the expected for debugging was dropped. Unfortunately, I noticed this when I needed it. Add it back. Link: https://lkml.kernel.org/r/20220726101851.069d2e70@gandalf.local.home Cc: "x86@kernel.org" Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Andrew Morton Cc: stable@vger.kernel.org Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()") Signed-off-by: Steven Rostedt (Google) --- arch/x86/kernel/ftrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 5b4efc927d808..f5cdb5f675b31 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -91,6 +91,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code) /* Make sure it is what we expect it to be */ if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) { + ftrace_expected = old_code; WARN_ON(1); return -EINVAL; } From 102227b970a15256f5ffd12a6a276ddf978e6caf Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:40 +0200 Subject: [PATCH 28/50] rv: Add Runtime Verification (RV) interface RV is a lightweight (yet rigorous) method that complements classical exhaustive verification techniques (such as model checking and theorem proving) with a more practical approach to complex systems. RV works by analyzing the trace of the system's actual execution, comparing it against a formal specification of the system behavior. RV can give precise information on the runtime behavior of the monitored system while enabling the reaction for unexpected events, avoiding, for example, the propagation of a failure on safety-critical systems. The development of this interface roots in the development of the paper: De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo Silva. Efficient formal verification for the Linux kernel. In: International Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. p. 315-332. And: De Oliveira, Daniel Bristot. Automata-based formal analysis and verification of the real-time Linux kernel. PhD Thesis, 2020. The RV interface resembles the tracing/ interface on purpose. The current path for the RV interface is /sys/kernel/tracing/rv/. It presents these files: "available_monitors" - List the available monitors, one per line. For example: # cat available_monitors wip wwnr "enabled_monitors" - Lists the enabled monitors, one per line; - Writing to it enables a given monitor; - Writing a monitor name with a '!' prefix disables it; - Truncating the file disables all enabled monitors. For example: # cat enabled_monitors # echo wip > enabled_monitors # echo wwnr >> enabled_monitors # cat enabled_monitors wip wwnr # echo '!wip' >> enabled_monitors # cat enabled_monitors wwnr # echo > enabled_monitors # cat enabled_monitors # Note that more than one monitor can be enabled concurrently. "monitoring_on" - It is an on/off general switcher for monitoring. Note that it does not disable enabled monitors or detach events, but stop the per-entity monitors of monitoring the events received from the system. It resembles the "tracing_on" switcher. "monitors/" Each monitor will have its one directory inside "monitors/". There the monitor specific files will be presented. The "monitors/" directory resembles the "events" directory on tracefs. For example: # cd monitors/wip/ # ls desc enable # cat desc wakeup in preemptive per-cpu testing monitor. # cat enable 0 For further information, see the comments in the header of kernel/trace/rv/rv.c from this patch. Link: https://lkml.kernel.org/r/a4bfe038f50cb047bfb343ad0e12b0e646ab308b.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 43 +++ include/linux/sched.h | 11 + kernel/trace/Kconfig | 2 + kernel/trace/Makefile | 1 + kernel/trace/rv/Kconfig | 12 + kernel/trace/rv/Makefile | 3 + kernel/trace/rv/rv.c | 782 +++++++++++++++++++++++++++++++++++++++ kernel/trace/rv/rv.h | 33 ++ kernel/trace/trace.c | 2 + kernel/trace/trace.h | 9 + 10 files changed, 898 insertions(+) create mode 100644 include/linux/rv.h create mode 100644 kernel/trace/rv/Kconfig create mode 100644 kernel/trace/rv/Makefile create mode 100644 kernel/trace/rv/rv.c create mode 100644 kernel/trace/rv/rv.h diff --git a/include/linux/rv.h b/include/linux/rv.h new file mode 100644 index 0000000000000..d8fa9e8be94ac --- /dev/null +++ b/include/linux/rv.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Runtime Verification. + * + * For futher information, see: kernel/trace/rv/rv.c. + */ +#ifndef _LINUX_RV_H +#define _LINUX_RV_H + +#ifdef CONFIG_RV + +/* + * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think about + * adding more or developing a dynamic method. So far, none of + * these are justified. + */ +#define RV_PER_TASK_MONITORS 1 +#define RV_PER_TASK_MONITOR_INIT (RV_PER_TASK_MONITORS) + +/* + * Futher monitor types are expected, so make this a union. + */ +union rv_task_monitor { +}; + +struct rv_monitor { + const char *name; + const char *description; + bool enabled; + int (*enable)(void); + void (*disable)(void); + void (*reset)(void); +}; + +bool rv_monitoring_on(void); +int rv_unregister_monitor(struct rv_monitor *monitor); +int rv_register_monitor(struct rv_monitor *monitor); +int rv_get_task_monitor_slot(void); +void rv_put_task_monitor_slot(int slot); + +#endif /* CONFIG_RV */ +#endif /* _LINUX_RV_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index c46f3a63b758f..b5da3e7c3a04f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -34,6 +34,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ @@ -1500,6 +1501,16 @@ struct task_struct { struct callback_head l1d_flush_kill; #endif +#ifdef CONFIG_RV + /* + * Per-task RV monitor. Nowadays fixed in RV_PER_TASK_MONITORS. + * If we find justification for more monitors, we can think + * about adding more or developing a dynamic method. So far, + * none of these are justified. + */ + union rv_task_monitor rv[RV_PER_TASK_MONITORS]; +#endif + /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index ccd6a5ade3e9f..1052126bdca22 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -1106,4 +1106,6 @@ config HIST_TRIGGERS_DEBUG If unsure, say N. +source "kernel/trace/rv/Kconfig" + endif # FTRACE diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 0d261774d6f38..c6651e16b5572 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -106,5 +106,6 @@ obj-$(CONFIG_FPROBE) += fprobe.o obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_TRACEPOINT_BENCHMARK) += trace_benchmark.o +obj-$(CONFIG_RV) += rv/ libftrace-y := ftrace.o diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig new file mode 100644 index 0000000000000..6d127cdb00dd4 --- /dev/null +++ b/kernel/trace/rv/Kconfig @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +menuconfig RV + bool "Runtime Verification" + depends on TRACING + help + Enable the kernel runtime verification infrastructure. RV is a + lightweight (yet rigorous) method that complements classical + exhaustive verification techniques (such as model checking and + theorem proving). RV works by analyzing the trace of the system's + actual execution, comparing it against a formal specification of + the system behavior. diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile new file mode 100644 index 0000000000000..fd995379df678 --- /dev/null +++ b/kernel/trace/rv/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_RV) += rv.o diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c new file mode 100644 index 0000000000000..731cc961cc70d --- /dev/null +++ b/kernel/trace/rv/rv.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * This is the online Runtime Verification (RV) interface. + * + * RV is a lightweight (yet rigorous) method that complements classical + * exhaustive verification techniques (such as model checking and + * theorem proving) with a more practical approach to complex systems. + * + * RV works by analyzing the trace of the system's actual execution, + * comparing it against a formal specification of the system behavior. + * RV can give precise information on the runtime behavior of the + * monitored system while enabling the reaction for unexpected + * events, avoiding, for example, the propagation of a failure on + * safety-critical systems. + * + * The development of this interface roots in the development of the + * paper: + * + * De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo + * Silva. Efficient formal verification for the Linux kernel. In: + * International Conference on Software Engineering and Formal Methods. + * Springer, Cham, 2019. p. 315-332. + * + * And: + * + * De Oliveira, Daniel Bristot, et al. Automata-based formal analysis + * and verification of the real-time Linux kernel. PhD Thesis, 2020. + * + * == Runtime monitor interface == + * + * A monitor is the central part of the runtime verification of a system. + * + * The monitor stands in between the formal specification of the desired + * (or undesired) behavior, and the trace of the actual system. + * + * In Linux terms, the runtime verification monitors are encapsulated + * inside the "RV monitor" abstraction. A RV monitor includes a reference + * model of the system, a set of instances of the monitor (per-cpu monitor, + * per-task monitor, and so on), and the helper functions that glue the + * monitor to the system via trace. Generally, a monitor includes some form + * of trace output as a reaction for event parsing and exceptions, + * as depicted bellow: + * + * Linux +----- RV Monitor ----------------------------------+ Formal + * Realm | | Realm + * +-------------------+ +----------------+ +-----------------+ + * | Linux kernel | | Monitor | | Reference | + * | Tracing | -> | Instance(s) | <- | Model | + * | (instrumentation) | | (verification) | | (specification) | + * +-------------------+ +----------------+ +-----------------+ + * | | | + * | V | + * | +----------+ | + * | | Reaction | | + * | +--+--+--+-+ | + * | | | | | + * | | | +-> trace output ? | + * +------------------------|--|----------------------+ + * | +----> panic ? + * +-------> + * + * This file implements the interface for loading RV monitors, and + * to control the verification session. + * + * == Registering monitors == + * + * The struct rv_monitor defines a set of callback functions to control + * a verification session. For instance, when a given monitor is enabled, + * the "enable" callback function is called to hook the instrumentation + * functions to the kernel trace events. The "disable" function is called + * when disabling the verification session. + * + * A RV monitor is registered via: + * int rv_register_monitor(struct rv_monitor *monitor); + * And unregistered via: + * int rv_unregister_monitor(struct rv_monitor *monitor); + * + * == User interface == + * + * The user interface resembles kernel tracing interface. It presents + * these files: + * + * "available_monitors" + * - List the available monitors, one per line. + * + * For example: + * # cat available_monitors + * wip + * wwnr + * + * "enabled_monitors" + * - Lists the enabled monitors, one per line; + * - Writing to it enables a given monitor; + * - Writing a monitor name with a '!' prefix disables it; + * - Truncating the file disables all enabled monitors. + * + * For example: + * # cat enabled_monitors + * # echo wip > enabled_monitors + * # echo wwnr >> enabled_monitors + * # cat enabled_monitors + * wip + * wwnr + * # echo '!wip' >> enabled_monitors + * # cat enabled_monitors + * wwnr + * # echo > enabled_monitors + * # cat enabled_monitors + * # + * + * Note that more than one monitor can be enabled concurrently. + * + * "monitoring_on" + * - It is an on/off general switcher for monitoring. Note + * that it does not disable enabled monitors or detach events, + * but stops the per-entity monitors from monitoring the events + * received from the instrumentation. It resembles the "tracing_on" + * switcher. + * + * "monitors/" + * Each monitor will have its own directory inside "monitors/". There + * the monitor specific files will be presented. + * The "monitors/" directory resembles the "events" directory on + * tracefs. + * + * For example: + * # cd monitors/wip/ + * # ls + * desc enable + * # cat desc + * auto-generated wakeup in preemptive monitor. + * # cat enable + * 0 + */ + +#include +#include +#include +#include + +#include "rv.h" + +DEFINE_MUTEX(rv_interface_lock); + +static struct rv_interface rv_root; + +struct dentry *get_monitors_root(void) +{ + return rv_root.monitors_dir; +} + +/* + * Interface for the monitor register. + */ +static LIST_HEAD(rv_monitors_list); + +static int task_monitor_count; +static bool task_monitor_slots[RV_PER_TASK_MONITORS]; + +int rv_get_task_monitor_slot(void) +{ + int i; + + lockdep_assert_held(&rv_interface_lock); + + if (task_monitor_count == RV_PER_TASK_MONITORS) + return -EBUSY; + + task_monitor_count++; + + for (i = 0; i < RV_PER_TASK_MONITORS; i++) { + if (task_monitor_slots[i] == false) { + task_monitor_slots[i] = true; + return i; + } + } + + WARN_ONCE(1, "RV task_monitor_count and slots are out of sync\n"); + + return -EINVAL; +} + +void rv_put_task_monitor_slot(int slot) +{ + lockdep_assert_held(&rv_interface_lock); + + if (slot < 0 || slot >= RV_PER_TASK_MONITORS) { + WARN_ONCE(1, "RV releasing an invalid slot!: %d\n", slot); + return; + } + + WARN_ONCE(!task_monitor_slots[slot], "RV releasing unused task_monitor_slots: %d\n", + slot); + + task_monitor_count--; + task_monitor_slots[slot] = false; +} + +/* + * This section collects the monitor/ files and folders. + */ +static ssize_t monitor_enable_read_data(struct file *filp, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + const char *buff; + + buff = mdef->monitor->enabled ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1); +} + +/* + * __rv_disable_monitor - disabled an enabled monitor + */ +static int __rv_disable_monitor(struct rv_monitor_def *mdef, bool sync) +{ + lockdep_assert_held(&rv_interface_lock); + + if (mdef->monitor->enabled) { + mdef->monitor->enabled = 0; + mdef->monitor->disable(); + + /* + * Wait for the execution of all events to finish. + * Otherwise, the data used by the monitor could + * be inconsistent. i.e., if the monitor is re-enabled. + */ + if (sync) + tracepoint_synchronize_unregister(); + return 1; + } + return 0; +} + +/** + * rv_disable_monitor - disable a given runtime monitor + * + * Returns 0 on success. + */ +int rv_disable_monitor(struct rv_monitor_def *mdef) +{ + __rv_disable_monitor(mdef, true); + return 0; +} + +/** + * rv_enable_monitor - enable a given runtime monitor + * + * Returns 0 on success, error otherwise. + */ +int rv_enable_monitor(struct rv_monitor_def *mdef) +{ + int retval; + + lockdep_assert_held(&rv_interface_lock); + + if (mdef->monitor->enabled) + return 0; + + retval = mdef->monitor->enable(); + + if (!retval) + mdef->monitor->enabled = 1; + + return retval; +} + +/* + * interface for enabling/disabling a monitor. + */ +static ssize_t monitor_enable_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + retval = count; + + mutex_lock(&rv_interface_lock); + + if (val) + retval = rv_enable_monitor(mdef); + else + retval = rv_disable_monitor(mdef); + + mutex_unlock(&rv_interface_lock); + + return retval ? : count; +} + +static const struct file_operations interface_enable_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = monitor_enable_write_data, + .read = monitor_enable_read_data, +}; + +/* + * Interface to read monitors description. + */ +static ssize_t monitor_desc_read_data(struct file *filp, char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct rv_monitor_def *mdef = filp->private_data; + char buff[256]; + + memset(buff, 0, sizeof(buff)); + + snprintf(buff, sizeof(buff), "%s\n", mdef->monitor->description); + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1); +} + +static const struct file_operations interface_desc_fops = { + .open = simple_open, + .llseek = no_llseek, + .read = monitor_desc_read_data, +}; + +/* + * During the registration of a monitor, this function creates + * the monitor dir, where the specific options of the monitor + * are exposed. + */ +static int create_monitor_dir(struct rv_monitor_def *mdef) +{ + struct dentry *root = get_monitors_root(); + const char *name = mdef->monitor->name; + struct dentry *tmp; + int retval; + + mdef->root_d = rv_create_dir(name, root); + if (!mdef->root_d) + return -ENOMEM; + + tmp = rv_create_file("enable", RV_MODE_WRITE, mdef->root_d, mdef, &interface_enable_fops); + if (!tmp) { + retval = -ENOMEM; + goto out_remove_root; + } + + tmp = rv_create_file("desc", RV_MODE_READ, mdef->root_d, mdef, &interface_desc_fops); + if (!tmp) { + retval = -ENOMEM; + goto out_remove_root; + } + + return 0; + +out_remove_root: + rv_remove(mdef->root_d); + return retval; +} + +/* + * Available/Enable monitor shared seq functions. + */ +static int monitors_show(struct seq_file *m, void *p) +{ + struct rv_monitor_def *mon_def = p; + + seq_printf(m, "%s\n", mon_def->monitor->name); + return 0; +} + +/* + * Used by the seq file operations at the end of a read + * operation. + */ +static void monitors_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&rv_interface_lock); +} + +/* + * Available monitor seq functions. + */ +static void *available_monitors_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&rv_interface_lock); + return seq_list_start(&rv_monitors_list, *pos); +} + +static void *available_monitors_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &rv_monitors_list, pos); +} + +/* + * Enable monitor seq functions. + */ +static void *enabled_monitors_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct rv_monitor_def *m_def = p; + + (*pos)++; + + list_for_each_entry_continue(m_def, &rv_monitors_list, list) { + if (m_def->monitor->enabled) + return m_def; + } + + return NULL; +} + +static void *enabled_monitors_start(struct seq_file *m, loff_t *pos) +{ + struct rv_monitor_def *m_def; + loff_t l; + + mutex_lock(&rv_interface_lock); + + if (list_empty(&rv_monitors_list)) + return NULL; + + m_def = list_entry(&rv_monitors_list, struct rv_monitor_def, list); + + for (l = 0; l <= *pos; ) { + m_def = enabled_monitors_next(m, m_def, &l); + if (!m_def) + break; + } + + return m_def; +} + +/* + * available/enabled monitors seq definition. + */ +static const struct seq_operations available_monitors_seq_ops = { + .start = available_monitors_start, + .next = available_monitors_next, + .stop = monitors_stop, + .show = monitors_show +}; + +static const struct seq_operations enabled_monitors_seq_ops = { + .start = enabled_monitors_start, + .next = enabled_monitors_next, + .stop = monitors_stop, + .show = monitors_show +}; + +/* + * available_monitors interface. + */ +static int available_monitors_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &available_monitors_seq_ops); +}; + +static const struct file_operations available_monitors_ops = { + .open = available_monitors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* + * enabled_monitors interface. + */ +static void disable_all_monitors(void) +{ + struct rv_monitor_def *mdef; + int enabled = 0; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry(mdef, &rv_monitors_list, list) + enabled += __rv_disable_monitor(mdef, false); + + if (enabled) { + /* + * Wait for the execution of all events to finish. + * Otherwise, the data used by the monitor could + * be inconsistent. i.e., if the monitor is re-enabled. + */ + tracepoint_synchronize_unregister(); + } + + mutex_unlock(&rv_interface_lock); +} + +static int enabled_monitors_open(struct inode *inode, struct file *file) +{ + if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) + disable_all_monitors(); + + return seq_open(file, &enabled_monitors_seq_ops); +}; + +static ssize_t enabled_monitors_write(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buff[MAX_RV_MONITOR_NAME_SIZE + 2]; + struct rv_monitor_def *mdef; + int retval = -EINVAL; + bool enable = true; + char *ptr = buff; + int len; + + if (count < 1 || count > MAX_RV_MONITOR_NAME_SIZE + 1) + return -EINVAL; + + memset(buff, 0, sizeof(buff)); + + retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count); + if (retval < 0) + return -EFAULT; + + ptr = strim(buff); + + if (ptr[0] == '!') { + enable = false; + ptr++; + } + + len = strlen(ptr); + if (!len) + return count; + + mutex_lock(&rv_interface_lock); + + retval = -EINVAL; + + list_for_each_entry(mdef, &rv_monitors_list, list) { + if (strcmp(ptr, mdef->monitor->name) != 0) + continue; + + /* + * Monitor found! + */ + if (enable) + retval = rv_enable_monitor(mdef); + else + retval = rv_disable_monitor(mdef); + + if (!retval) + retval = count; + + break; + } + + mutex_unlock(&rv_interface_lock); + return retval; +} + +static const struct file_operations enabled_monitors_ops = { + .open = enabled_monitors_open, + .read = seq_read, + .write = enabled_monitors_write, + .llseek = seq_lseek, + .release = seq_release, +}; + +/* + * Monitoring on global switcher! + */ +static bool __read_mostly monitoring_on; + +/** + * rv_monitoring_on - checks if monitoring is on + * + * Returns 1 if on, 0 otherwise. + */ +bool rv_monitoring_on(void) +{ + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_rmb(); + return READ_ONCE(monitoring_on); +} + +/* + * monitoring_on general switcher. + */ +static ssize_t monitoring_on_read_data(struct file *filp, char __user *user_buf, + size_t count, loff_t *ppos) +{ + const char *buff; + + buff = rv_monitoring_on() ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff) + 1); +} + +static void turn_monitoring_off(void) +{ + WRITE_ONCE(monitoring_on, false); + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_wmb(); +} + +static void reset_all_monitors(void) +{ + struct rv_monitor_def *mdef; + + list_for_each_entry(mdef, &rv_monitors_list, list) { + if (mdef->monitor->enabled) + mdef->monitor->reset(); + } +} + +static void turn_monitoring_on(void) +{ + WRITE_ONCE(monitoring_on, true); + /* Ensures that concurrent monitors read consistent monitoring_on */ + smp_wmb(); +} + +static void turn_monitoring_on_with_reset(void) +{ + lockdep_assert_held(&rv_interface_lock); + + if (rv_monitoring_on()) + return; + + /* + * Monitors might be out of sync with the system if events were not + * processed because of !rv_monitoring_on(). + * + * Reset all monitors, forcing a re-sync. + */ + reset_all_monitors(); + turn_monitoring_on(); +} + +static ssize_t monitoring_on_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + mutex_lock(&rv_interface_lock); + + if (val) + turn_monitoring_on_with_reset(); + else + turn_monitoring_off(); + + /* + * Wait for the execution of all events to finish + * before returning to user-space. + */ + tracepoint_synchronize_unregister(); + + mutex_unlock(&rv_interface_lock); + + return count; +} + +static const struct file_operations monitoring_on_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = monitoring_on_write_data, + .read = monitoring_on_read_data, +}; + +static void destroy_monitor_dir(struct rv_monitor_def *mdef) +{ + rv_remove(mdef->root_d); +} + +/** + * rv_register_monitor - register a rv monitor. + * @monitor: The rv_monitor to be registered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_register_monitor(struct rv_monitor *monitor) +{ + struct rv_monitor_def *r; + int retval = 0; + + if (strlen(monitor->name) >= MAX_RV_MONITOR_NAME_SIZE) { + pr_info("Monitor %s has a name longer than %d\n", monitor->name, + MAX_RV_MONITOR_NAME_SIZE); + return -1; + } + + mutex_lock(&rv_interface_lock); + + list_for_each_entry(r, &rv_monitors_list, list) { + if (strcmp(monitor->name, r->monitor->name) == 0) { + pr_info("Monitor %s is already registered\n", monitor->name); + retval = -1; + goto out_unlock; + } + } + + r = kzalloc(sizeof(struct rv_monitor_def), GFP_KERNEL); + if (!r) { + retval = -ENOMEM; + goto out_unlock; + } + + r->monitor = monitor; + + retval = create_monitor_dir(r); + if (retval) { + kfree(r); + goto out_unlock; + } + + list_add_tail(&r->list, &rv_monitors_list); + +out_unlock: + mutex_unlock(&rv_interface_lock); + return retval; +} + +/** + * rv_unregister_monitor - unregister a rv monitor. + * @monitor: The rv_monitor to be unregistered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_unregister_monitor(struct rv_monitor *monitor) +{ + struct rv_monitor_def *ptr, *next; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry_safe(ptr, next, &rv_monitors_list, list) { + if (strcmp(monitor->name, ptr->monitor->name) == 0) { + rv_disable_monitor(ptr); + list_del(&ptr->list); + destroy_monitor_dir(ptr); + } + } + + mutex_unlock(&rv_interface_lock); + return 0; +} + +int __init rv_init_interface(void) +{ + struct dentry *tmp; + + rv_root.root_dir = rv_create_dir("rv", NULL); + if (!rv_root.root_dir) + goto out_err; + + rv_root.monitors_dir = rv_create_dir("monitors", rv_root.root_dir); + if (!rv_root.monitors_dir) + goto out_err; + + tmp = rv_create_file("available_monitors", RV_MODE_READ, rv_root.root_dir, NULL, + &available_monitors_ops); + if (!tmp) + goto out_err; + + tmp = rv_create_file("enabled_monitors", RV_MODE_WRITE, rv_root.root_dir, NULL, + &enabled_monitors_ops); + if (!tmp) + goto out_err; + + tmp = rv_create_file("monitoring_on", RV_MODE_WRITE, rv_root.root_dir, NULL, + &monitoring_on_fops); + if (!tmp) + goto out_err; + + turn_monitoring_on(); + + return 0; + +out_err: + rv_remove(rv_root.root_dir); + printk(KERN_ERR "RV: Error while creating the RV interface\n"); + return 1; +} diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h new file mode 100644 index 0000000000000..50014aa224a78 --- /dev/null +++ b/kernel/trace/rv/rv.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include + +struct rv_interface { + struct dentry *root_dir; + struct dentry *monitors_dir; +}; + +#include "../trace.h" +#include +#include + +#define RV_MODE_WRITE TRACE_MODE_WRITE +#define RV_MODE_READ TRACE_MODE_READ + +#define rv_create_dir tracefs_create_dir +#define rv_create_file tracefs_create_file +#define rv_remove tracefs_remove + +#define MAX_RV_MONITOR_NAME_SIZE 32 + +extern struct mutex rv_interface_lock; + +struct rv_monitor_def { + struct list_head list; + struct rv_monitor *monitor; + struct dentry *root_d; + bool task_monitor; +}; + +struct dentry *get_monitors_root(void); +int rv_disable_monitor(struct rv_monitor_def *mdef); +int rv_enable_monitor(struct rv_monitor_def *mdef); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 7eb5bce625006..301305ec134b6 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9772,6 +9772,8 @@ static __init int tracer_init_tracefs(void) tracer_init_tracefs_work_func(NULL); } + rv_init_interface(); + return 0; } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index ff816fb41e48e..900e75d96c846 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -2005,4 +2005,13 @@ struct trace_min_max_param { extern const struct file_operations trace_min_max_fops; +#ifdef CONFIG_RV +extern int rv_init_interface(void); +#else +static inline int rv_init_interface(void) +{ + return 0; +} +#endif + #endif /* _LINUX_KERNEL_TRACE_H */ From 04acadcb4453cf8011dd3d4ce8d97fecac42d325 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:41 +0200 Subject: [PATCH 29/50] rv: Add runtime reactors interface A runtime monitor can cause a reaction to the detection of an exception on the model's execution. By default, the monitors have tracing reactions, printing the monitor output via tracepoints. But other reactions can be added (on-demand) via this interface. The user interface resembles the kernel tracing interface and presents these files: "available_reactors" - Reading shows the available reactors, one per line. For example: # cat available_reactors nop panic printk "reacting_on" - It is an on/off general switch for reactors, disabling all reactions. "monitors/MONITOR/reactors" - List available reactors, with the select reaction for the given MONITOR inside []. The default one is the nop (no operation) reactor. - Writing the name of a reactor enables it to the given MONITOR. For example: # cat monitors/wip/reactors [nop] panic printk # echo panic > monitors/wip/reactors # cat monitors/wip/reactors nop [panic] printk Link: https://lkml.kernel.org/r/1794eb994637457bdeaa6bad0b8263d2f7eece0c.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 17 ++ kernel/trace/rv/Kconfig | 11 + kernel/trace/rv/Makefile | 1 + kernel/trace/rv/rv.c | 9 + kernel/trace/rv/rv.h | 35 +++ kernel/trace/rv/rv_reactors.c | 508 ++++++++++++++++++++++++++++++++++ 6 files changed, 581 insertions(+) create mode 100644 kernel/trace/rv/rv_reactors.c diff --git a/include/linux/rv.h b/include/linux/rv.h index d8fa9e8be94ac..dcce56987cf7c 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -24,6 +24,14 @@ union rv_task_monitor { }; +#ifdef CONFIG_RV_REACTORS +struct rv_reactor { + const char *name; + const char *description; + void (*react)(char *msg); +}; +#endif + struct rv_monitor { const char *name; const char *description; @@ -31,6 +39,9 @@ struct rv_monitor { int (*enable)(void); void (*disable)(void); void (*reset)(void); +#ifdef CONFIG_RV_REACTORS + void (*react)(char *msg); +#endif }; bool rv_monitoring_on(void); @@ -39,5 +50,11 @@ int rv_register_monitor(struct rv_monitor *monitor); int rv_get_task_monitor_slot(void); void rv_put_task_monitor_slot(int slot); +#ifdef CONFIG_RV_REACTORS +bool rv_reacting_on(void); +int rv_unregister_reactor(struct rv_reactor *reactor); +int rv_register_reactor(struct rv_reactor *reactor); +#endif /* CONFIG_RV_REACTORS */ + #endif /* CONFIG_RV */ #endif /* _LINUX_RV_H */ diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 6d127cdb00dd4..3eb5d48ab4f68 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -10,3 +10,14 @@ menuconfig RV theorem proving). RV works by analyzing the trace of the system's actual execution, comparing it against a formal specification of the system behavior. + +config RV_REACTORS + bool "Runtime verification reactors" + default y + depends on RV + help + Enables the online runtime verification reactors. A runtime + monitor can cause a reaction to the detection of an exception + on the model's execution. By default, the monitors have + tracing reactions, printing the monitor output via tracepoints, + but other reactions can be added (on-demand) via this interface. diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index fd995379df678..8944274d9b412 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_RV) += rv.o +obj-$(CONFIG_RV_REACTORS) += rv_reactors.o diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 731cc961cc70d..45cf64eb26000 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -353,6 +353,10 @@ static int create_monitor_dir(struct rv_monitor_def *mdef) goto out_remove_root; } + retval = reactor_populate_monitor(mdef); + if (retval) + goto out_remove_root; + return 0; out_remove_root: @@ -669,6 +673,7 @@ static const struct file_operations monitoring_on_fops = { static void destroy_monitor_dir(struct rv_monitor_def *mdef) { + reactor_cleanup_monitor(mdef); rv_remove(mdef->root_d); } @@ -747,6 +752,7 @@ int rv_unregister_monitor(struct rv_monitor *monitor) int __init rv_init_interface(void) { struct dentry *tmp; + int retval; rv_root.root_dir = rv_create_dir("rv", NULL); if (!rv_root.root_dir) @@ -770,6 +776,9 @@ int __init rv_init_interface(void) &monitoring_on_fops); if (!tmp) goto out_err; + retval = init_rv_reactors(rv_root.root_dir); + if (retval) + goto out_err; turn_monitoring_on(); diff --git a/kernel/trace/rv/rv.h b/kernel/trace/rv/rv.h index 50014aa224a78..db6cb0913dbd5 100644 --- a/kernel/trace/rv/rv.h +++ b/kernel/trace/rv/rv.h @@ -18,16 +18,51 @@ struct rv_interface { #define rv_remove tracefs_remove #define MAX_RV_MONITOR_NAME_SIZE 32 +#define MAX_RV_REACTOR_NAME_SIZE 32 extern struct mutex rv_interface_lock; +#ifdef CONFIG_RV_REACTORS +struct rv_reactor_def { + struct list_head list; + struct rv_reactor *reactor; + /* protected by the monitor interface lock */ + int counter; +}; +#endif + struct rv_monitor_def { struct list_head list; struct rv_monitor *monitor; struct dentry *root_d; +#ifdef CONFIG_RV_REACTORS + struct rv_reactor_def *rdef; + bool reacting; +#endif bool task_monitor; }; struct dentry *get_monitors_root(void); int rv_disable_monitor(struct rv_monitor_def *mdef); int rv_enable_monitor(struct rv_monitor_def *mdef); + +#ifdef CONFIG_RV_REACTORS +int reactor_populate_monitor(struct rv_monitor_def *mdef); +void reactor_cleanup_monitor(struct rv_monitor_def *mdef); +int init_rv_reactors(struct dentry *root_dir); +#else +static inline int reactor_populate_monitor(struct rv_monitor_def *mdef) +{ + return 0; +} + +static inline void reactor_cleanup_monitor(struct rv_monitor_def *mdef) +{ + return; +} + +static inline int init_rv_reactors(struct dentry *root_dir) +{ + return 0; +} +#endif diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c new file mode 100644 index 0000000000000..a6522c1963821 --- /dev/null +++ b/kernel/trace/rv/rv_reactors.c @@ -0,0 +1,508 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Runtime reactor interface. + * + * A runtime monitor can cause a reaction to the detection of an + * exception on the model's execution. By default, the monitors have + * tracing reactions, printing the monitor output via tracepoints. + * But other reactions can be added (on-demand) via this interface. + * + * == Registering reactors == + * + * The struct rv_reactor defines a callback function to be executed + * in case of a model exception happens. The callback function + * receives a message to be (optionally) printed before executing + * the reaction. + * + * A RV reactor is registered via: + * int rv_register_reactor(struct rv_reactor *reactor) + * And unregistered via: + * int rv_unregister_reactor(struct rv_reactor *reactor) + * + * These functions are exported to modules, enabling reactors to be + * dynamically loaded. + * + * == User interface == + * + * The user interface resembles the kernel tracing interface and + * presents these files: + * + * "available_reactors" + * - List the available reactors, one per line. + * + * For example: + * # cat available_reactors + * nop + * panic + * printk + * + * "reacting_on" + * - It is an on/off general switch for reactors, disabling + * all reactions. + * + * "monitors/MONITOR/reactors" + * - List available reactors, with the select reaction for the given + * MONITOR inside []. The default one is the nop (no operation) + * reactor. + * - Writing the name of an reactor enables it to the given + * MONITOR. + * + * For example: + * # cat monitors/wip/reactors + * [nop] + * panic + * printk + * # echo panic > monitors/wip/reactors + * # cat monitors/wip/reactors + * nop + * [panic] + * printk + */ + +#include + +#include "rv.h" + +/* + * Interface for the reactor register. + */ +static LIST_HEAD(rv_reactors_list); + +static struct rv_reactor_def *get_reactor_rdef_by_name(char *name) +{ + struct rv_reactor_def *r; + + list_for_each_entry(r, &rv_reactors_list, list) { + if (strcmp(name, r->reactor->name) == 0) + return r; + } + return NULL; +} + +/* + * Available reactors seq functions. + */ +static int reactors_show(struct seq_file *m, void *p) +{ + struct rv_reactor_def *rea_def = p; + + seq_printf(m, "%s\n", rea_def->reactor->name); + return 0; +} + +static void reactors_stop(struct seq_file *m, void *p) +{ + mutex_unlock(&rv_interface_lock); +} + +static void *reactors_start(struct seq_file *m, loff_t *pos) +{ + mutex_lock(&rv_interface_lock); + return seq_list_start(&rv_reactors_list, *pos); +} + +static void *reactors_next(struct seq_file *m, void *p, loff_t *pos) +{ + return seq_list_next(p, &rv_reactors_list, pos); +} + +/* + * available_reactors seq definition. + */ +static const struct seq_operations available_reactors_seq_ops = { + .start = reactors_start, + .next = reactors_next, + .stop = reactors_stop, + .show = reactors_show +}; + +/* + * available_reactors interface. + */ +static int available_reactors_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &available_reactors_seq_ops); +}; + +static const struct file_operations available_reactors_ops = { + .open = available_reactors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* + * Monitor's reactor file. + */ +static int monitor_reactor_show(struct seq_file *m, void *p) +{ + struct rv_monitor_def *mdef = m->private; + struct rv_reactor_def *rdef = p; + + if (mdef->rdef == rdef) + seq_printf(m, "[%s]\n", rdef->reactor->name); + else + seq_printf(m, "%s\n", rdef->reactor->name); + return 0; +} + +/* + * available_reactors seq definition. + */ +static const struct seq_operations monitor_reactors_seq_ops = { + .start = reactors_start, + .next = reactors_next, + .stop = reactors_stop, + .show = monitor_reactor_show +}; + +static void monitor_swap_reactors(struct rv_monitor_def *mdef, struct rv_reactor_def *rdef, + bool reacting) +{ + bool monitor_enabled; + + /* nothing to do */ + if (mdef->rdef == rdef) + return; + + monitor_enabled = mdef->monitor->enabled; + if (monitor_enabled) + rv_disable_monitor(mdef); + + /* swap reactor's usage */ + mdef->rdef->counter--; + rdef->counter++; + + mdef->rdef = rdef; + mdef->reacting = reacting; + mdef->monitor->react = rdef->reactor->react; + + if (monitor_enabled) + rv_enable_monitor(mdef); +} + +static ssize_t +monitor_reactors_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + char buff[MAX_RV_REACTOR_NAME_SIZE + 2]; + struct rv_monitor_def *mdef; + struct rv_reactor_def *rdef; + struct seq_file *seq_f; + int retval = -EINVAL; + bool enable; + char *ptr; + int len; + + if (count < 1 || count > MAX_RV_REACTOR_NAME_SIZE + 1) + return -EINVAL; + + memset(buff, 0, sizeof(buff)); + + retval = simple_write_to_buffer(buff, sizeof(buff) - 1, ppos, user_buf, count); + if (retval < 0) + return -EFAULT; + + ptr = strim(buff); + + len = strlen(ptr); + if (!len) + return count; + + /* + * See monitor_reactors_open() + */ + seq_f = file->private_data; + mdef = seq_f->private; + + mutex_lock(&rv_interface_lock); + + retval = -EINVAL; + + list_for_each_entry(rdef, &rv_reactors_list, list) { + if (strcmp(ptr, rdef->reactor->name) != 0) + continue; + + if (rdef == get_reactor_rdef_by_name("nop")) + enable = false; + else + enable = true; + + monitor_swap_reactors(mdef, rdef, enable); + + retval = count; + break; + } + + mutex_unlock(&rv_interface_lock); + + return retval; +} + +/* + * available_reactors interface. + */ +static int monitor_reactors_open(struct inode *inode, struct file *file) +{ + struct rv_monitor_def *mdef = inode->i_private; + struct seq_file *seq_f; + int ret; + + ret = seq_open(file, &monitor_reactors_seq_ops); + if (ret < 0) + return ret; + + /* + * seq_open stores the seq_file on the file->private data. + */ + seq_f = file->private_data; + + /* + * Copy the create file "private" data to the seq_file private data. + */ + seq_f->private = mdef; + + return 0; +}; + +static const struct file_operations monitor_reactors_ops = { + .open = monitor_reactors_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .write = monitor_reactors_write +}; + +static int __rv_register_reactor(struct rv_reactor *reactor) +{ + struct rv_reactor_def *r; + + list_for_each_entry(r, &rv_reactors_list, list) { + if (strcmp(reactor->name, r->reactor->name) == 0) { + pr_info("Reactor %s is already registered\n", reactor->name); + return -EINVAL; + } + } + + r = kzalloc(sizeof(struct rv_reactor_def), GFP_KERNEL); + if (!r) + return -ENOMEM; + + r->reactor = reactor; + r->counter = 0; + + list_add_tail(&r->list, &rv_reactors_list); + + return 0; +} + +/** + * rv_register_reactor - register a rv reactor. + * @reactor: The rv_reactor to be registered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_register_reactor(struct rv_reactor *reactor) +{ + int retval = 0; + + if (strlen(reactor->name) >= MAX_RV_REACTOR_NAME_SIZE) { + pr_info("Reactor %s has a name longer than %d\n", + reactor->name, MAX_RV_MONITOR_NAME_SIZE); + return -EINVAL; + } + + mutex_lock(&rv_interface_lock); + retval = __rv_register_reactor(reactor); + mutex_unlock(&rv_interface_lock); + return retval; +} + +/** + * rv_unregister_reactor - unregister a rv reactor. + * @reactor: The rv_reactor to be unregistered. + * + * Returns 0 if successful, error otherwise. + */ +int rv_unregister_reactor(struct rv_reactor *reactor) +{ + struct rv_reactor_def *ptr, *next; + + mutex_lock(&rv_interface_lock); + + list_for_each_entry_safe(ptr, next, &rv_reactors_list, list) { + if (strcmp(reactor->name, ptr->reactor->name) == 0) { + + if (!ptr->counter) { + list_del(&ptr->list); + } else { + printk(KERN_WARNING + "rv: the rv_reactor %s is in use by %d monitor(s)\n", + ptr->reactor->name, ptr->counter); + printk(KERN_WARNING "rv: the rv_reactor %s cannot be removed\n", + ptr->reactor->name); + return -EBUSY; + } + } + } + + mutex_unlock(&rv_interface_lock); + return 0; +} + +/* + * reacting_on interface. + */ +static bool __read_mostly reacting_on; + +/** + * rv_reacting_on - checks if reacting is on + * + * Returns 1 if on, 0 otherwise. + */ +bool rv_reacting_on(void) +{ + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_rmb(); + return READ_ONCE(reacting_on); +} + +static ssize_t reacting_on_read_data(struct file *filp, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + char *buff; + + buff = rv_reacting_on() ? "1\n" : "0\n"; + + return simple_read_from_buffer(user_buf, count, ppos, buff, strlen(buff)+1); +} + +static void turn_reacting_off(void) +{ + WRITE_ONCE(reacting_on, false); + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_wmb(); +} + +static void turn_reacting_on(void) +{ + WRITE_ONCE(reacting_on, true); + /* Ensures that concurrent monitors read consistent reacting_on */ + smp_wmb(); +} + +static ssize_t reacting_on_write_data(struct file *filp, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + int retval; + bool val; + + retval = kstrtobool_from_user(user_buf, count, &val); + if (retval) + return retval; + + mutex_lock(&rv_interface_lock); + + if (val) + turn_reacting_on(); + else + turn_reacting_off(); + + /* + * Wait for the execution of all events to finish + * before returning to user-space. + */ + tracepoint_synchronize_unregister(); + + mutex_unlock(&rv_interface_lock); + + return count; +} + +static const struct file_operations reacting_on_fops = { + .open = simple_open, + .llseek = no_llseek, + .write = reacting_on_write_data, + .read = reacting_on_read_data, +}; + +/** + * reactor_populate_monitor - creates per monitor reactors file + * @mdef: monitor's definition. + * + * Returns 0 if successful, error otherwise. + */ +int reactor_populate_monitor(struct rv_monitor_def *mdef) +{ + struct dentry *tmp; + + tmp = rv_create_file("reactors", RV_MODE_WRITE, mdef->root_d, mdef, &monitor_reactors_ops); + if (!tmp) + return -ENOMEM; + + /* + * Configure as the rv_nop reactor. + */ + mdef->rdef = get_reactor_rdef_by_name("nop"); + mdef->rdef->counter++; + mdef->reacting = false; + + return 0; +} + +/** + * reactor_cleanup_monitor - cleanup a monitor reference + * @mdef: monitor's definition. + */ +void reactor_cleanup_monitor(struct rv_monitor_def *mdef) +{ + lockdep_assert_held(&rv_interface_lock); + mdef->rdef->counter--; + WARN_ON_ONCE(mdef->rdef->counter < 0); +} + +/* + * Nop reactor register + */ +static void rv_nop_reaction(char *msg) +{ +} + +static struct rv_reactor rv_nop = { + .name = "nop", + .description = "no-operation reactor: do nothing.", + .react = rv_nop_reaction +}; + +int init_rv_reactors(struct dentry *root_dir) +{ + struct dentry *available, *reacting; + int retval; + + available = rv_create_file("available_reactors", RV_MODE_READ, root_dir, NULL, + &available_reactors_ops); + if (!available) + goto out_err; + + reacting = rv_create_file("reacting_on", RV_MODE_WRITE, root_dir, NULL, &reacting_on_fops); + if (!reacting) + goto rm_available; + + retval = __rv_register_reactor(&rv_nop); + if (retval) + goto rm_reacting; + + turn_reacting_on(); + + return 0; + +rm_reacting: + rv_remove(reacting); +rm_available: + rv_remove(available); +out_err: + return -ENOMEM; +} From 09ecd8b8c585c95a3b8dbdec86c15a981fdfeba1 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:42 +0200 Subject: [PATCH 30/50] rv/include: Add helper functions for deterministic automata Formally, a deterministic automaton, denoted by G, is defined as a quintuple: G = { X, E, f, x_0, X_m } where: - X is the set of states; - E is the finite set of events; - x_0 is the initial state; - X_m (subset of X) is the set of marked states. - f : X x E -> X $ is the transition function. It defines the state transition in the occurrence of a event from E in the state X. In the special case of deterministic automata, the occurrence of the event in E in a state in X has a deterministic next state from X. An automaton can also be represented using a graphical format of vertices (nodes) and edges. The open-source tool Graphviz can produce this graphic format using the (textual) DOT language as the source code. The dot2c tool presented in this paper: De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo Silva. Efficient formal verification for the Linux kernel. In: International Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. p. 315-332. Translates a deterministic automaton in the DOT format into a C source code representation that to be used for monitoring. This header file implements helper functions to facilitate the usage of the C output from dot2c/k for monitoring. Link: https://lkml.kernel.org/r/563234f2bfa84b540f60cf9e39c2d9f0eea95a55.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/rv/automata.h | 75 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 include/rv/automata.h diff --git a/include/rv/automata.h b/include/rv/automata.h new file mode 100644 index 0000000000000..eb9e636809a0d --- /dev/null +++ b/include/rv/automata.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Deterministic automata helper functions, to be used with the automata + * models in C generated by the dot2k tool. + */ + +/* + * DECLARE_AUTOMATA_HELPERS - define a set of helper functions for automata + * + * Define a set of helper functions for automata. The 'name' argument is used + * as suffix for the functions and data. These functions will handle automaton + * with data type 'type'. + */ +#define DECLARE_AUTOMATA_HELPERS(name, type) \ + \ +/* \ + * model_get_state_name_##name - return the (string) name of the given state \ + */ \ +static char *model_get_state_name_##name(enum states_##name state) \ +{ \ + if ((state < 0) || (state >= state_max_##name)) \ + return "INVALID"; \ + \ + return automaton_##name.state_names[state]; \ +} \ + \ +/* \ + * model_get_event_name_##name - return the (string) name of the given event \ + */ \ +static char *model_get_event_name_##name(enum events_##name event) \ +{ \ + if ((event < 0) || (event >= event_max_##name)) \ + return "INVALID"; \ + \ + return automaton_##name.event_names[event]; \ +} \ + \ +/* \ + * model_get_initial_state_##name - return the automaton's initial state \ + */ \ +static inline type model_get_initial_state_##name(void) \ +{ \ + return automaton_##name.initial_state; \ +} \ + \ +/* \ + * model_get_next_state_##name - process an automaton event occurrence \ + * \ + * Given the current state (curr_state) and the event (event), returns \ + * the next state, or INVALID_STATE in case of error. \ + */ \ +static inline type model_get_next_state_##name(enum states_##name curr_state, \ + enum events_##name event) \ +{ \ + if ((curr_state < 0) || (curr_state >= state_max_##name)) \ + return INVALID_STATE; \ + \ + if ((event < 0) || (event >= event_max_##name)) \ + return INVALID_STATE; \ + \ + return automaton_##name.function[curr_state][event]; \ +} \ + \ +/* \ + * model_is_final_state_##name - check if the given state is a final state \ + */ \ +static inline bool model_is_final_state_##name(enum states_##name state) \ +{ \ + if ((state < 0) || (state >= state_max_##name)) \ + return 0; \ + \ + return automaton_##name.final_states[state]; \ +} From 792575348ff70e05c6040d02fce38e949ef92c37 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:43 +0200 Subject: [PATCH 31/50] rv/include: Add deterministic automata monitor definition via C macros In Linux terms, the runtime verification monitors are encapsulated inside the "RV monitor" abstraction. The "RV monitor" includes a set of instances of the monitor (per-cpu monitor, per-task monitor, and so on), the helper functions that glue the monitor to the system reference model, and the trace output as a reaction for event parsing and exceptions, as depicted below: Linux +----- RV Monitor ----------------------------------+ Formal Realm | | Realm +-------------------+ +----------------+ +-----------------+ | Linux kernel | | Monitor | | Reference | | Tracing | -> | Instance(s) | <- | Model | | (instrumentation) | | (verification) | | (specification) | +-------------------+ +----------------+ +-----------------+ | | | | V | | +----------+ | | | Reaction | | | +--+--+--+-+ | | | | | | | | | +-> trace output ? | +------------------------|--|----------------------+ | +----> panic ? +-------> Add the rv/da_monitor.h, enabling automatic code generation for the *Monitor Instance(s)* using C macros, and code to support it. The benefits of the usage of macro for monitor synthesis are 3-fold as it: - Reduces the code duplication; - Facilitates the bug fix/improvement; - Avoids the case of developers changing the core of the monitor code to manipulate the model in a (let's say) non-standard way. This initial implementation presents three different types of monitor instances: - DECLARE_DA_MON_GLOBAL(name, type) - DECLARE_DA_MON_PER_CPU(name, type) - DECLARE_DA_MON_PER_TASK(name, type) The first declares the functions for a global deterministic automata monitor, the second for monitors with per-cpu instances, and the third with per-task instances. Link: https://lkml.kernel.org/r/51b0bf425a281e226dfeba7401d2115d6091f84e.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/linux/rv.h | 10 + include/rv/da_monitor.h | 541 ++++++++++++++++++++++++++++++++++++++ include/trace/events/rv.h | 120 +++++++++ kernel/fork.c | 14 + kernel/trace/rv/Kconfig | 11 + kernel/trace/rv/rv.c | 5 + 6 files changed, 701 insertions(+) create mode 100644 include/rv/da_monitor.h create mode 100644 include/trace/events/rv.h diff --git a/include/linux/rv.h b/include/linux/rv.h index dcce56987cf7c..8883b41d88ec4 100644 --- a/include/linux/rv.h +++ b/include/linux/rv.h @@ -7,7 +7,16 @@ #ifndef _LINUX_RV_H #define _LINUX_RV_H +#define MAX_DA_NAME_LEN 24 + #ifdef CONFIG_RV +/* + * Deterministic automaton per-object variables. + */ +struct da_monitor { + bool monitoring; + unsigned int curr_state; +}; /* * Per-task RV monitors count. Nowadays fixed in RV_PER_TASK_MONITORS. @@ -22,6 +31,7 @@ * Futher monitor types are expected, so make this a union. */ union rv_task_monitor { + struct da_monitor da_mon; }; #ifdef CONFIG_RV_REACTORS diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h new file mode 100644 index 0000000000000..001bc298289f5 --- /dev/null +++ b/include/rv/da_monitor.h @@ -0,0 +1,541 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Deterministic automata (DA) monitor functions, to be used together + * with automata models in C generated by the dot2k tool. + * + * The dot2k tool is available at tools/verification/dot2k/ + */ + +#include +#include +#include + +#ifdef CONFIG_RV_REACTORS + +#define DECLARE_RV_REACTING_HELPERS(name, type) \ +static char REACT_MSG_##name[1024]; \ + \ +static inline char *format_react_msg_##name(type curr_state, type event) \ +{ \ + snprintf(REACT_MSG_##name, 1024, \ + "rv: monitor %s does not allow event %s on state %s\n", \ + #name, \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(curr_state)); \ + return REACT_MSG_##name; \ +} \ + \ +static void cond_react_##name(char *msg) \ +{ \ + if (rv_##name.react) \ + rv_##name.react(msg); \ +} \ + \ +static bool rv_reacting_on_##name(void) \ +{ \ + return rv_reacting_on(); \ +} + +#else /* CONFIG_RV_REACTOR */ + +#define DECLARE_RV_REACTING_HELPERS(name, type) \ +static inline char *format_react_msg_##name(type curr_state, type event) \ +{ \ + return NULL; \ +} \ + \ +static void cond_react_##name(char *msg) \ +{ \ + return; \ +} \ + \ +static bool rv_reacting_on_##name(void) \ +{ \ + return 0; \ +} +#endif + +/* + * Generic helpers for all types of deterministic automata monitors. + */ +#define DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ + \ +DECLARE_RV_REACTING_HELPERS(name, type) \ + \ +/* \ + * da_monitor_reset_##name - reset a monitor and setting it to init state \ + */ \ +static inline void da_monitor_reset_##name(struct da_monitor *da_mon) \ +{ \ + da_mon->monitoring = 0; \ + da_mon->curr_state = model_get_initial_state_##name(); \ +} \ + \ +/* \ + * da_monitor_curr_state_##name - return the current state \ + */ \ +static inline type da_monitor_curr_state_##name(struct da_monitor *da_mon) \ +{ \ + return da_mon->curr_state; \ +} \ + \ +/* \ + * da_monitor_set_state_##name - set the new current state \ + */ \ +static inline void \ +da_monitor_set_state_##name(struct da_monitor *da_mon, enum states_##name state) \ +{ \ + da_mon->curr_state = state; \ +} \ + \ +/* \ + * da_monitor_start_##name - start monitoring \ + * \ + * The monitor will ignore all events until monitoring is set to true. This \ + * function needs to be called to tell the monitor to start monitoring. \ + */ \ +static inline void da_monitor_start_##name(struct da_monitor *da_mon) \ +{ \ + da_mon->curr_state = model_get_initial_state_##name(); \ + da_mon->monitoring = 1; \ +} \ + \ +/* \ + * da_monitoring_##name - returns true if the monitor is processing events \ + */ \ +static inline bool da_monitoring_##name(struct da_monitor *da_mon) \ +{ \ + return da_mon->monitoring; \ +} \ + \ +/* \ + * da_monitor_enabled_##name - checks if the monitor is enabled \ + */ \ +static inline bool da_monitor_enabled_##name(void) \ +{ \ + /* global switch */ \ + if (unlikely(!rv_monitoring_on())) \ + return 0; \ + \ + /* monitor enabled */ \ + if (unlikely(!rv_##name.enabled)) \ + return 0; \ + \ + return 1; \ +} \ + \ +/* \ + * da_monitor_handling_event_##name - checks if the monitor is ready to handle events \ + */ \ +static inline bool da_monitor_handling_event_##name(struct da_monitor *da_mon) \ +{ \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + /* monitor is actually monitoring */ \ + if (unlikely(!da_monitoring_##name(da_mon))) \ + return 0; \ + \ + return 1; \ +} + +/* + * Event handler for implicit monitors. Implicit monitor is the one which the + * handler does not need to specify which da_monitor to manipulate. Examples + * of implicit monitor are the per_cpu or the global ones. + */ +#define DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ + \ +static inline bool \ +da_event_##name(struct da_monitor *da_mon, enum events_##name event) \ +{ \ + type curr_state = da_monitor_curr_state_##name(da_mon); \ + type next_state = model_get_next_state_##name(curr_state, event); \ + \ + if (next_state != INVALID_STATE) { \ + da_monitor_set_state_##name(da_mon, next_state); \ + \ + trace_event_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + \ + return true; \ + } \ + \ + if (rv_reacting_on_##name()) \ + cond_react_##name(format_react_msg_##name(curr_state, event)); \ + \ + trace_error_##name(model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + \ + return false; \ +} \ + +/* + * Event handler for per_task monitors. + */ +#define DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ + \ +static inline bool da_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ + enum events_##name event) \ +{ \ + type curr_state = da_monitor_curr_state_##name(da_mon); \ + type next_state = model_get_next_state_##name(curr_state, event); \ + \ + if (next_state != INVALID_STATE) { \ + da_monitor_set_state_##name(da_mon, next_state); \ + \ + trace_event_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event), \ + model_get_state_name_##name(next_state), \ + model_is_final_state_##name(next_state)); \ + \ + return true; \ + } \ + \ + if (rv_reacting_on_##name()) \ + cond_react_##name(format_react_msg_##name(curr_state, event)); \ + \ + trace_error_##name(tsk->pid, \ + model_get_state_name_##name(curr_state), \ + model_get_event_name_##name(event)); \ + \ + return false; \ +} + +/* + * Functions to define, init and get a global monitor. + */ +#define DECLARE_DA_MON_INIT_GLOBAL(name, type) \ + \ +/* \ + * global monitor (a single variable) \ + */ \ +static struct da_monitor da_mon_##name; \ + \ +/* \ + * da_get_monitor_##name - return the global monitor address \ + */ \ +static struct da_monitor *da_get_monitor_##name(void) \ +{ \ + return &da_mon_##name; \ +} \ + \ +/* \ + * da_monitor_reset_all_##name - reset the single monitor \ + */ \ +static void da_monitor_reset_all_##name(void) \ +{ \ + da_monitor_reset_##name(da_get_monitor_##name()); \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize a monitor \ + */ \ +static inline int da_monitor_init_##name(void) \ +{ \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - destroy the monitor \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + return; \ +} + +/* + * Functions to define, init and get a per-cpu monitor. + */ +#define DECLARE_DA_MON_INIT_PER_CPU(name, type) \ + \ +/* \ + * per-cpu monitor variables \ + */ \ +DEFINE_PER_CPU(struct da_monitor, da_mon_##name); \ + \ +/* \ + * da_get_monitor_##name - return current CPU monitor address \ + */ \ +static struct da_monitor *da_get_monitor_##name(void) \ +{ \ + return this_cpu_ptr(&da_mon_##name); \ +} \ + \ +/* \ + * da_monitor_reset_all_##name - reset all CPUs' monitor \ + */ \ +static void da_monitor_reset_all_##name(void) \ +{ \ + struct da_monitor *da_mon; \ + int cpu; \ + for_each_cpu(cpu, cpu_online_mask) { \ + da_mon = per_cpu_ptr(&da_mon_##name, cpu); \ + da_monitor_reset_##name(da_mon); \ + } \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize all CPUs' monitor \ + */ \ +static inline int da_monitor_init_##name(void) \ +{ \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - destroy the monitor \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + return; \ +} + +/* + * Functions to define, init and get a per-task monitor. + */ +#define DECLARE_DA_MON_INIT_PER_TASK(name, type) \ + \ +/* \ + * The per-task monitor is stored a vector in the task struct. This variable \ + * stores the position on the vector reserved for this monitor. \ + */ \ +static int task_mon_slot_##name = RV_PER_TASK_MONITOR_INIT; \ + \ +/* \ + * da_get_monitor_##name - return the monitor in the allocated slot for tsk \ + */ \ +static inline struct da_monitor *da_get_monitor_##name(struct task_struct *tsk) \ +{ \ + return &tsk->rv[task_mon_slot_##name].da_mon; \ +} \ + \ +static void da_monitor_reset_all_##name(void) \ +{ \ + struct task_struct *g, *p; \ + \ + read_lock(&tasklist_lock); \ + for_each_process_thread(g, p) \ + da_monitor_reset_##name(da_get_monitor_##name(p)); \ + read_unlock(&tasklist_lock); \ +} \ + \ +/* \ + * da_monitor_init_##name - initialize the per-task monitor \ + * \ + * Try to allocate a slot in the task's vector of monitors. If there \ + * is an available slot, use it and reset all task's monitor. \ + */ \ +static int da_monitor_init_##name(void) \ +{ \ + int slot; \ + \ + slot = rv_get_task_monitor_slot(); \ + if (slot < 0 || slot >= RV_PER_TASK_MONITOR_INIT) \ + return slot; \ + \ + task_mon_slot_##name = slot; \ + \ + da_monitor_reset_all_##name(); \ + return 0; \ +} \ + \ +/* \ + * da_monitor_destroy_##name - return the allocated slot \ + */ \ +static inline void da_monitor_destroy_##name(void) \ +{ \ + if (task_mon_slot_##name == RV_PER_TASK_MONITOR_INIT) { \ + WARN_ONCE(1, "Disabling a disabled monitor: " #name); \ + return; \ + } \ + rv_put_task_monitor_slot(task_mon_slot_##name); \ + task_mon_slot_##name = RV_PER_TASK_MONITOR_INIT; \ + return; \ +} + +/* + * Handle event for implicit monitor: da_get_monitor_##name() will figure out + * the monitor. + */ +#define DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) \ + \ +static inline void __da_handle_event_##name(struct da_monitor *da_mon, \ + enum events_##name event) \ +{ \ + bool retval; \ + \ + retval = da_event_##name(da_mon, event); \ + if (!retval) \ + da_monitor_reset_##name(da_mon); \ +} \ + \ +/* \ + * da_handle_event_##name - handle an event \ + */ \ +static inline void da_handle_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon = da_get_monitor_##name(); \ + bool retval; \ + \ + retval = da_monitor_handling_event_##name(da_mon); \ + if (!retval) \ + return; \ + \ + __da_handle_event_##name(da_mon, event); \ +} \ + \ +/* \ + * da_handle_start_event_##name - start monitoring or handle event \ + * \ + * This function is used to notify the monitor that the system is returning \ + * to the initial state, so the monitor can start monitoring in the next event. \ + * Thus: \ + * \ + * If the monitor already started, handle the event. \ + * If the monitor did not start yet, start the monitor but skip the event. \ + */ \ +static inline bool da_handle_start_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) { \ + da_monitor_start_##name(da_mon); \ + return 0; \ + } \ + \ + __da_handle_event_##name(da_mon, event); \ + \ + return 1; \ +} \ + \ +/* \ + * da_handle_start_run_event_##name - start monitoring and handle event \ + * \ + * This function is used to notify the monitor that the system is in the \ + * initial state, so the monitor can start monitoring and handling event. \ + */ \ +static inline bool da_handle_start_run_event_##name(enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) \ + da_monitor_start_##name(da_mon); \ + \ + __da_handle_event_##name(da_mon, event); \ + \ + return 1; \ +} + +/* + * Handle event for per task. + */ +#define DECLARE_DA_MON_MONITOR_HANDLER_PER_TASK(name, type) \ + \ +static inline void \ +__da_handle_event_##name(struct da_monitor *da_mon, struct task_struct *tsk, \ + enum events_##name event) \ +{ \ + bool retval; \ + \ + retval = da_event_##name(da_mon, tsk, event); \ + if (!retval) \ + da_monitor_reset_##name(da_mon); \ +} \ + \ +/* \ + * da_handle_event_##name - handle an event \ + */ \ +static inline void \ +da_handle_event_##name(struct task_struct *tsk, enum events_##name event) \ +{ \ + struct da_monitor *da_mon = da_get_monitor_##name(tsk); \ + bool retval; \ + \ + retval = da_monitor_handling_event_##name(da_mon); \ + if (!retval) \ + return; \ + \ + __da_handle_event_##name(da_mon, tsk, event); \ +} \ + \ +/* \ + * da_handle_start_event_##name - start monitoring or handle event \ + * \ + * This function is used to notify the monitor that the system is returning \ + * to the initial state, so the monitor can start monitoring in the next event. \ + * Thus: \ + * \ + * If the monitor already started, handle the event. \ + * If the monitor did not start yet, start the monitor but skip the event. \ + */ \ +static inline bool \ +da_handle_start_event_##name(struct task_struct *tsk, enum events_##name event) \ +{ \ + struct da_monitor *da_mon; \ + \ + if (!da_monitor_enabled_##name()) \ + return 0; \ + \ + da_mon = da_get_monitor_##name(tsk); \ + \ + if (unlikely(!da_monitoring_##name(da_mon))) { \ + da_monitor_start_##name(da_mon); \ + return 0; \ + } \ + \ + __da_handle_event_##name(da_mon, tsk, event); \ + \ + return 1; \ +} + +/* + * Entry point for the global monitor. + */ +#define DECLARE_DA_MON_GLOBAL(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ +DECLARE_DA_MON_INIT_GLOBAL(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) + +/* + * Entry point for the per-cpu monitor. + */ +#define DECLARE_DA_MON_PER_CPU(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_IMPLICIT(name, type) \ +DECLARE_DA_MON_INIT_PER_CPU(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_IMPLICIT(name, type) + +/* + * Entry point for the per-task monitor. + */ +#define DECLARE_DA_MON_PER_TASK(name, type) \ + \ +DECLARE_AUTOMATA_HELPERS(name, type) \ +DECLARE_DA_MON_GENERIC_HELPERS(name, type) \ +DECLARE_DA_MON_MODEL_HANDLER_PER_TASK(name, type) \ +DECLARE_DA_MON_INIT_PER_TASK(name, type) \ +DECLARE_DA_MON_MONITOR_HANDLER_PER_TASK(name, type) diff --git a/include/trace/events/rv.h b/include/trace/events/rv.h new file mode 100644 index 0000000000000..20a2e09c64166 --- /dev/null +++ b/include/trace/events/rv.h @@ -0,0 +1,120 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rv + +#if !defined(_TRACE_RV_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RV_H + +#include +#include + +#ifdef CONFIG_DA_MON_EVENTS_IMPLICIT +DECLARE_EVENT_CLASS(event_da_monitor, + + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + + TP_ARGS(state, event, next_state, final_state), + + TP_STRUCT__entry( + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + __array( char, next_state, MAX_DA_NAME_LEN ) + __field( bool, final_state ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); + __entry->final_state = final_state; + ), + + TP_printk("%s x %s -> %s %s", + __entry->state, + __entry->event, + __entry->next_state, + __entry->final_state ? "(final)" : "") +); + +DECLARE_EVENT_CLASS(error_da_monitor, + + TP_PROTO(char *state, char *event), + + TP_ARGS(state, event), + + TP_STRUCT__entry( + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + ), + + TP_printk("event %s not expected in the state %s", + __entry->event, + __entry->state) +); +#endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ + +#ifdef CONFIG_DA_MON_EVENTS_ID +DECLARE_EVENT_CLASS(event_da_monitor_id, + + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + + TP_ARGS(id, state, event, next_state, final_state), + + TP_STRUCT__entry( + __field( int, id ) + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + __array( char, next_state, MAX_DA_NAME_LEN ) + __field( bool, final_state ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + memcpy(__entry->next_state, next_state, MAX_DA_NAME_LEN); + __entry->id = id; + __entry->final_state = final_state; + ), + + TP_printk("%d: %s x %s -> %s %s", + __entry->id, + __entry->state, + __entry->event, + __entry->next_state, + __entry->final_state ? "(final)" : "") +); + +DECLARE_EVENT_CLASS(error_da_monitor_id, + + TP_PROTO(int id, char *state, char *event), + + TP_ARGS(id, state, event), + + TP_STRUCT__entry( + __field( int, id ) + __array( char, state, MAX_DA_NAME_LEN ) + __array( char, event, MAX_DA_NAME_LEN ) + ), + + TP_fast_assign( + memcpy(__entry->state, state, MAX_DA_NAME_LEN); + memcpy(__entry->event, event, MAX_DA_NAME_LEN); + __entry->id = id; + ), + + TP_printk("%d: event %s not expected in the state %s", + __entry->id, + __entry->event, + __entry->state) +); +#endif /* CONFIG_DA_MON_EVENTS_ID */ +#endif /* _TRACE_RV_H */ + +/* This part ust be outside protection */ +#undef TRACE_INCLUDE_PATH +#include diff --git a/kernel/fork.c b/kernel/fork.c index 9d44f2d46c696..6f1f82ccd5f27 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1964,6 +1964,18 @@ static void copy_oom_score_adj(u64 clone_flags, struct task_struct *tsk) mutex_unlock(&oom_adj_mutex); } +#ifdef CONFIG_RV +static void rv_task_fork(struct task_struct *p) +{ + int i; + + for (i = 0; i < RV_PER_TASK_MONITORS; i++) + p->rv[i].da_mon.monitoring = false; +} +#else +#define rv_task_fork(p) do {} while (0) +#endif + /* * This creates a new process as a copy of the old one, * but does not actually start it yet. @@ -2399,6 +2411,8 @@ static __latent_entropy struct task_struct *copy_process( */ copy_seccomp(p); + rv_task_fork(p); + rseq_fork(p, clone_flags); /* Don't start children in a dying pid namespace */ diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 3eb5d48ab4f68..8714800e22ade 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -1,5 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only # +config DA_MON_EVENTS + bool + +config DA_MON_EVENTS_IMPLICIT + select DA_MON_EVENTS + bool + +config DA_MON_EVENTS_ID + select DA_MON_EVENTS + bool + menuconfig RV bool "Runtime Verification" depends on TRACING diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index 45cf64eb26000..df6678c863345 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -140,6 +140,11 @@ #include #include +#ifdef CONFIG_DA_MON_EVENTS +#define CREATE_TRACE_POINTS +#include +#endif + #include "rv.h" DEFINE_MUTEX(rv_interface_lock); From cc8e71c81746de1f8a44873015bc963a868eccba Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:44 +0200 Subject: [PATCH 32/50] rv/include: Add instrumentation helper functions Instrumentation helper functions to facilitate the instrumentation of auto-generated RV monitors create by dot2k. Link: https://lkml.kernel.org/r/3b36c9435f9d9299beb84e5c7c46920e205bedec.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- include/rv/instrumentation.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/rv/instrumentation.h diff --git a/include/rv/instrumentation.h b/include/rv/instrumentation.h new file mode 100644 index 0000000000000..d4e7a02ede1a3 --- /dev/null +++ b/include/rv/instrumentation.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Helper functions to facilitate the instrumentation of auto-generated + * RV monitors create by dot2k. + * + * The dot2k tool is available at tools/verification/dot2/ + */ + +#include + +/* + * rv_attach_trace_probe - check and attach a handler function to a tracepoint + */ +#define rv_attach_trace_probe(monitor, tp, rv_handler) \ + do { \ + check_trace_callback_type_##tp(rv_handler); \ + WARN_ONCE(register_trace_##tp(rv_handler, NULL), \ + "fail attaching " #monitor " " #tp "handler"); \ + } while (0) + +/* + * rv_detach_trace_probe - detach a handler function to a tracepoint + */ +#define rv_detach_trace_probe(monitor, tp, rv_handler) \ + do { \ + unregister_trace_##tp(rv_handler, NULL); \ + } while (0) From ff0aaf671230d409a68fd7400f41e9eb3ac61dd8 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:45 +0200 Subject: [PATCH 33/50] Documentation/rv: Add a basic documentation Add the runtime-verification.rst document, explaining the basics of RV and how to use the interface. Link: https://lkml.kernel.org/r/4be7d1a88ab1e2eb0767521e1ab52a149a154bc4.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/index.rst | 1 + Documentation/trace/rv/index.rst | 9 + .../trace/rv/runtime-verification.rst | 231 ++++++++++++++++++ kernel/trace/rv/Kconfig | 3 + kernel/trace/rv/rv.c | 3 + 5 files changed, 247 insertions(+) create mode 100644 Documentation/trace/rv/index.rst create mode 100644 Documentation/trace/rv/runtime-verification.rst diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index f9b7bcb5a6302..2d73e8697523e 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -32,3 +32,4 @@ Linux Tracing Technologies sys-t coresight/index user_events + rv/index diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst new file mode 100644 index 0000000000000..b54e49b1d0deb --- /dev/null +++ b/Documentation/trace/rv/index.rst @@ -0,0 +1,9 @@ +==================== +Runtime Verification +==================== + +.. toctree:: + :maxdepth: 2 + :glob: + + runtime-verification.rst diff --git a/Documentation/trace/rv/runtime-verification.rst b/Documentation/trace/rv/runtime-verification.rst new file mode 100644 index 0000000000000..c46b6149470ef --- /dev/null +++ b/Documentation/trace/rv/runtime-verification.rst @@ -0,0 +1,231 @@ +==================== +Runtime Verification +==================== + +Runtime Verification (RV) is a lightweight (yet rigorous) method that +complements classical exhaustive verification techniques (such as *model +checking* and *theorem proving*) with a more practical approach for complex +systems. + +Instead of relying on a fine-grained model of a system (e.g., a +re-implementation a instruction level), RV works by analyzing the trace of the +system's actual execution, comparing it against a formal specification of +the system behavior. + +The main advantage is that RV can give precise information on the runtime +behavior of the monitored system, without the pitfalls of developing models +that require a re-implementation of the entire system in a modeling language. +Moreover, given an efficient monitoring method, it is possible execute an +*online* verification of a system, enabling the *reaction* for unexpected +events, avoiding, for example, the propagation of a failure on safety-critical +systems. + +Runtime Monitors and Reactors +============================= + +A monitor is the central part of the runtime verification of a system. The +monitor stands in between the formal specification of the desired (or +undesired) behavior, and the trace of the actual system. + +In Linux terms, the runtime verification monitors are encapsulated inside the +*RV monitor* abstraction. A *RV monitor* includes a reference model of the +system, a set of instances of the monitor (per-cpu monitor, per-task monitor, +and so on), and the helper functions that glue the monitor to the system via +trace, as depicted bellow:: + + Linux +---- RV Monitor ----------------------------------+ Formal + Realm | | Realm + +-------------------+ +----------------+ +-----------------+ + | Linux kernel | | Monitor | | Reference | + | Tracing | -> | Instance(s) | <- | Model | + | (instrumentation) | | (verification) | | (specification) | + +-------------------+ +----------------+ +-----------------+ + | | | + | V | + | +----------+ | + | | Reaction | | + | +--+--+--+-+ | + | | | | | + | | | +-> trace output ? | + +------------------------|--|----------------------+ + | +----> panic ? + +-------> + +In addition to the verification and monitoring of the system, a monitor can +react to an unexpected event. The forms of reaction can vary from logging the +event occurrence to the enforcement of the correct behavior to the extreme +action of taking a system down to avoid the propagation of a failure. + +In Linux terms, a *reactor* is an reaction method available for *RV monitors*. +By default, all monitors should provide a trace output of their actions, +which is already a reaction. In addition, other reactions will be available +so the user can enable them as needed. + +For further information about the principles of runtime verification and +RV applied to Linux: + + Bartocci, Ezio, et al. *Introduction to runtime verification.* In: Lectures on + Runtime Verification. Springer, Cham, 2018. p. 1-33. + + Falcone, Ylies, et al. *A taxonomy for classifying runtime verification tools.* + In: International Conference on Runtime Verification. Springer, Cham, 2018. p. + 241-262. + + De Oliveira, Daniel Bristot. *Automata-based formal analysis and + verification of the real-time Linux kernel.* Ph.D. Thesis, 2020. + +Online RV monitors +================== + +Monitors can be classified as *offline* and *online* monitors. *Offline* +monitor process the traces generated by a system after the events, generally by +reading the trace execution from a permanent storage system. *Online* monitors +process the trace during the execution of the system. Online monitors are said +to be *synchronous* if the processing of an event is attached to the system +execution, blocking the system during the event monitoring. On the other hand, +an *asynchronous* monitor has its execution detached from the system. Each type +of monitor has a set of advantages. For example, *offline* monitors can be +executed on different machines but require operations to save the log to a +file. In contrast, *synchronous online* method can react at the exact moment +a violation occurs. + +Another important aspect regarding monitors is the overhead associated with the +event analysis. If the system generates events at a frequency higher than the +monitor's ability to process them in the same system, only the *offline* +methods are viable. On the other hand, if the tracing of the events incurs +on higher overhead than the simple handling of an event by a monitor, then a +*synchronous online* monitors will incur on lower overhead. + +Indeed, the research presented in: + + De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo Silva. + *Efficient formal verification for the Linux kernel.* In: International + Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. + p. 315-332. + +Shows that for Deterministic Automata models, the synchronous processing of +events in-kernel causes lower overhead than saving the same events to the trace +buffer, not even considering collecting the trace for user-space analysis. +This motivated the development of an in-kernel interface for online monitors. + +For further information about modeling of Linux kernel behavior using automata, +see: + + De Oliveira, Daniel B.; De Oliveira, Romulo S.; Cucinotta, Tommaso. *A thread + synchronization model for the PREEMPT_RT Linux kernel.* Journal of Systems + Architecture, 2020, 107: 101729. + +The user interface +================== + +The user interface resembles the tracing interface (on purpose). It is +currently at "/sys/kernel/tracing/rv/". + +The following files/folders are currently available: + +**available_monitors** + +- Reading list the available monitors, one per line + +For example:: + + # cat available_monitors + wip + wwnr + +**available_reactors** + +- Reading shows the available reactors, one per line. + +For example:: + + # cat available_reactors + nop + panic + printk + +**enabled_monitors**: + +- Reading lists the enabled monitors, one per line +- Writing to it enables a given monitor +- Writing a monitor name with a '!' prefix disables it +- Truncating the file disables all enabled monitors + +For example:: + + # cat enabled_monitors + # echo wip > enabled_monitors + # echo wwnr >> enabled_monitors + # cat enabled_monitors + wip + wwnr + # echo '!wip' >> enabled_monitors + # cat enabled_monitors + wwnr + # echo > enabled_monitors + # cat enabled_monitors + # + +Note that it is possible to enable more than one monitor concurrently. + +**monitoring_on** + +This is an on/off general switcher for monitoring. It resembles the +"tracing_on" switcher in the trace interface. + +- Writing "0" stops the monitoring +- Writing "1" continues the monitoring +- Reading returns the current status of the monitoring + +Note that it does not disable enabled monitors but stop the per-entity +monitors monitoring the events received from the system. + +**reacting_on** + +- Writing "0" prevents reactions for happening +- Writing "1" enable reactions +- Reading returns the current status of the reaction + +**monitors/** + +Each monitor will have its own directory inside "monitors/". There the +monitor-specific files will be presented. The "monitors/" directory resembles +the "events" directory on tracefs. + +For example:: + + # cd monitors/wip/ + # ls + desc enable + # cat desc + wakeup in preemptive per-cpu testing monitor. + # cat enable + 0 + +**monitors/MONITOR/desc** + +- Reading shows a description of the monitor *MONITOR* + +**monitors/MONITOR/enable** + +- Writing "0" disables the *MONITOR* +- Writing "1" enables the *MONITOR* +- Reading return the current status of the *MONITOR* + +**monitors/MONITOR/reactors** + +- List available reactors, with the select reaction for the given *MONITOR* + inside "[]". The default one is the nop (no operation) reactor. +- Writing the name of a reactor enables it to the given MONITOR. + +For example:: + + # cat monitors/wip/reactors + [nop] + panic + printk + # echo panic > monitors/wip/reactors + # cat monitors/wip/reactors + nop + [panic] + printk diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 8714800e22ade..0d9552b406c62 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -22,6 +22,9 @@ menuconfig RV actual execution, comparing it against a formal specification of the system behavior. + For further information, see: + Documentation/trace/rv/runtime-verification.rst + config RV_REACTORS bool "Runtime verification reactors" default y diff --git a/kernel/trace/rv/rv.c b/kernel/trace/rv/rv.c index df6678c863345..6c97cc2d754aa 100644 --- a/kernel/trace/rv/rv.c +++ b/kernel/trace/rv/rv.c @@ -133,6 +133,9 @@ * auto-generated wakeup in preemptive monitor. * # cat enable * 0 + * + * For further information, see: + * Documentation/trace/rv/runtime-verification.rst */ #include From e3c9fc78f096b83e81329b213c25fb9a376e373a Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:46 +0200 Subject: [PATCH 34/50] tools/rv: Add dot2c dot2c is a tool that transforms an automata in the graphiviz .dot file into an C representation of the automata. usage: dot2c [-h] dot_file dot2c: converts a .dot file into a C structure positional arguments: dot_file The dot file to be converted optional arguments: -h, --help show this help message and exit Link: https://lkml.kernel.org/r/b26204ba9509c80bcda31b76cdea31ddb188cd24.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/Makefile | 21 +++ tools/verification/dot2/automata.py | 171 +++++++++++++++++++ tools/verification/dot2/dot2c | 23 +++ tools/verification/dot2/dot2c.py | 251 ++++++++++++++++++++++++++++ 4 files changed, 466 insertions(+) create mode 100644 tools/verification/dot2/Makefile create mode 100644 tools/verification/dot2/automata.py create mode 100644 tools/verification/dot2/dot2c create mode 100644 tools/verification/dot2/dot2c.py diff --git a/tools/verification/dot2/Makefile b/tools/verification/dot2/Makefile new file mode 100644 index 0000000000000..235d182f6b2c5 --- /dev/null +++ b/tools/verification/dot2/Makefile @@ -0,0 +1,21 @@ +INSTALL=install + +prefix ?= /usr +bindir ?= $(prefix)/bin +mandir ?= $(prefix)/share/man +miscdir ?= $(prefix)/share/dot2 +srcdir ?= $(prefix)/src + +PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') + +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: install +install: + $(INSTALL) automata.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/automata.py + $(INSTALL) dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2c.py + $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py new file mode 100644 index 0000000000000..f22e1dff19ce9 --- /dev/null +++ b/tools/verification/dot2/automata.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira +# +# Automata object: parse an automata in dot file digraph format into a python object + +import ntpath + +class Automata: + """Automata class: Reads a dot file and part it as an automata. + + Attributes: + dot_file: A dot file with an state_automaton definition. + """ + + invalid_state_str = "INVALID_STATE" + + def __init__(self, file_path): + self.__dot_path = file_path + self.name = self.__get_model_name() + self.__dot_lines = self.__open_dot() + self.states, self.initial_state, self.final_states = self.__get_state_variables() + self.events = self.__get_event_variables() + self.function = self.__create_matrix() + + def __get_model_name(self): + basename = ntpath.basename(self.__dot_path) + if basename.endswith(".dot") == False: + print("not a dot file") + raise Exception("not a dot file: %s" % self.__dot_path) + + model_name = basename[0:-4] + if model_name.__len__() == 0: + raise Exception("not a dot file: %s" % self.__dot_path) + + return model_name + + def __open_dot(self): + cursor = 0 + dot_lines = [] + try: + dot_file = open(self.__dot_path) + except: + raise Exception("Cannot open the file: %s" % self.__dot_path) + + dot_lines = dot_file.read().splitlines() + dot_file.close() + + # checking the first line: + line = dot_lines[cursor].split() + + if (line[0] != "digraph") and (line[1] != "state_automaton"): + raise Exception("Not a valid .dot format: %s" % self.__dot_path) + else: + cursor += 1 + return dot_lines + + def __get_cursor_begin_states(self): + cursor = 0 + while self.__dot_lines[cursor].split()[0] != "{node": + cursor += 1 + return cursor + + def __get_cursor_begin_events(self): + cursor = 0 + while self.__dot_lines[cursor].split()[0] != "{node": + cursor += 1 + while self.__dot_lines[cursor].split()[0] == "{node": + cursor += 1 + # skip initial state transition + cursor += 1 + return cursor + + def __get_state_variables(self): + # wait for node declaration + states = [] + final_states = [] + + has_final_states = False + cursor = self.__get_cursor_begin_states() + + # process nodes + while self.__dot_lines[cursor].split()[0] == "{node": + line = self.__dot_lines[cursor].split() + raw_state = line[-1] + + # "enabled_fired"}; -> enabled_fired + state = raw_state.replace('"', '').replace('};', '').replace(',','_') + if state[0:7] == "__init_": + initial_state = state[7:] + else: + states.append(state) + if self.__dot_lines[cursor].__contains__("doublecircle") == True: + final_states.append(state) + has_final_states = True + + if self.__dot_lines[cursor].__contains__("ellipse") == True: + final_states.append(state) + has_final_states = True + + cursor += 1 + + states = sorted(set(states)) + states.remove(initial_state) + + # Insert the initial state at the bein og the states + states.insert(0, initial_state) + + if has_final_states == False: + final_states.append(initial_state) + + return states, initial_state, final_states + + def __get_event_variables(self): + # here we are at the begin of transitions, take a note, we will return later. + cursor = self.__get_cursor_begin_events() + + events = [] + while self.__dot_lines[cursor][1] == '"': + # transitions have the format: + # "all_fired" -> "both_fired" [ label = "disable_irq" ]; + # ------------ event is here ------------^^^^^ + if self.__dot_lines[cursor].split()[1] == "->": + line = self.__dot_lines[cursor].split() + event = line[-2].replace('"','') + + # when a transition has more than one lables, they are like this + # "local_irq_enable\nhw_local_irq_enable_n" + # so split them. + + event = event.replace("\\n", " ") + for i in event.split(): + events.append(i) + cursor += 1 + + return sorted(set(events)) + + def __create_matrix(self): + # transform the array into a dictionary + events = self.events + states = self.states + events_dict = {} + states_dict = {} + nr_event = 0 + for event in events: + events_dict[event] = nr_event + nr_event += 1 + + nr_state = 0 + for state in states: + states_dict[state] = nr_state + nr_state += 1 + + # declare the matrix.... + matrix = [[ self.invalid_state_str for x in range(nr_event)] for y in range(nr_state)] + + # and we are back! Let's fill the matrix + cursor = self.__get_cursor_begin_events() + + while self.__dot_lines[cursor][1] == '"': + if self.__dot_lines[cursor].split()[1] == "->": + line = self.__dot_lines[cursor].split() + origin_state = line[0].replace('"','').replace(',','_') + dest_state = line[2].replace('"','').replace(',','_') + possible_events = line[-2].replace('"','').replace("\\n", " ") + for event in possible_events.split(): + matrix[states_dict[origin_state]][events_dict[event]] = dest_state + cursor += 1 + + return matrix diff --git a/tools/verification/dot2/dot2c b/tools/verification/dot2/dot2c new file mode 100644 index 0000000000000..8a8cd84bdfcf6 --- /dev/null +++ b/tools/verification/dot2/dot2c @@ -0,0 +1,23 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira +# +# dot2c: parse an automata in dot file digraph format into a C +# +# This program was written in the development of this paper: +# de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. +# "Efficient Formal Verification for the Linux Kernel." International +# Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. + +if __name__ == '__main__': + from dot2 import dot2c + import argparse + import sys + + parser = argparse.ArgumentParser(description='dot2c: converts a .dot file into a C structure') + parser.add_argument('dot_file', help='The dot file to be converted') + + args = parser.parse_args() + d = dot2c.Dot2c(args.dot_file) + d.print_model_classic() diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py new file mode 100644 index 0000000000000..bca902eec4832 --- /dev/null +++ b/tools/verification/dot2/dot2c.py @@ -0,0 +1,251 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira +# +# dot2c: parse an automata in dot file digraph format into a C +# +# This program was written in the development of this paper: +# de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. +# "Efficient Formal Verification for the Linux Kernel." International +# Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. + +from dot2.automata import Automata + +class Dot2c(Automata): + enum_suffix = "" + enum_states_def = "states" + enum_events_def = "events" + struct_automaton_def = "automaton" + var_automaton_def = "aut" + + def __init__(self, file_path): + super().__init__(file_path) + self.line_length = 100 + + def __buff_to_string(self, buff): + string = "" + + for line in buff: + string = string + line + "\n" + + # cut off the last \n + return string[:-1] + + def __get_enum_states_content(self): + buff = [] + buff.append("\t%s%s = 0," % (self.initial_state, self.enum_suffix)) + for state in self.states: + if state != self.initial_state: + buff.append("\t%s%s," % (state, self.enum_suffix)) + buff.append("\tstate_max%s" % (self.enum_suffix)) + + return buff + + def get_enum_states_string(self): + buff = self.__get_enum_states_content() + return self.__buff_to_string(buff) + + def format_states_enum(self): + buff = [] + buff.append("enum %s {" % self.enum_states_def) + buff.append(self.get_enum_states_string()) + buff.append("};\n") + + return buff + + def __get_enum_events_content(self): + buff = [] + first = True + for event in self.events: + if first: + buff.append("\t%s%s = 0," % (event, self.enum_suffix)) + first = False + else: + buff.append("\t%s%s," % (event, self.enum_suffix)) + + buff.append("\tevent_max%s" % self.enum_suffix) + + return buff + + def get_enum_events_string(self): + buff = self.__get_enum_events_content() + return self.__buff_to_string(buff) + + def format_events_enum(self): + buff = [] + buff.append("enum %s {" % self.enum_events_def) + buff.append(self.get_enum_events_string()) + buff.append("};\n") + + return buff + + def get_minimun_type(self): + min_type = "unsigned char" + + if self.states.__len__() > 255: + min_type = "unsigned short" + + if self.states.__len__() > 65535: + min_type = "unsigned int" + + if self.states.__len__() > 1000000: + raise Exception("Too many states: %d" % self.states.__len__()) + + return min_type + + def format_automaton_definition(self): + min_type = self.get_minimun_type() + buff = [] + buff.append("struct %s {" % self.struct_automaton_def) + buff.append("\tchar *state_names[state_max%s];" % (self.enum_suffix)) + buff.append("\tchar *event_names[event_max%s];" % (self.enum_suffix)) + buff.append("\t%s function[state_max%s][event_max%s];" % (min_type, self.enum_suffix, self.enum_suffix)) + buff.append("\t%s initial_state;" % min_type) + buff.append("\tbool final_states[state_max%s];" % (self.enum_suffix)) + buff.append("};\n") + return buff + + def format_aut_init_header(self): + buff = [] + buff.append("struct %s %s = {" % (self.struct_automaton_def, self.var_automaton_def)) + return buff + + def __get_string_vector_per_line_content(self, buff): + first = True + string = "" + for entry in buff: + if first: + string = string + "\t\t\"" + entry + first = False; + else: + string = string + "\",\n\t\t\"" + entry + string = string + "\"" + + return string + + def get_aut_init_events_string(self): + return self.__get_string_vector_per_line_content(self.events) + + def get_aut_init_states_string(self): + return self.__get_string_vector_per_line_content(self.states) + + def format_aut_init_events_string(self): + buff = [] + buff.append("\t.event_names = {") + buff.append(self.get_aut_init_events_string()) + buff.append("\t},") + return buff + + def format_aut_init_states_string(self): + buff = [] + buff.append("\t.state_names = {") + buff.append(self.get_aut_init_states_string()) + buff.append("\t},") + + return buff + + def __get_max_strlen_of_states(self): + max_state_name = max(self.states, key = len).__len__() + return max(max_state_name, self.invalid_state_str.__len__()) + + def __get_state_string_length(self): + maxlen = self.__get_max_strlen_of_states() + self.enum_suffix.__len__() + return "%" + str(maxlen) + "s" + + def get_aut_init_function(self): + nr_states = self.states.__len__() + nr_events = self.events.__len__() + buff = [] + + strformat = self.__get_state_string_length() + + for x in range(nr_states): + line = "\t\t{ " + for y in range(nr_events): + next_state = self.function[x][y] + if next_state != self.invalid_state_str: + next_state = self.function[x][y] + self.enum_suffix + + if y != nr_events-1: + line = line + strformat % next_state + ", " + else: + line = line + strformat % next_state + " }," + buff.append(line) + + return self.__buff_to_string(buff) + + def format_aut_init_function(self): + buff = [] + buff.append("\t.function = {") + buff.append(self.get_aut_init_function()) + buff.append("\t},") + + return buff + + def get_aut_init_initial_state(self): + return self.initial_state + + def format_aut_init_initial_state(self): + buff = [] + initial_state = self.get_aut_init_initial_state() + buff.append("\t.initial_state = " + initial_state + self.enum_suffix + ",") + + return buff + + def get_aut_init_final_states(self): + line = "" + first = True + for state in self.states: + if first == False: + line = line + ', ' + else: + first = False + + if self.final_states.__contains__(state): + line = line + '1' + else: + line = line + '0' + return line + + def format_aut_init_final_states(self): + buff = [] + buff.append("\t.final_states = { %s }," % self.get_aut_init_final_states()) + + return buff + + def __get_automaton_initialization_footer_string(self): + footer = "};\n" + return footer + + def format_aut_init_footer(self): + buff = [] + buff.append(self.__get_automaton_initialization_footer_string()) + + return buff + + def format_invalid_state(self): + buff = [] + buff.append("#define %s state_max%s\n" % (self.invalid_state_str, self.enum_suffix)) + + return buff + + def format_model(self): + buff = [] + buff += self.format_states_enum() + buff += self.format_invalid_state() + buff += self.format_events_enum() + buff += self.format_automaton_definition() + buff += self.format_aut_init_header() + buff += self.format_aut_init_states_string() + buff += self.format_aut_init_events_string() + buff += self.format_aut_init_function() + buff += self.format_aut_init_initial_state() + buff += self.format_aut_init_final_states() + buff += self.format_aut_init_footer() + + return buff + + def print_model_classic(self): + buff = self.format_model() + print(self.__buff_to_string(buff)) From 4041b9bbfbcddd239ff2c090f0da43bb3df7818c Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:47 +0200 Subject: [PATCH 35/50] Documentation/rv: Add deterministic automaton documentation Add documentation about deterministic automaton and its possible representations (formal, graphic, .dot and C). Link: https://lkml.kernel.org/r/387edaed87630bd5eb37c4275045dfd229700aa6.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- .../trace/rv/deterministic_automata.rst | 184 ++++++++++++++++++ Documentation/trace/rv/index.rst | 1 + tools/verification/dot2/automata.py | 3 + tools/verification/dot2/dot2c | 3 + tools/verification/dot2/dot2c.py | 3 + 5 files changed, 194 insertions(+) create mode 100644 Documentation/trace/rv/deterministic_automata.rst diff --git a/Documentation/trace/rv/deterministic_automata.rst b/Documentation/trace/rv/deterministic_automata.rst new file mode 100644 index 0000000000000..d0638f95a455e --- /dev/null +++ b/Documentation/trace/rv/deterministic_automata.rst @@ -0,0 +1,184 @@ +Deterministic Automata +====================== + +Formally, a deterministic automaton, denoted by G, is defined as a quintuple: + + *G* = { *X*, *E*, *f*, x\ :subscript:`0`, X\ :subscript:`m` } + +where: + +- *X* is the set of states; +- *E* is the finite set of events; +- x\ :subscript:`0` is the initial state; +- X\ :subscript:`m` (subset of *X*) is the set of marked (or final) states. +- *f* : *X* x *E* -> *X* $ is the transition function. It defines the state + transition in the occurrence of an event from *E* in the state *X*. In the + special case of deterministic automata, the occurrence of the event in *E* + in a state in *X* has a deterministic next state from *X*. + +For example, a given automaton named 'wip' (wakeup in preemptive) can +be defined as: + +- *X* = { ``preemptive``, ``non_preemptive``} +- *E* = { ``preempt_enable``, ``preempt_disable``, ``sched_waking``} +- x\ :subscript:`0` = ``preemptive`` +- X\ :subscript:`m` = {``preemptive``} +- *f* = + - *f*\ (``preemptive``, ``preempt_disable``) = ``non_preemptive`` + - *f*\ (``non_preemptive``, ``sched_waking``) = ``non_preemptive`` + - *f*\ (``non_preemptive``, ``preempt_enable``) = ``preemptive`` + +One of the benefits of this formal definition is that it can be presented +in multiple formats. For example, using a *graphical representation*, using +vertices (nodes) and edges, which is very intuitive for *operating system* +practitioners, without any loss. + +The previous 'wip' automaton can also be represented as:: + + preempt_enable + +---------------------------------+ + v | + #============# preempt_disable +------------------+ + --> H preemptive H -----------------> | non_preemptive | + #============# +------------------+ + ^ | + | sched_waking | + +--------------+ + +Deterministic Automaton in C +---------------------------- + +In the paper "Efficient formal verification for the Linux kernel", +the authors present a simple way to represent an automaton in C that can +be used as regular code in the Linux kernel. + +For example, the 'wip' automata can be presented as (augmented with comments):: + + /* enum representation of X (set of states) to be used as index */ + enum states { + preemptive = 0, + non_preemptive, + state_max + }; + + #define INVALID_STATE state_max + + /* enum representation of E (set of events) to be used as index */ + enum events { + preempt_disable = 0, + preempt_enable, + sched_waking, + event_max + }; + + struct automaton { + char *state_names[state_max]; // X: the set of states + char *event_names[event_max]; // E: the finite set of events + unsigned char function[state_max][event_max]; // f: transition function + unsigned char initial_state; // x_0: the initial state + bool final_states[state_max]; // X_m: the set of marked states + }; + + struct automaton aut = { + .state_names = { + "preemptive", + "non_preemptive" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "sched_waking" + }, + .function = { + { non_preemptive, INVALID_STATE, INVALID_STATE }, + { INVALID_STATE, preemptive, non_preemptive }, + }, + .initial_state = preemptive, + .final_states = { 1, 0 }, + }; + +The *transition function* is represented as a matrix of states (lines) and +events (columns), and so the function *f* : *X* x *E* -> *X* can be solved +in O(1). For example:: + + next_state = automaton_wip.function[curr_state][event]; + +Graphviz .dot format +-------------------- + +The Graphviz open-source tool can produce the graphical representation +of an automaton using the (textual) DOT language as the source code. +The DOT format is widely used and can be converted to many other formats. + +For example, this is the 'wip' model in DOT:: + + digraph state_automaton { + {node [shape = circle] "non_preemptive"}; + {node [shape = plaintext, style=invis, label=""] "__init_preemptive"}; + {node [shape = doublecircle] "preemptive"}; + {node [shape = circle] "preemptive"}; + "__init_preemptive" -> "preemptive"; + "non_preemptive" [label = "non_preemptive"]; + "non_preemptive" -> "non_preemptive" [ label = "sched_waking" ]; + "non_preemptive" -> "preemptive" [ label = "preempt_enable" ]; + "preemptive" [label = "preemptive"]; + "preemptive" -> "non_preemptive" [ label = "preempt_disable" ]; + { rank = min ; + "__init_preemptive"; + "preemptive"; + } + } + +This DOT format can be transformed into a bitmap or vectorial image +using the dot utility, or into an ASCII art using graph-easy. For +instance:: + + $ dot -Tsvg -o wip.svg wip.dot + $ graph-easy wip.dot > wip.txt + +dot2c +----- + +dot2c is a utility that can parse a .dot file containing an automaton as +in the example above and automatically convert it to the C representation +presented in [3]. + +For example, having the previous 'wip' model into a file named 'wip.dot', +the following command will transform the .dot file into the C +representation (previously shown) in the 'wip.h' file:: + + $ dot2c wip.dot > wip.h + +The 'wip.h' content is the code sample in section 'Deterministic Automaton +in C'. + +Remarks +------- + +The automata formalism allows modeling discrete event systems (DES) in +multiple formats, suitable for different applications/users. + +For example, the formal description using set theory is better suitable +for automata operations, while the graphical format for human interpretation; +and computer languages for machine execution. + +References +---------- + +Many textbooks cover automata formalism. For a brief introduction see:: + + O'Regan, Gerard. Concise guide to software engineering. Springer, + Cham, 2017. + +For a detailed description, including operations, and application on Discrete +Event Systems (DES), see:: + + Cassandras, Christos G., and Stephane Lafortune, eds. Introduction to discrete + event systems. Boston, MA: Springer US, 2008. + +For the C representation in kernel, see:: + + De Oliveira, Daniel Bristot; Cucinotta, Tommaso; De Oliveira, Romulo + Silva. Efficient formal verification for the Linux kernel. In: + International Conference on Software Engineering and Formal Methods. + Springer, Cham, 2019. p. 315-332. diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst index b54e49b1d0deb..013a41a410cf0 100644 --- a/Documentation/trace/rv/index.rst +++ b/Documentation/trace/rv/index.rst @@ -7,3 +7,4 @@ Runtime Verification :glob: runtime-verification.rst + deterministic_automata.rst diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index f22e1dff19ce9..baffeb960ff0b 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -4,6 +4,9 @@ # Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira # # Automata object: parse an automata in dot file digraph format into a python object +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst import ntpath diff --git a/tools/verification/dot2/dot2c b/tools/verification/dot2/dot2c index 8a8cd84bdfcf6..3fe89ab88b65a 100644 --- a/tools/verification/dot2/dot2c +++ b/tools/verification/dot2/dot2c @@ -9,6 +9,9 @@ # de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. # "Efficient Formal Verification for the Linux Kernel." International # Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst if __name__ == '__main__': from dot2 import dot2c diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/dot2/dot2c.py index bca902eec4832..fa73353f7e560 100644 --- a/tools/verification/dot2/dot2c.py +++ b/tools/verification/dot2/dot2c.py @@ -9,6 +9,9 @@ # de Oliveira, D. B. and Cucinotta, T. and de Oliveira, R. S. # "Efficient Formal Verification for the Linux Kernel." International # Conference on Software Engineering and Formal Methods. Springer, Cham, 2019. +# +# For further information, see: +# Documentation/trace/rv/deterministic_automata.rst from dot2.automata import Automata From 24bce201d79807b668bf9d9e0aca801c5c0d5f78 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:48 +0200 Subject: [PATCH 36/50] tools/rv: Add dot2k transform .dot file into kernel rv monitor usage: dot2k [-h] -d DOT_FILE -t MONITOR_TYPE [-n MODEL_NAME] [-D DESCRIPTION] optional arguments: -h, --help show this help message and exit -d DOT_FILE, --dot DOT_FILE -t MONITOR_TYPE, --monitor_type MONITOR_TYPE -n MODEL_NAME, --model_name MODEL_NAME -D DESCRIPTION, --description DESCRIPTION Link: https://lkml.kernel.org/r/083b3ae61e5a62c1e2e5d08009baa91f82181618.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- tools/verification/dot2/Makefile | 5 + tools/verification/dot2/dot2k | 44 +++++ tools/verification/dot2/dot2k.py | 174 ++++++++++++++++++ .../dot2/dot2k_templates/main_global.c | 91 +++++++++ .../dot2/dot2k_templates/main_per_cpu.c | 91 +++++++++ .../dot2/dot2k_templates/main_per_task.c | 91 +++++++++ 6 files changed, 496 insertions(+) create mode 100644 tools/verification/dot2/dot2k create mode 100644 tools/verification/dot2/dot2k.py create mode 100644 tools/verification/dot2/dot2k_templates/main_global.c create mode 100644 tools/verification/dot2/dot2k_templates/main_per_cpu.c create mode 100644 tools/verification/dot2/dot2k_templates/main_per_task.c diff --git a/tools/verification/dot2/Makefile b/tools/verification/dot2/Makefile index 235d182f6b2c5..021beb07a5213 100644 --- a/tools/verification/dot2/Makefile +++ b/tools/verification/dot2/Makefile @@ -19,3 +19,8 @@ install: $(INSTALL) automata.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/automata.py $(INSTALL) dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2c.py $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ + $(INSTALL) dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2k.py + $(INSTALL) dot2k -D -m 755 $(DESTDIR)$(bindir)/ + + mkdir -p ${miscdir}/ + cp -rp dot2k_templates $(DESTDIR)$(miscdir)/ diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k new file mode 100644 index 0000000000000..69106f4b7682a --- /dev/null +++ b/tools/verification/dot2/dot2k @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira +# +# dot2k: transform dot files into a monitor for the Linux kernel. + +if __name__ == '__main__': + from dot2.dot2k import dot2k + import argparse + import ntpath + import os + import platform + import sys + import sys + import argparse + + parser = argparse.ArgumentParser(description='transform .dot file into kernel rv monitor') + parser.add_argument('-d', "--dot", dest="dot_file", required=True) + parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=True) + parser.add_argument('-n', "--model_name", dest="model_name", required=False) + parser.add_argument("-D", "--description", dest="description", required=False) + params = parser.parse_args() + + print("Opening and parsing the dot file %s" % params.dot_file) + try: + monitor=dot2k(params.dot_file, params.monitor_type) + except Exception as e: + print('Error: '+ str(e)) + print("Sorry : :-(") + sys.exit(1) + + # easier than using argparse action. + if params.model_name != None: + print(params.model_name) + + print("Writing the monitor into the directory %s" % monitor.name) + monitor.print_files() + print("Almost done, checklist") + print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) + print(" - Edit include/trace/events/rv.h to add the tracepoint entry") + print(" - Move it to the kernel's monitor directory") + print(" - Edit kernel/trace/rv/Makefile") + print(" - Edit kernel/trace/rv/Kconfig") diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py new file mode 100644 index 0000000000000..d85f755e3bc75 --- /dev/null +++ b/tools/verification/dot2/dot2k.py @@ -0,0 +1,174 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira +# +# dot2k: transform dot files into a monitor for the Linux kernel. + +from dot2.dot2c import Dot2c +import platform +import os + +class dot2k(Dot2c): + monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } + monitor_templates_dir = "dot2k/rv_templates/" + monitor_type = "per_cpu" + + def __init__(self, file_path, MonitorType): + super().__init__(file_path) + + self.monitor_type = self.monitor_types.get(MonitorType) + if self.monitor_type == None: + raise Exception("Unknown monitor type: %s" % MonitorType) + + self.monitor_type = MonitorType + self.__fill_rv_templates_dir() + self.main_c = self.__open_file(self.monitor_templates_dir + "main_" + MonitorType + ".c") + self.enum_suffix = "_%s" % self.name + + def __fill_rv_templates_dir(self): + + if os.path.exists(self.monitor_templates_dir) == True: + return + + if platform.system() != "Linux": + raise Exception("I can only run on Linux.") + + kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) + + if os.path.exists(kernel_path) == True: + self.monitor_templates_dir = kernel_path + return + + if os.path.exists("/usr/share/dot2/dot2k_templates/") == True: + self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" + return + + raise Exception("Could not find the template directory, do you have the kernel source installed?") + + + def __open_file(self, path): + try: + fd = open(path) + except OSError: + raise Exception("Cannot open the file: %s" % path) + + content = fd.read() + + return content + + def __buff_to_string(self, buff): + string = "" + + for line in buff: + string = string + line + "\n" + + # cut off the last \n + return string[:-1] + + def fill_tracepoint_handlers_skel(self): + buff = [] + for event in self.events: + buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) + buff.append("{") + if self.monitor_type == "per_task": + buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); + buff.append("\tda_handle_event_%s(p, %s%s);" % (self.name, event, self.enum_suffix)); + else: + buff.append("\tda_handle_event_%s(%s%s);" % (self.name, event, self.enum_suffix)); + buff.append("}") + buff.append("") + return self.__buff_to_string(buff) + + def fill_tracepoint_attach_probe(self): + buff = [] + for event in self.events: + buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return self.__buff_to_string(buff) + + def fill_tracepoint_detach_helper(self): + buff = [] + for event in self.events: + buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return self.__buff_to_string(buff) + + def fill_main_c(self): + main_c = self.main_c + min_type = self.get_minimun_type() + nr_events = self.events.__len__() + tracepoint_handlers = self.fill_tracepoint_handlers_skel() + tracepoint_attach = self.fill_tracepoint_attach_probe() + tracepoint_detach = self.fill_tracepoint_detach_helper() + + main_c = main_c.replace("MIN_TYPE", min_type) + main_c = main_c.replace("MODEL_NAME", self.name) + main_c = main_c.replace("NR_EVENTS", str(nr_events)) + main_c = main_c.replace("TRACEPOINT_HANDLERS_SKEL", tracepoint_handlers) + main_c = main_c.replace("TRACEPOINT_ATTACH", tracepoint_attach) + main_c = main_c.replace("TRACEPOINT_DETACH", tracepoint_detach) + + return main_c + + def fill_model_h_header(self): + buff = [] + buff.append("/*") + buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) + buff.append(" * For further information about this format, see kernel documentation:") + buff.append(" * Documentation/trace/rv/deterministic_automata.rst") + buff.append(" */") + buff.append("") + + return buff + + def fill_model_h(self): + # + # Adjust the definition names + # + self.enum_states_def = "states_%s" % self.name + self.enum_events_def = "events_%s" % self.name + self.struct_automaton_def = "automaton_%s" % self.name + self.var_automaton_def = "automaton_%s" % self.name + + buff = self.fill_model_h_header() + buff += self.format_model() + + return self.__buff_to_string(buff) + + def __create_directory(self): + try: + os.mkdir(self.name) + except FileExistsError: + return + except: + print("Fail creating the output dir: %s" % self.name) + + def __create_file(self, file_name, content): + path = "%s/%s" % (self.name, file_name) + try: + file = open(path, 'w') + except FileExistsError: + return + except: + print("Fail creating file: %s" % path) + + file.write(content) + + file.close() + + def __get_main_name(self): + path = "%s/%s" % (self.name, "main.c") + if os.path.exists(path) == False: + return "main.c" + return "__main.c" + + def print_files(self): + main_c = self.fill_main_c() + model_h = self.fill_model_h() + + self.__create_directory() + + path = "%s.c" % self.name + self.__create_file(path, main_c) + + path = "%s.h" % self.name + self.__create_file(path, model_h) diff --git a/tools/verification/dot2/dot2k_templates/main_global.c b/tools/verification/dot2/dot2k_templates/main_global.c new file mode 100644 index 0000000000000..f4b712dbc92e0 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_global.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include + */ +#include + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_GLOBAL(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_cpu.c b/tools/verification/dot2/dot2k_templates/main_per_cpu.c new file mode 100644 index 0000000000000..4080d1ca3354a --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_per_cpu.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include + */ +#include + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_PER_CPU(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); diff --git a/tools/verification/dot2/dot2k_templates/main_per_task.c b/tools/verification/dot2/dot2k_templates/main_per_task.c new file mode 100644 index 0000000000000..89197175384f7 --- /dev/null +++ b/tools/verification/dot2/dot2k_templates/main_per_task.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "MODEL_NAME" + +/* + * XXX: include required tracepoint headers, e.g., + * #include + */ +#include + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "MODEL_NAME.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_MODEL_NAME; +DECLARE_DA_MON_PER_TASK(MODEL_NAME, MIN_TYPE); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +TRACEPOINT_HANDLERS_SKEL +static int enable_MODEL_NAME(void) +{ + int retval; + + retval = da_monitor_init_MODEL_NAME(); + if (retval) + return retval; + +TRACEPOINT_ATTACH + + return 0; +} + +static void disable_MODEL_NAME(void) +{ + rv_MODEL_NAME.enabled = 0; + +TRACEPOINT_DETACH + + da_monitor_destroy_MODEL_NAME(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_MODEL_NAME = { + .name = "MODEL_NAME", + .description = "auto-generated MODEL_NAME", + .enable = enable_MODEL_NAME, + .disable = disable_MODEL_NAME, + .reset = da_monitor_reset_all_MODEL_NAME, + .enabled = 0, +}; + +static int register_MODEL_NAME(void) +{ + rv_register_monitor(&rv_MODEL_NAME); + return 0; +} + +static void unregister_MODEL_NAME(void) +{ + rv_unregister_monitor(&rv_MODEL_NAME); +} + +module_init(register_MODEL_NAME); +module_exit(unregister_MODEL_NAME); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("MODEL_NAME"); From d57aff24796f8f784e1f7beed6da3308e5bb13c0 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:49 +0200 Subject: [PATCH 37/50] Documentation/rv: Add deterministic automata monitor synthesis documentation Add the da_monitor_synthesis.rst introduces some concepts behind the Deterministic Automata (DA) monitor synthesis and interface. Link: https://lkml.kernel.org/r/7873bdb7b2e5d2bc0b2eb6ca0b324af9a0ba27a0.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- .../trace/rv/da_monitor_synthesis.rst | 147 ++++++++++++++++++ Documentation/trace/rv/index.rst | 1 + include/rv/da_monitor.h | 3 + tools/verification/dot2/dot2k | 3 + tools/verification/dot2/dot2k.py | 3 + 5 files changed, 157 insertions(+) create mode 100644 Documentation/trace/rv/da_monitor_synthesis.rst diff --git a/Documentation/trace/rv/da_monitor_synthesis.rst b/Documentation/trace/rv/da_monitor_synthesis.rst new file mode 100644 index 0000000000000..0dbdcd1e62b90 --- /dev/null +++ b/Documentation/trace/rv/da_monitor_synthesis.rst @@ -0,0 +1,147 @@ +Deterministic Automata Monitor Synthesis +======================================== + +The starting point for the application of runtime verification (RV) technics +is the *specification* or *modeling* of the desired (or undesired) behavior +of the system under scrutiny. + +The formal representation needs to be then *synthesized* into a *monitor* +that can then be used in the analysis of the trace of the system. The +*monitor* connects to the system via an *instrumentation* that converts +the events from the *system* to the events of the *specification*. + + +In Linux terms, the runtime verification monitors are encapsulated inside +the *RV monitor* abstraction. The RV monitor includes a set of instances +of the monitor (per-cpu monitor, per-task monitor, and so on), the helper +functions that glue the monitor to the system reference model, and the +trace output as a reaction to event parsing and exceptions, as depicted +below:: + + Linux +----- RV Monitor ----------------------------------+ Formal + Realm | | Realm + +-------------------+ +----------------+ +-----------------+ + | Linux kernel | | Monitor | | Reference | + | Tracing | -> | Instance(s) | <- | Model | + | (instrumentation) | | (verification) | | (specification) | + +-------------------+ +----------------+ +-----------------+ + | | | + | V | + | +----------+ | + | | Reaction | | + | +--+--+--+-+ | + | | | | | + | | | +-> trace output ? | + +------------------------|--|----------------------+ + | +----> panic ? + +-------> + +DA monitor synthesis +-------------------- + +The synthesis of automata-based models into the Linux *RV monitor* abstraction +is automated by the dot2k tool and the rv/da_monitor.h header file that +contains a set of macros that automatically generate the monitor's code. + +dot2k +----- + +The dot2k utility leverages dot2c by converting an automaton model in +the DOT format into the C representation [1] and creating the skeleton of +a kernel monitor in C. + +For example, it is possible to transform the wip.dot model present in +[1] into a per-cpu monitor with the following command:: + + $ dot2k -d wip.dot -t per_cpu + +This will create a directory named wip/ with the following files: + +- wip.h: the wip model in C +- wip.c: the RV monitor + +The wip.c file contains the monitor declaration and the starting point for +the system instrumentation. + +Monitor macros +-------------- + +The rv/da_monitor.h enables automatic code generation for the *Monitor +Instance(s)* using C macros. + +The benefits of the usage of macro for monitor synthesis are 3-fold as it: + +- Reduces the code duplication; +- Facilitates the bug fix/improvement; +- Avoids the case of developers changing the core of the monitor code + to manipulate the model in a (let's say) non-standard way. + +This initial implementation presents three different types of monitor instances: + +- ``#define DECLARE_DA_MON_GLOBAL(name, type)`` +- ``#define DECLARE_DA_MON_PER_CPU(name, type)`` +- ``#define DECLARE_DA_MON_PER_TASK(name, type)`` + +The first declares the functions for a global deterministic automata monitor, +the second for monitors with per-cpu instances, and the third with per-task +instances. + +In all cases, the 'name' argument is a string that identifies the monitor, and +the 'type' argument is the data type used by dot2k on the representation of +the model in C. + +For example, the wip model with two states and three events can be +stored in an 'unsigned char' type. Considering that the preemption control +is a per-cpu behavior, the monitor declaration in the 'wip.c' file is:: + + DECLARE_DA_MON_PER_CPU(wip, unsigned char); + +The monitor is executed by sending events to be processed via the functions +presented below:: + + da_handle_event_$(MONITOR_NAME)($(event from event enum)); + da_handle_start_event_$(MONITOR_NAME)($(event from event enum)); + da_handle_start_run_event_$(MONITOR_NAME)($(event from event enum)); + +The function ``da_handle_event_$(MONITOR_NAME)()`` is the regular case where +the event will be processed if the monitor is processing events. + +When a monitor is enabled, it is placed in the initial state of the automata. +However, the monitor does not know if the system is in the *initial state*. + +The ``da_handle_start_event_$(MONITOR_NAME)()`` function is used to notify the +monitor that the system is returning to the initial state, so the monitor can +start monitoring the next event. + +The ``da_handle_start_run_event_$(MONITOR_NAME)()`` function is used to notify +the monitor that the system is known to be in the initial state, so the +monitor can start monitoring and monitor the current event. + +Using the wip model as example, the events "preempt_disable" and +"sched_waking" should be sent to monitor, respectively, via [2]:: + + da_handle_event_wip(preempt_disable_wip); + da_handle_event_wip(sched_waking_wip); + +While the event "preempt_enabled" will use:: + + da_handle_start_event_wip(preempt_enable_wip); + +To notify the monitor that the system will be returning to the initial state, +so the system and the monitor should be in sync. + +Final remarks +------------- + +With the monitor synthesis in place using the rv/da_monitor.h and +dot2k, the developer's work should be limited to the instrumentation +of the system, increasing the confidence in the overall approach. + +[1] For details about deterministic automata format and the translation +from one representation to another, see:: + + Documentation/trace/rv/deterministic_automata.rst + +[2] dot2k appends the monitor's name suffix to the events enums to +avoid conflicting variables when exporting the global vmlinux.h +use by BPF programs. diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst index 013a41a410cf0..46d47f33052c5 100644 --- a/Documentation/trace/rv/index.rst +++ b/Documentation/trace/rv/index.rst @@ -8,3 +8,4 @@ Runtime Verification runtime-verification.rst deterministic_automata.rst + da_monitor_synthesis.rst diff --git a/include/rv/da_monitor.h b/include/rv/da_monitor.h index 001bc298289f5..9eb75683e0125 100644 --- a/include/rv/da_monitor.h +++ b/include/rv/da_monitor.h @@ -6,6 +6,9 @@ * with automata models in C generated by the dot2k tool. * * The dot2k tool is available at tools/verification/dot2k/ + * + * For further information, see: + * Documentation/trace/rv/da_monitor_synthesis.rst */ #include diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k index 69106f4b7682a..9dcd38abe20a1 100644 --- a/tools/verification/dot2/dot2k +++ b/tools/verification/dot2/dot2k @@ -4,6 +4,9 @@ # Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira # # dot2k: transform dot files into a monitor for the Linux kernel. +# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst if __name__ == '__main__': from dot2.dot2k import dot2k diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/dot2/dot2k.py index d85f755e3bc75..016550fccf1f2 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/dot2/dot2k.py @@ -4,6 +4,9 @@ # Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira # # dot2k: transform dot files into a monitor for the Linux kernel. +# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst from dot2.dot2c import Dot2c import platform From b6172b5185d4f57f93ef85b7729ee06c5bc0cbe3 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:50 +0200 Subject: [PATCH 38/50] Documentation/rv: Add deterministic automata instrumentation documentation Add the da_monitor_instrumentation.rst. It describes the basics of RV monitor instrumentation. Link: https://lkml.kernel.org/r/0557d5c68e2fc252f2643c2cc5295a67e2b73277.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- .../trace/rv/da_monitor_instrumentation.rst | 171 ++++++++++++++++++ Documentation/trace/rv/index.rst | 1 + 2 files changed, 172 insertions(+) create mode 100644 Documentation/trace/rv/da_monitor_instrumentation.rst diff --git a/Documentation/trace/rv/da_monitor_instrumentation.rst b/Documentation/trace/rv/da_monitor_instrumentation.rst new file mode 100644 index 0000000000000..6c67c7b578111 --- /dev/null +++ b/Documentation/trace/rv/da_monitor_instrumentation.rst @@ -0,0 +1,171 @@ +Deterministic Automata Instrumentation +====================================== + +The RV monitor file created by dot2k, with the name "$MODEL_NAME.c" +includes a section dedicated to instrumentation. + +In the example of the wip.dot monitor created on [1], it will look like:: + + /* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ + static void handle_preempt_disable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_disable_wip); + } + + static void handle_preempt_enable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_enable_wip); + } + + static void handle_sched_waking(void *data, /* XXX: fill header */) + { + da_handle_event_wip(sched_waking_wip); + } + + static int enable_wip(void) + { + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + + return 0; + } + +The comment at the top of the section explains the general idea: the +instrumentation section translates *kernel events* into the *model's +event*. + +Tracing callback functions +-------------------------- + +The first three functions are the starting point of the callback *handler +functions* for each of the three events from the wip model. The developer +does not necessarily need to use them: they are just starting points. + +Using the example of:: + + void handle_preempt_disable(void *data, /* XXX: fill header */) + { + da_handle_event_wip(preempt_disable_wip); + } + +The preempt_disable event from the model connects directly to the +preemptirq:preempt_disable. The preemptirq:preempt_disable event +has the following signature, from include/trace/events/preemptirq.h:: + + TP_PROTO(unsigned long ip, unsigned long parent_ip) + +Hence, the handle_preempt_disable() function will look like:: + + void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) + +In this case, the kernel event translates one to one with the automata +event, and indeed, no other change is required for this function. + +The next handler function, handle_preempt_enable() has the same argument +list from the handle_preempt_disable(). The difference is that the +preempt_enable event will be used to synchronize the system to the model. + +Initially, the *model* is placed in the initial state. However, the *system* +might or might not be in the initial state. The monitor cannot start +processing events until it knows that the system has reached the initial state. +Otherwise, the monitor and the system could be out-of-sync. + +Looking at the automata definition, it is possible to see that the system +and the model are expected to return to the initial state after the +preempt_enable execution. Hence, it can be used to synchronize the +system and the model at the initialization of the monitoring section. + +The start is informed via a special handle function, the +"da_handle_start_event_$(MONITOR_NAME)(event)", in this case:: + + da_handle_start_event_wip(preempt_enable_wip); + +So, the callback function will look like:: + + void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) + { + da_handle_start_event_wip(preempt_enable_wip); + } + +Finally, the "handle_sched_waking()" will look like:: + + void handle_sched_waking(void *data, struct task_struct *task) + { + da_handle_event_wip(sched_waking_wip); + } + +And the explanation is left for the reader as an exercise. + +enable and disable functions +---------------------------- + +dot2k automatically creates two special functions:: + + enable_$(MONITOR_NAME)() + disable_$(MONITOR_NAME)() + +These functions are called when the monitor is enabled and disabled, +respectively. + +They should be used to *attach* and *detach* the instrumentation to the running +system. The developer must add to the relative function all that is needed to +*attach* and *detach* its monitor to the system. + +For the wip case, these functions were named:: + + enable_wip() + disable_wip() + +But no change was required because: by default, these functions *attach* and +*detach* the tracepoints_to_attach, which was enough for this case. + +Instrumentation helpers +----------------------- + +To complete the instrumentation, the *handler functions* need to be attached to a +kernel event, at the monitoring enable phase. + +The RV interface also facilitates this step. For example, the macro "rv_attach_trace_probe()" +is used to connect the wip model events to the relative kernel event. dot2k automatically +adds "rv_attach_trace_probe()" function call for each model event in the enable phase, as +a suggestion. + +For example, from the wip sample model:: + + static int enable_wip(void) + { + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + + return 0; + } + +The probes then need to be detached at the disable phase. + +[1] The wip model is presented in:: + + Documentation/trace/rv/deterministic_automata.rst + +The wip monitor is presented in:: + + Documentation/trace/rv/da_monitor_synthesis.rst diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst index 46d47f33052c5..db2ae3f90b900 100644 --- a/Documentation/trace/rv/index.rst +++ b/Documentation/trace/rv/index.rst @@ -9,3 +9,4 @@ Runtime Verification runtime-verification.rst deterministic_automata.rst da_monitor_synthesis.rst + da_monitor_instrumentation.rst From 8812d21219b9c649dd25eb93915e00939944aeb7 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:51 +0200 Subject: [PATCH 39/50] rv/monitor: Add the wip monitor skeleton created by dot2k THIS CODE IS NOT LINKED TO THE MAKEFILE. This model does not compile because it lacks the instrumentation part, which will be added next. In the typical case, there will be only one patch, but it was split into two patches for educational purposes. This is the direct output this command line: $ dot2k -d tools/verification/models/wip.dot -t per_cpu Link: https://lkml.kernel.org/r/5eb7a9118917e8a814c5e49853a72fc62be0a101.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/monitors/wip/wip.c | 109 +++++++++++++++++++++++++++++ kernel/trace/rv/monitors/wip/wip.h | 46 ++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 kernel/trace/rv/monitors/wip/wip.c create mode 100644 kernel/trace/rv/monitors/wip/wip.h diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c new file mode 100644 index 0000000000000..79a054ca0cde4 --- /dev/null +++ b/kernel/trace/rv/monitors/wip/wip.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "wip" + +/* + * XXX: include required tracepoint headers, e.g., + * #include + */ +#include + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. + */ +#include "wip.h" + +/* + * Declare the deterministic automata monitor. + * + * The rv monitor reference is needed for the monitor declaration. + */ +struct rv_monitor rv_wip; +DECLARE_DA_MON_PER_CPU(wip, unsigned char); + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + * + */ +static void handle_preempt_disable(void *data, /* XXX: fill header */) +{ + da_handle_event_wip(preempt_disable_wip); +} + +static void handle_preempt_enable(void *data, /* XXX: fill header */) +{ + da_handle_event_wip(preempt_enable_wip); +} + +static void handle_sched_waking(void *data, /* XXX: fill header */) +{ + da_handle_event_wip(sched_waking_wip); +} + +static int enable_wip(void) +{ + int retval; + + retval = da_monitor_init_wip(); + if (retval) + return retval; + + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + + return 0; +} + +static void disable_wip(void) +{ + rv_wip.enabled = 0; + + rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); + rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); + rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + + da_monitor_destroy_wip(); +} + +/* + * This is the monitor register section. + */ +struct rv_monitor rv_wip = { + .name = "wip", + .description = "auto-generated wip", + .enable = enable_wip, + .disable = disable_wip, + .reset = da_monitor_reset_all_wip, + .enabled = 0, +}; + +static int register_wip(void) +{ + rv_register_monitor(&rv_wip); + return 0; +} + +static void unregister_wip(void) +{ + rv_unregister_monitor(&rv_wip); +} + +module_init(register_wip); +module_exit(unregister_wip); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("dot2k: auto-generated"); +MODULE_DESCRIPTION("wip"); diff --git a/kernel/trace/rv/monitors/wip/wip.h b/kernel/trace/rv/monitors/wip/wip.h new file mode 100644 index 0000000000000..c1c47e2305efa --- /dev/null +++ b/kernel/trace/rv/monitors/wip/wip.h @@ -0,0 +1,46 @@ +/* + * Automatically generated C representation of wip automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_wip { + preemptive_wip = 0, + non_preemptive_wip, + state_max_wip +}; + +#define INVALID_STATE state_max_wip + +enum events_wip { + preempt_disable_wip = 0, + preempt_enable_wip, + sched_waking_wip, + event_max_wip +}; + +struct automaton_wip { + char *state_names[state_max_wip]; + char *event_names[event_max_wip]; + unsigned char function[state_max_wip][event_max_wip]; + unsigned char initial_state; + bool final_states[state_max_wip]; +}; + +struct automaton_wip automaton_wip = { + .state_names = { + "preemptive", + "non_preemptive" + }, + .event_names = { + "preempt_disable", + "preempt_enable", + "sched_waking" + }, + .function = { + { non_preemptive_wip, INVALID_STATE, INVALID_STATE }, + { INVALID_STATE, preemptive_wip, non_preemptive_wip }, + }, + .initial_state = preemptive_wip, + .final_states = { 1, 0 }, +}; From 10bde81c74863472047f31304064018c40f488ee Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:52 +0200 Subject: [PATCH 40/50] rv/monitor: Add the wip monitor The wakeup in preemptive (wip) monitor verifies if the wakeup events always take place with preemption disabled: | | v #==================# H preemptive H <+ #==================# | | | | preempt_disable | preempt_enable v | sched_waking +------------------+ | +--------------- | | | | | non_preemptive | | +--------------> | | -+ +------------------+ The wakeup event always takes place with preemption disabled because of the scheduler synchronization. However, because the preempt_count and its trace event are not atomic with regard to interrupts, some inconsistencies might happen. The documentation illustrates one of these cases. Link: https://lkml.kernel.org/r/c98ca678df81115fddc04921b3c79720c836b18f.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/rv/index.rst | 1 + Documentation/trace/rv/monitor_wip.rst | 55 ++++++++++++++++++++++++++ include/trace/events/rv.h | 10 +++++ kernel/trace/rv/Kconfig | 13 ++++++ kernel/trace/rv/Makefile | 1 + kernel/trace/rv/monitors/wip/wip.c | 51 +++++++----------------- tools/verification/models/wip.dot | 16 ++++++++ 7 files changed, 111 insertions(+), 36 deletions(-) create mode 100644 Documentation/trace/rv/monitor_wip.rst create mode 100644 tools/verification/models/wip.dot diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst index db2ae3f90b900..4cb71ed628b8e 100644 --- a/Documentation/trace/rv/index.rst +++ b/Documentation/trace/rv/index.rst @@ -10,3 +10,4 @@ Runtime Verification deterministic_automata.rst da_monitor_synthesis.rst da_monitor_instrumentation.rst + monitor_wip.rst diff --git a/Documentation/trace/rv/monitor_wip.rst b/Documentation/trace/rv/monitor_wip.rst new file mode 100644 index 0000000000000..a95763438c482 --- /dev/null +++ b/Documentation/trace/rv/monitor_wip.rst @@ -0,0 +1,55 @@ +Monitor wip +=========== + +- Name: wip - wakeup in preemptive +- Type: per-cpu deterministic automaton +- Author: Daniel Bristot de Oliveira + +Description +----------- + +The wakeup in preemptive (wip) monitor is a sample per-cpu monitor +that verifies if the wakeup events always take place with +preemption disabled:: + + | + | + v + #==================# + H preemptive H <+ + #==================# | + | | + | preempt_disable | preempt_enable + v | + sched_waking +------------------+ | + +--------------- | | | + | | non_preemptive | | + +--------------> | | -+ + +------------------+ + +The wakeup event always takes place with preemption disabled because +of the scheduler synchronization. However, because the preempt_count +and its trace event are not atomic with regard to interrupts, some +inconsistencies might happen. For example:: + + preempt_disable() { + __preempt_count_add(1) + -------> smp_apic_timer_interrupt() { + preempt_disable() + do not trace (preempt count >= 1) + + wake up a thread + + preempt_enable() + do not trace (preempt count >= 1) + } + <------ + trace_preempt_disable(); + } + +This problem was reported and discussed here: + https://lore.kernel.org/r/cover.1559051152.git.bristot@redhat.com/ + +Specification +------------- +Grapviz Dot file in tools/verification/models/wip.dot diff --git a/include/trace/events/rv.h b/include/trace/events/rv.h index 20a2e09c64166..e972f27d8df39 100644 --- a/include/trace/events/rv.h +++ b/include/trace/events/rv.h @@ -56,6 +56,16 @@ DECLARE_EVENT_CLASS(error_da_monitor, __entry->event, __entry->state) ); + +#ifdef CONFIG_RV_MON_WIP +DEFINE_EVENT(event_da_monitor, event_wip, + TP_PROTO(char *state, char *event, char *next_state, bool final_state), + TP_ARGS(state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor, error_wip, + TP_PROTO(char *state, char *event), + TP_ARGS(state, event)); +#endif /* CONFIG_RV_MON_WIP */ #endif /* CONFIG_DA_MON_EVENTS_IMPLICIT */ #ifdef CONFIG_DA_MON_EVENTS_ID diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index 0d9552b406c62..e50f3346164aa 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -25,6 +25,19 @@ menuconfig RV For further information, see: Documentation/trace/rv/runtime-verification.rst +config RV_MON_WIP + depends on RV + depends on PREEMPT_TRACER + select DA_MON_EVENTS_IMPLICIT + bool "wip monitor" + help + Enable wip (wakeup in preemptive) sample monitor that illustrates + the usage of per-cpu monitors, and one limitation of the + preempt_disable/enable events. + + For further information, see: + Documentation/trace/rv/monitor_wip.rst + config RV_REACTORS bool "Runtime verification reactors" default y diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index 8944274d9b412..b41109d2750a1 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_RV) += rv.o obj-$(CONFIG_RV_REACTORS) += rv_reactors.o +obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o diff --git a/kernel/trace/rv/monitors/wip/wip.c b/kernel/trace/rv/monitors/wip/wip.c index 79a054ca0cde4..83cace53b9faf 100644 --- a/kernel/trace/rv/monitors/wip/wip.c +++ b/kernel/trace/rv/monitors/wip/wip.c @@ -10,44 +10,26 @@ #define MODULE_NAME "wip" -/* - * XXX: include required tracepoint headers, e.g., - * #include - */ #include +#include +#include -/* - * This is the self-generated part of the monitor. Generally, there is no need - * to touch this section. - */ #include "wip.h" -/* - * Declare the deterministic automata monitor. - * - * The rv monitor reference is needed for the monitor declaration. - */ struct rv_monitor rv_wip; DECLARE_DA_MON_PER_CPU(wip, unsigned char); -/* - * This is the instrumentation part of the monitor. - * - * This is the section where manual work is required. Here the kernel events - * are translated into model's event. - * - */ -static void handle_preempt_disable(void *data, /* XXX: fill header */) +static void handle_preempt_disable(void *data, unsigned long ip, unsigned long parent_ip) { da_handle_event_wip(preempt_disable_wip); } -static void handle_preempt_enable(void *data, /* XXX: fill header */) +static void handle_preempt_enable(void *data, unsigned long ip, unsigned long parent_ip) { - da_handle_event_wip(preempt_enable_wip); + da_handle_start_event_wip(preempt_enable_wip); } -static void handle_sched_waking(void *data, /* XXX: fill header */) +static void handle_sched_waking(void *data, struct task_struct *task) { da_handle_event_wip(sched_waking_wip); } @@ -60,9 +42,9 @@ static int enable_wip(void) if (retval) return retval; - rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); - rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); - rv_attach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + rv_attach_trace_probe("wip", preempt_enable, handle_preempt_enable); + rv_attach_trace_probe("wip", sched_waking, handle_sched_waking); + rv_attach_trace_probe("wip", preempt_disable, handle_preempt_disable); return 0; } @@ -71,19 +53,16 @@ static void disable_wip(void) { rv_wip.enabled = 0; - rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_disable); - rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_preempt_enable); - rv_detach_trace_probe("wip", /* XXX: tracepoint */, handle_sched_waking); + rv_detach_trace_probe("wip", preempt_disable, handle_preempt_disable); + rv_detach_trace_probe("wip", preempt_enable, handle_preempt_enable); + rv_detach_trace_probe("wip", sched_waking, handle_sched_waking); da_monitor_destroy_wip(); } -/* - * This is the monitor register section. - */ struct rv_monitor rv_wip = { .name = "wip", - .description = "auto-generated wip", + .description = "wakeup in preemptive per-cpu testing monitor.", .enable = enable_wip, .disable = disable_wip, .reset = da_monitor_reset_all_wip, @@ -105,5 +84,5 @@ module_init(register_wip); module_exit(unregister_wip); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("dot2k: auto-generated"); -MODULE_DESCRIPTION("wip"); +MODULE_AUTHOR("Daniel Bristot de Oliveira "); +MODULE_DESCRIPTION("wip: wakeup in preemptive - per-cpu sample monitor."); diff --git a/tools/verification/models/wip.dot b/tools/verification/models/wip.dot new file mode 100644 index 0000000000000..2a53a9700a89c --- /dev/null +++ b/tools/verification/models/wip.dot @@ -0,0 +1,16 @@ +digraph state_automaton { + {node [shape = circle] "non_preemptive"}; + {node [shape = plaintext, style=invis, label=""] "__init_preemptive"}; + {node [shape = doublecircle] "preemptive"}; + {node [shape = circle] "preemptive"}; + "__init_preemptive" -> "preemptive"; + "non_preemptive" [label = "non_preemptive"]; + "non_preemptive" -> "non_preemptive" [ label = "sched_waking" ]; + "non_preemptive" -> "preemptive" [ label = "preempt_enable" ]; + "preemptive" [label = "preemptive"]; + "preemptive" -> "non_preemptive" [ label = "preempt_disable" ]; + { rank = min ; + "__init_preemptive"; + "preemptive"; + } +} From ccc319dcb450d57b7befe924453d06804d83ba73 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:53 +0200 Subject: [PATCH 41/50] rv/monitor: Add the wwnr monitor Per task wakeup while not running (wwnr) monitor. This model is broken, the reason is that a task can be running in the processor without being set as RUNNABLE. Think about a task about to sleep: 1: set_current_state(TASK_UNINTERRUPTIBLE); 2: schedule(); And then imagine an IRQ happening in between the lines one and two, waking the task up. BOOM, the wakeup will happen while the task is running. Q: Why do we need this model, so? A: To test the reactors. Link: https://lkml.kernel.org/r/473c0fc39967250fdebcff8b620311c11dccad30.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/rv/index.rst | 1 + Documentation/trace/rv/monitor_wwnr.rst | 45 +++++++++++++ include/trace/events/rv.h | 12 ++++ kernel/trace/rv/Kconfig | 12 ++++ kernel/trace/rv/Makefile | 1 + kernel/trace/rv/monitors/wwnr/wwnr.c | 87 +++++++++++++++++++++++++ kernel/trace/rv/monitors/wwnr/wwnr.h | 46 +++++++++++++ tools/verification/models/wwnr.dot | 16 +++++ 8 files changed, 220 insertions(+) create mode 100644 Documentation/trace/rv/monitor_wwnr.rst create mode 100644 kernel/trace/rv/monitors/wwnr/wwnr.c create mode 100644 kernel/trace/rv/monitors/wwnr/wwnr.h create mode 100644 tools/verification/models/wwnr.dot diff --git a/Documentation/trace/rv/index.rst b/Documentation/trace/rv/index.rst index 4cb71ed628b8e..15fa966102c01 100644 --- a/Documentation/trace/rv/index.rst +++ b/Documentation/trace/rv/index.rst @@ -11,3 +11,4 @@ Runtime Verification da_monitor_synthesis.rst da_monitor_instrumentation.rst monitor_wip.rst + monitor_wwnr.rst diff --git a/Documentation/trace/rv/monitor_wwnr.rst b/Documentation/trace/rv/monitor_wwnr.rst new file mode 100644 index 0000000000000..80f1777b85aac --- /dev/null +++ b/Documentation/trace/rv/monitor_wwnr.rst @@ -0,0 +1,45 @@ +Monitor wwnr +============ + +- Name: wwrn - wakeup while not running +- Type: per-task deterministic automaton +- Author: Daniel Bristot de Oliveira + +Description +----------- + +This is a per-task sample monitor, with the following +definition:: + + | + | + v + wakeup +-------------+ + +--------- | | + | | not_running | + +--------> | | <+ + +-------------+ | + | | + | switch_in | switch_out + v | + +-------------+ | + | running | -+ + +-------------+ + +This model is borken, the reason is that a task can be running +in the processor without being set as RUNNABLE. Think about a +task about to sleep:: + + 1: set_current_state(TASK_UNINTERRUPTIBLE); + 2: schedule(); + +And then imagine an IRQ happening in between the lines one and two, +waking the task up. BOOM, the wakeup will happen while the task is +running. + +- Why do we need this model, so? +- To test the reactors. + +Specification +------------- +Grapviz Dot file in tools/verification/models/wwnr.dot diff --git a/include/trace/events/rv.h b/include/trace/events/rv.h index e972f27d8df39..56592da9301c0 100644 --- a/include/trace/events/rv.h +++ b/include/trace/events/rv.h @@ -122,6 +122,18 @@ DECLARE_EVENT_CLASS(error_da_monitor_id, __entry->event, __entry->state) ); + +#ifdef CONFIG_RV_MON_WWNR +/* id is the pid of the task */ +DEFINE_EVENT(event_da_monitor_id, event_wwnr, + TP_PROTO(int id, char *state, char *event, char *next_state, bool final_state), + TP_ARGS(id, state, event, next_state, final_state)); + +DEFINE_EVENT(error_da_monitor_id, error_wwnr, + TP_PROTO(int id, char *state, char *event), + TP_ARGS(id, state, event)); +#endif /* CONFIG_RV_MON_WWNR */ + #endif /* CONFIG_DA_MON_EVENTS_ID */ #endif /* _TRACE_RV_H */ diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index e50f3346164aa..b259d6e8dc7ce 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -38,6 +38,18 @@ config RV_MON_WIP For further information, see: Documentation/trace/rv/monitor_wip.rst +config RV_MON_WWNR + depends on RV + select DA_MON_EVENTS_ID + bool "wwnr monitor" + help + Enable wwnr (wakeup while not running) sample monitor, this is a + sample monitor that illustrates the usage of per-task monitor. + The model is borken on purpose: it serves to test reactors. + + For further information, see: + Documentation/trace/rv/monitor_wwnr.rst + config RV_REACTORS bool "Runtime verification reactors" default y diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index b41109d2750a1..af0ff9a464184 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -3,3 +3,4 @@ obj-$(CONFIG_RV) += rv.o obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o +obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.c b/kernel/trace/rv/monitors/wwnr/wwnr.c new file mode 100644 index 0000000000000..599225d9cf382 --- /dev/null +++ b/kernel/trace/rv/monitors/wwnr/wwnr.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME "wwnr" + +#include +#include + +#include "wwnr.h" + +struct rv_monitor rv_wwnr; +DECLARE_DA_MON_PER_TASK(wwnr, unsigned char); + +static void handle_switch(void *data, bool preempt, struct task_struct *p, + struct task_struct *n, unsigned int prev_state) +{ + /* start monitoring only after the first suspension */ + if (prev_state == TASK_INTERRUPTIBLE) + da_handle_start_event_wwnr(p, switch_out_wwnr); + else + da_handle_event_wwnr(p, switch_out_wwnr); + + da_handle_event_wwnr(n, switch_in_wwnr); +} + +static void handle_wakeup(void *data, struct task_struct *p) +{ + da_handle_event_wwnr(p, wakeup_wwnr); +} + +static int enable_wwnr(void) +{ + int retval; + + retval = da_monitor_init_wwnr(); + if (retval) + return retval; + + rv_attach_trace_probe("wwnr", sched_switch, handle_switch); + rv_attach_trace_probe("wwnr", sched_wakeup, handle_wakeup); + + return 0; +} + +static void disable_wwnr(void) +{ + rv_wwnr.enabled = 0; + + rv_detach_trace_probe("wwnr", sched_switch, handle_switch); + rv_detach_trace_probe("wwnr", sched_wakeup, handle_wakeup); + + da_monitor_destroy_wwnr(); +} + +struct rv_monitor rv_wwnr = { + .name = "wwnr", + .description = "wakeup while not running per-task testing model.", + .enable = enable_wwnr, + .disable = disable_wwnr, + .reset = da_monitor_reset_all_wwnr, + .enabled = 0, +}; + +static int register_wwnr(void) +{ + rv_register_monitor(&rv_wwnr); + return 0; +} + +static void unregister_wwnr(void) +{ + rv_unregister_monitor(&rv_wwnr); +} + +module_init(register_wwnr); +module_exit(unregister_wwnr); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira "); +MODULE_DESCRIPTION("wwnr: wakeup while not running monitor"); diff --git a/kernel/trace/rv/monitors/wwnr/wwnr.h b/kernel/trace/rv/monitors/wwnr/wwnr.h new file mode 100644 index 0000000000000..d1afe55cdd4c1 --- /dev/null +++ b/kernel/trace/rv/monitors/wwnr/wwnr.h @@ -0,0 +1,46 @@ +/* + * Automatically generated C representation of wwnr automaton + * For further information about this format, see kernel documentation: + * Documentation/trace/rv/deterministic_automata.rst + */ + +enum states_wwnr { + not_running_wwnr = 0, + running_wwnr, + state_max_wwnr +}; + +#define INVALID_STATE state_max_wwnr + +enum events_wwnr { + switch_in_wwnr = 0, + switch_out_wwnr, + wakeup_wwnr, + event_max_wwnr +}; + +struct automaton_wwnr { + char *state_names[state_max_wwnr]; + char *event_names[event_max_wwnr]; + unsigned char function[state_max_wwnr][event_max_wwnr]; + unsigned char initial_state; + bool final_states[state_max_wwnr]; +}; + +struct automaton_wwnr automaton_wwnr = { + .state_names = { + "not_running", + "running" + }, + .event_names = { + "switch_in", + "switch_out", + "wakeup" + }, + .function = { + { running_wwnr, INVALID_STATE, not_running_wwnr }, + { INVALID_STATE, not_running_wwnr, INVALID_STATE }, + }, + .initial_state = not_running_wwnr, + .final_states = { 1, 0 }, +}; diff --git a/tools/verification/models/wwnr.dot b/tools/verification/models/wwnr.dot new file mode 100644 index 0000000000000..1b206e83129cd --- /dev/null +++ b/tools/verification/models/wwnr.dot @@ -0,0 +1,16 @@ +digraph state_automaton { + {node [shape = plaintext, style=invis, label=""] "__init_not_running"}; + {node [shape = ellipse] "not_running"}; + {node [shape = plaintext] "not_running"}; + {node [shape = plaintext] "running"}; + "__init_not_running" -> "not_running"; + "not_running" [label = "not_running", color = green3]; + "not_running" -> "not_running" [ label = "wakeup" ]; + "not_running" -> "running" [ label = "switch_in" ]; + "running" [label = "running"]; + "running" -> "not_running" [ label = "switch_out" ]; + { rank = min ; + "__init_not_running"; + "not_running"; + } +} From 135b881ea88566f27dd4acc5d2ed83ad418a3a69 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:54 +0200 Subject: [PATCH 42/50] rv/reactor: Add the printk reactor A reactor that printks the reaction message. Link: https://lkml.kernel.org/r/b65f18a7fd6dc6659a3008fd7b7392de3465d47b.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/Kconfig | 8 ++++++ kernel/trace/rv/Makefile | 3 ++- kernel/trace/rv/reactor_printk.c | 42 ++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 kernel/trace/rv/reactor_printk.c diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index b259d6e8dc7ce..e82d5015e6ab0 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -60,3 +60,11 @@ config RV_REACTORS on the model's execution. By default, the monitors have tracing reactions, printing the monitor output via tracepoints, but other reactions can be added (on-demand) via this interface. + +config RV_REACT_PRINTK + bool "Printk reactor" + depends on RV_REACTORS + default y + help + Enables the printk reactor. The printk reactor emits a printk() + message if an exception is found. diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index af0ff9a464184..a13c750a35c14 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_RV) += rv.o -obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o +obj-$(CONFIG_RV_REACTORS) += rv_reactors.o +obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o diff --git a/kernel/trace/rv/reactor_printk.c b/kernel/trace/rv/reactor_printk.c new file mode 100644 index 0000000000000..31899f953af4e --- /dev/null +++ b/kernel/trace/rv/reactor_printk.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Printk RV reactor: + * Prints the exception msg to the kernel message log. + */ +#include +#include +#include +#include +#include +#include + +static void rv_printk_reaction(char *msg) +{ + printk_deferred(msg); +} + +static struct rv_reactor rv_printk = { + .name = "printk", + .description = "prints the exception msg to the kernel message log.", + .react = rv_printk_reaction +}; + +static int register_react_printk(void) +{ + rv_register_reactor(&rv_printk); + return 0; +} + +static void unregister_react_printk(void) +{ + rv_unregister_reactor(&rv_printk); +} + +module_init(register_react_printk); +module_exit(unregister_react_printk); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira"); +MODULE_DESCRIPTION("printk rv reactor: printk if an exception is hit."); From e88043c0ac16f19960048372dcffc6df7c05c5b8 Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Fri, 29 Jul 2022 11:38:55 +0200 Subject: [PATCH 43/50] rv/reactor: Add the panic reactor Sample reactor that panics the system when an exception is found. This is useful both to capture a vmcore, or to fail-safe a critical system. Link: https://lkml.kernel.org/r/729aae3aba95f35738b8f8180e626d747d1d9da2.1659052063.git.bristot@kernel.org Cc: Wim Van Sebroeck Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Will Deacon Cc: Catalin Marinas Cc: Marco Elver Cc: Dmitry Vyukov Cc: "Paul E. McKenney" Cc: Shuah Khan Cc: Gabriele Paoloni Cc: Juri Lelli Cc: Clark Williams Cc: Tao Zhou Cc: Randy Dunlap Cc: linux-doc@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-trace-devel@vger.kernel.org Signed-off-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/Kconfig | 8 ++++++ kernel/trace/rv/Makefile | 1 + kernel/trace/rv/reactor_panic.c | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 52 insertions(+) create mode 100644 kernel/trace/rv/reactor_panic.c diff --git a/kernel/trace/rv/Kconfig b/kernel/trace/rv/Kconfig index e82d5015e6ab0..831779607e849 100644 --- a/kernel/trace/rv/Kconfig +++ b/kernel/trace/rv/Kconfig @@ -68,3 +68,11 @@ config RV_REACT_PRINTK help Enables the printk reactor. The printk reactor emits a printk() message if an exception is found. + +config RV_REACT_PANIC + bool "Panic reactor" + depends on RV_REACTORS + default y + help + Enables the panic reactor. The panic reactor emits a printk() + message if an exception is found and panic()s the system. diff --git a/kernel/trace/rv/Makefile b/kernel/trace/rv/Makefile index a13c750a35c14..963d14875b454 100644 --- a/kernel/trace/rv/Makefile +++ b/kernel/trace/rv/Makefile @@ -5,3 +5,4 @@ obj-$(CONFIG_RV_MON_WIP) += monitors/wip/wip.o obj-$(CONFIG_RV_MON_WWNR) += monitors/wwnr/wwnr.o obj-$(CONFIG_RV_REACTORS) += rv_reactors.o obj-$(CONFIG_RV_REACT_PRINTK) += reactor_printk.o +obj-$(CONFIG_RV_REACT_PANIC) += reactor_panic.o diff --git a/kernel/trace/rv/reactor_panic.c b/kernel/trace/rv/reactor_panic.c new file mode 100644 index 0000000000000..b698d05dd0695 --- /dev/null +++ b/kernel/trace/rv/reactor_panic.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira + * + * Panic RV reactor: + * Prints the exception msg to the kernel message log and panic(). + */ + +#include +#include +#include +#include +#include +#include + +static void rv_panic_reaction(char *msg) +{ + panic(msg); +} + +static struct rv_reactor rv_panic = { + .name = "panic", + .description = "panic the system if an exception is found.", + .react = rv_panic_reaction +}; + +static int register_react_panic(void) +{ + rv_register_reactor(&rv_panic); + return 0; +} + +static void unregister_react_panic(void) +{ + rv_unregister_reactor(&rv_panic); +} + +module_init(register_react_panic); +module_exit(unregister_react_panic); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Daniel Bristot de Oliveira"); +MODULE_DESCRIPTION("panic rv reactor: panic if an exception is found."); From 4c3d2f9388d36eb28640a220a6f908328442d873 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Sun, 31 Jul 2022 01:59:28 -0400 Subject: [PATCH 44/50] tracing: Use a struct alignof to determine trace event field alignment alignof() gives an alignment of types as they would be as standalone variables. But alignment in structures might be different, and when building the fields of events, the alignment must be the actual alignment otherwise the field offsets may not match what they actually are. This caused trace-cmd to crash, as libtraceevent did not check if the field offset was bigger than the event. The write_msr and read_msr events on 32 bit had their fields incorrect, because it had a u64 field between two ints. alignof(u64) would give 8, but the u64 field was at a 4 byte alignment. Define a macro as: ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) which gives the actual alignment of types in a structure. Link: https://lkml.kernel.org/r/20220731015928.7ab3a154@rorschach.local.home Cc: Ingo Molnar Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()") Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage4_event_fields.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index c3790ec7a4530..80d34f3965555 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -2,16 +2,18 @@ /* Stage 4 definitions for creating trace events */ +#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) + #undef __field_ext #define __field_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = _filter_type }, #undef __field_struct_ext #define __field_struct_ext(_type, _item, _filter_type) { \ .type = #_type, .name = #_item, \ - .size = sizeof(_type), .align = __alignof__(_type), \ + .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type), \ 0, .filter_type = _filter_type }, #undef __field @@ -23,7 +25,7 @@ #undef __array #define __array(_type, _item, _len) { \ .type = #_type"["__stringify(_len)"]", .name = #_item, \ - .size = sizeof(_type[_len]), .align = __alignof__(_type), \ + .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \ .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER }, #undef __dynamic_array From 59927cbe3f30c5ed7fc5d33487ef06611394a548 Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Wed, 20 Jul 2022 10:46:48 +0800 Subject: [PATCH 45/50] tracing: Use free_trace_buffer() in allocate_trace_buffers() In allocate_trace_buffers(), if allocating tr->max_buffer fails, we can directly call free_trace_buffer to free tr->array_buffer. Link: https://lkml.kernel.org/r/65f0702d-07f6-08de-2a07-4c50af56a67b@huawei.com Signed-off-by: Zhiqiang Liu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 301305ec134b6..27febd4ee33eb 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -9101,6 +9101,16 @@ allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size return 0; } +static void free_trace_buffer(struct array_buffer *buf) +{ + if (buf->buffer) { + ring_buffer_free(buf->buffer); + buf->buffer = NULL; + free_percpu(buf->data); + buf->data = NULL; + } +} + static int allocate_trace_buffers(struct trace_array *tr, int size) { int ret; @@ -9113,10 +9123,7 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) ret = allocate_trace_buffer(tr, &tr->max_buffer, allocate_snapshot ? size : 1); if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) { - ring_buffer_free(tr->array_buffer.buffer); - tr->array_buffer.buffer = NULL; - free_percpu(tr->array_buffer.data); - tr->array_buffer.data = NULL; + free_trace_buffer(&tr->array_buffer); return -ENOMEM; } tr->allocated_snapshot = allocate_snapshot; @@ -9131,16 +9138,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) return 0; } -static void free_trace_buffer(struct array_buffer *buf) -{ - if (buf->buffer) { - ring_buffer_free(buf->buffer); - buf->buffer = NULL; - free_percpu(buf->data); - buf->data = NULL; - } -} - static void free_trace_buffers(struct trace_array *tr) { if (!tr) From 170ab26b01d7f445c46261eff69341dab7e50a24 Mon Sep 17 00:00:00 2001 From: Li zeming Date: Thu, 21 Jul 2022 16:19:04 +0800 Subject: [PATCH 46/50] tracepoints: It is CONFIG_TRACEPOINTS not CONFIG_TRACEPOINT When reading this note, CONFIG_TRACEPOINT searches my configuration file, and the result is CONFIG_TRACEPOINTS, the search results are consistent with the following macro definitions. I think it should be repaired. Link: https://lkml.kernel.org/r/20220721081904.3798-1-zeming@nfschina.com Signed-off-by: Li zeming Signed-off-by: Steven Rostedt (Google) --- include/linux/tracepoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 28031b15f8783..2908cc5ed70e1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -151,7 +151,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) /* * Individual subsystem my have a separate configuration to * enable their tracepoints. By default, this file will create - * the tracepoints if CONFIG_TRACEPOINT is defined. If a subsystem + * the tracepoints if CONFIG_TRACEPOINTS is defined. If a subsystem * wants to be able to disable its tracepoints from being created * it can define NOTRACE before including the tracepoint headers. */ From 95522f0b18a059afa5aca036aa454c98beb553b5 Mon Sep 17 00:00:00 2001 From: Slark Xiao Date: Fri, 22 Jul 2022 18:29:07 +0800 Subject: [PATCH 47/50] scripts/tracing: Fix typo 'the the' in comment Replace 'the the' with 'the' in the comment. Link: https://lkml.kernel.org/r/20220722102907.81949-1-slark_xiao@163.com Signed-off-by: Slark Xiao Signed-off-by: Steven Rostedt (Google) --- scripts/tracing/draw_functrace.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py index 7011fbe003ff2..438516bdfb3cb 100755 --- a/scripts/tracing/draw_functrace.py +++ b/scripts/tracing/draw_functrace.py @@ -8,7 +8,7 @@ kernel/trace/trace_functions.c The resulted trace is processed into a tree to produce a more human view of the call stack by drawing textual but hierarchical tree of -calls. Only the functions's names and the the call time are provided. +calls. Only the functions's names and the call time are provided. Usage: Be sure that you have CONFIG_FUNCTION_TRACER From 2f63e5d2e391837c70741311b5b70d2fbd15d138 Mon Sep 17 00:00:00 2001 From: "Masami Hiramatsu (Google)" Date: Mon, 1 Aug 2022 11:32:15 +0900 Subject: [PATCH 48/50] tracing/eprobe: Show syntax error logs in error_log file Show the syntax errors for event probes in error_log file as same as other dynamic events, so that user can understand what is the problem. Link: https://lkml.kernel.org/r/165932113556.2850673.3483079297896607612.stgit@devnote2 Signed-off-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_eprobe.c | 11 +++++++++-- kernel/trace/trace_probe.h | 5 ++++- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_eprobe.c b/kernel/trace/trace_eprobe.c index a30f21499e812..4a0e9d927443c 100644 --- a/kernel/trace/trace_eprobe.c +++ b/kernel/trace/trace_eprobe.c @@ -839,8 +839,11 @@ static int trace_eprobe_tp_update_arg(struct trace_eprobe *ep, const char *argv[ if (ret) return ret; - if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) + if (ep->tp.args[i].code->op == FETCH_OP_TP_ARG) { ret = trace_eprobe_tp_arg_update(ep, i); + if (ret) + trace_probe_log_err(0, BAD_ATTACH_ARG); + } return ret; } @@ -880,8 +883,10 @@ static int __trace_eprobe_create(int argc, const char *argv[]) trace_probe_log_set_index(1); sys_event = argv[1]; ret = traceprobe_parse_event_name(&sys_event, &sys_name, buf2, 0); - if (!sys_event || !sys_name) + if (!sys_event || !sys_name) { + trace_probe_log_err(0, NO_EVENT_INFO); goto parse_error; + } if (!event) { strscpy(buf1, argv[1], MAX_EVENT_NAME_LEN); @@ -896,6 +901,8 @@ static int __trace_eprobe_create(int argc, const char *argv[]) if (IS_ERR(ep)) { ret = PTR_ERR(ep); + if (ret == -ENODEV) + trace_probe_log_err(0, BAD_ATTACH_EVENT); /* This must return -ENOMEM or missing event, else there is a bug */ WARN_ON_ONCE(ret != -ENOMEM && ret != -ENODEV); ep = NULL; diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h index 92cc149af0fde..3b3869ae8cfd1 100644 --- a/kernel/trace/trace_probe.h +++ b/kernel/trace/trace_probe.h @@ -442,7 +442,10 @@ extern int traceprobe_define_arg_fields(struct trace_event_call *event_call, C(FAIL_REG_PROBE, "Failed to register probe event"),\ C(DIFF_PROBE_TYPE, "Probe type is different from existing probe"),\ C(DIFF_ARG_TYPE, "Argument type or name is different from existing probe"),\ - C(SAME_PROBE, "There is already the exact same probe event"), + C(SAME_PROBE, "There is already the exact same probe event"),\ + C(NO_EVENT_INFO, "This requires both group and event name to attach"),\ + C(BAD_ATTACH_EVENT, "Attached event does not exist"),\ + C(BAD_ATTACH_ARG, "Attached event does not have this field"), #undef C #define C(a, b) TP_ERR_##a From 09794a5a6c348f629b35fc1687071a1622ef4265 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 2 Aug 2022 15:44:12 -0400 Subject: [PATCH 49/50] tracing: Use alignof__(struct {type b;}) instead of offsetof() Simplify: #define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) with #define ALIGN_STRUCTFIELD(type) __alignof__(struct {type b;}) Which works just the same. Link: https://lore.kernel.org/all/a7d202457150472588df0bd3b7334b3f@AcuMS.aculab.com/ Link: https://lkml.kernel.org/r/20220802154412.513c50e3@gandalf.local.home Suggested-by: David Laight Signed-off-by: Steven Rostedt (Google) --- include/trace/stages/stage4_event_fields.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h index 80d34f3965555..a8fb25f39a99d 100644 --- a/include/trace/stages/stage4_event_fields.h +++ b/include/trace/stages/stage4_event_fields.h @@ -2,7 +2,7 @@ /* Stage 4 definitions for creating trace events */ -#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b))) +#define ALIGN_STRUCTFIELD(type) ((int)(__alignof__(struct {type b;}))) #undef __field_ext #define __field_ext(_type, _item, _filter_type) { \ From f1a15b977ff864513133ecb611eb28603d32c1b4 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 4 Aug 2022 17:33:48 +0300 Subject: [PATCH 50/50] rv: Unlock on error path in rv_unregister_reactor() Unlock the "rv_interface_lock" mutex before returning. Link: https://lkml.kernel.org/r/YuvYzNfGMgV+PIhd@kili Fixes: 04acadcb4453 ("rv: Add runtime reactors interface") Signed-off-by: Dan Carpenter Signed-off-by: Steven Rostedt (Google) --- kernel/trace/rv/rv_reactors.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/trace/rv/rv_reactors.c b/kernel/trace/rv/rv_reactors.c index a6522c1963821..6aae106695b65 100644 --- a/kernel/trace/rv/rv_reactors.c +++ b/kernel/trace/rv/rv_reactors.c @@ -329,6 +329,7 @@ int rv_register_reactor(struct rv_reactor *reactor) int rv_unregister_reactor(struct rv_reactor *reactor) { struct rv_reactor_def *ptr, *next; + int ret = 0; mutex_lock(&rv_interface_lock); @@ -343,13 +344,14 @@ int rv_unregister_reactor(struct rv_reactor *reactor) ptr->reactor->name, ptr->counter); printk(KERN_WARNING "rv: the rv_reactor %s cannot be removed\n", ptr->reactor->name); - return -EBUSY; + ret = -EBUSY; + break; } } } mutex_unlock(&rv_interface_lock); - return 0; + return ret; } /*