From c504e5c2f9648a1e5c2be01e8c3f59d394192bd3 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:26 +0800 Subject: [PATCH 1/3] net: skb: introduce kfree_skb_reason() Introduce the interface kfree_skb_reason(), which is able to pass the reason why the skb is dropped to 'kfree_skb' tracepoint. Add the 'reason' field to 'trace_kfree_skb', therefor user can get more detail information about abnormal skb with 'drop_monitor' or eBPF. All drop reasons are defined in the enum 'skb_drop_reason', and they will be print as string in 'kfree_skb' tracepoint in format of 'reason: XXX'. ( Maybe the reasons should be defined in a uapi header file, so that user space can use them? ) Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 23 ++++++++++++++++++++++- include/trace/events/skb.h | 36 +++++++++++++++++++++++++++++------- net/core/dev.c | 3 ++- net/core/drop_monitor.c | 10 +++++++--- net/core/skbuff.c | 12 +++++++----- 5 files changed, 67 insertions(+), 17 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 642acb0d1646b..ef0870abc791c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -305,6 +305,17 @@ struct sk_buff_head { struct sk_buff; +/* The reason of skb drop, which is used in kfree_skb_reason(). + * en...maybe they should be splited by group? + * + * Each item here should also be in 'TRACE_SKB_DROP_REASON', which is + * used to translate the reason to string. + */ +enum skb_drop_reason { + SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_MAX, +}; + /* To allow 64K frame to be packed as single skb without frag_list we * require 64K/PAGE_SIZE pages plus 1 additional page to allow for * buffers which do not start on a page boundary. @@ -1085,8 +1096,18 @@ static inline bool skb_unref(struct sk_buff *skb) return true; } +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason); + +/** + * kfree_skb - free an sk_buff with 'NOT_SPECIFIED' reason + * @skb: buffer to free + */ +static inline void kfree_skb(struct sk_buff *skb) +{ + kfree_skb_reason(skb, SKB_DROP_REASON_NOT_SPECIFIED); +} + void skb_release_head_state(struct sk_buff *skb); -void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 9e92f22eb086c..294c61bbe44b6 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -9,29 +9,51 @@ #include #include +#define TRACE_SKB_DROP_REASON \ + EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EMe(SKB_DROP_REASON_MAX, MAX) + +#undef EM +#undef EMe + +#define EM(a, b) TRACE_DEFINE_ENUM(a); +#define EMe(a, b) TRACE_DEFINE_ENUM(a); + +TRACE_SKB_DROP_REASON + +#undef EM +#undef EMe +#define EM(a, b) { a, #b }, +#define EMe(a, b) { a, #b } + /* * Tracepoint for free an sk_buff: */ TRACE_EVENT(kfree_skb, - TP_PROTO(struct sk_buff *skb, void *location), + TP_PROTO(struct sk_buff *skb, void *location, + enum skb_drop_reason reason), - TP_ARGS(skb, location), + TP_ARGS(skb, location, reason), TP_STRUCT__entry( - __field( void *, skbaddr ) - __field( void *, location ) - __field( unsigned short, protocol ) + __field(void *, skbaddr) + __field(void *, location) + __field(unsigned short, protocol) + __field(enum skb_drop_reason, reason) ), TP_fast_assign( __entry->skbaddr = skb; __entry->location = location; __entry->protocol = ntohs(skb->protocol); + __entry->reason = reason; ), - TP_printk("skbaddr=%p protocol=%u location=%p", - __entry->skbaddr, __entry->protocol, __entry->location) + TP_printk("skbaddr=%p protocol=%u location=%p reason: %s", + __entry->skbaddr, __entry->protocol, __entry->location, + __print_symbolic(__entry->reason, + TRACE_SKB_DROP_REASON)) ); TRACE_EVENT(consume_skb, diff --git a/net/core/dev.c b/net/core/dev.c index 83a4089990a0a..84a0d9542fe94 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4899,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED)) trace_consume_skb(skb); else - trace_kfree_skb(skb, net_tx_action); + trace_kfree_skb(skb, net_tx_action, + SKB_DROP_REASON_NOT_SPECIFIED); if (skb->fclone != SKB_FCLONE_UNAVAILABLE) __kfree_skb(skb); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 3d0ab2eec9166..7b288a121a41a 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -110,7 +110,8 @@ static u32 net_dm_queue_len = 1000; struct net_dm_alert_ops { void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, - void *location); + void *location, + enum skb_drop_reason reason); void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, int work, int budget); void (*work_item_func)(struct work_struct *work); @@ -262,7 +263,9 @@ static void trace_drop_common(struct sk_buff *skb, void *location) spin_unlock_irqrestore(&data->lock, flags); } -static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, void *location) +static void trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, + void *location, + enum skb_drop_reason reason) { trace_drop_common(skb, location); } @@ -490,7 +493,8 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = { static void net_dm_packet_trace_kfree_skb_hit(void *ignore, struct sk_buff *skb, - void *location) + void *location, + enum skb_drop_reason reason) { ktime_t tstamp = ktime_get_real(); struct per_cpu_dm_data *data; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e514a36bcffcc..0118f0afaa4fc 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -759,21 +759,23 @@ void __kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(__kfree_skb); /** - * kfree_skb - free an sk_buff + * kfree_skb_reason - free an sk_buff with special reason * @skb: buffer to free + * @reason: reason why this skb is dropped * * Drop a reference to the buffer and free it if the usage count has - * hit zero. + * hit zero. Meanwhile, pass the drop reason to 'kfree_skb' + * tracepoint. */ -void kfree_skb(struct sk_buff *skb) +void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason) { if (!skb_unref(skb)) return; - trace_kfree_skb(skb, __builtin_return_address(0)); + trace_kfree_skb(skb, __builtin_return_address(0), reason); __kfree_skb(skb); } -EXPORT_SYMBOL(kfree_skb); +EXPORT_SYMBOL(kfree_skb_reason); void kfree_skb_list(struct sk_buff *segs) { From 85125597419aec3aa7b8f3b8713e415f997796f2 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:27 +0800 Subject: [PATCH 2/3] net: skb: use kfree_skb_reason() in tcp_v4_rcv() Replace kfree_skb() with kfree_skb_reason() in tcp_v4_rcv(). Following drop reasons are added: SKB_DROP_REASON_NO_SOCKET SKB_DROP_REASON_PKT_TOO_SMALL SKB_DROP_REASON_TCP_CSUM SKB_DROP_REASON_TCP_FILTER After this patch, 'kfree_skb' event will print message like this: $ TASK-PID CPU# ||||| TIMESTAMP FUNCTION $ | | | ||||| | | -0 [000] ..s1. 36.113438: kfree_skb: skbaddr=(____ptrval____) protocol=2048 location=(____ptrval____) reason: NO_SOCKET The reason of skb drop is printed too. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 4 ++++ include/trace/events/skb.h | 4 ++++ net/ipv4/tcp_ipv4.c | 14 +++++++++++--- 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ef0870abc791c..c9c97b0d0fe9e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -313,6 +313,10 @@ struct sk_buff; */ enum skb_drop_reason { SKB_DROP_REASON_NOT_SPECIFIED, + SKB_DROP_REASON_NO_SOCKET, + SKB_DROP_REASON_PKT_TOO_SMALL, + SKB_DROP_REASON_TCP_CSUM, + SKB_DROP_REASON_TCP_FILTER, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index 294c61bbe44b6..faa7d068a7bc0 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -11,6 +11,10 @@ #define TRACE_SKB_DROP_REASON \ EM(SKB_DROP_REASON_NOT_SPECIFIED, NOT_SPECIFIED) \ + EM(SKB_DROP_REASON_NO_SOCKET, NO_SOCKET) \ + EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ + EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ + EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9861786b83365..b3f34e366b27f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1971,8 +1971,10 @@ int tcp_v4_rcv(struct sk_buff *skb) const struct tcphdr *th; bool refcounted; struct sock *sk; + int drop_reason; int ret; + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; @@ -1984,8 +1986,10 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; - if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) + if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; + } if (!pskb_may_pull(skb, th->doff * 4)) goto discard_it; @@ -2090,8 +2094,10 @@ int tcp_v4_rcv(struct sk_buff *skb) nf_reset_ct(skb); - if (tcp_filter(sk, skb)) + if (tcp_filter(sk, skb)) { + drop_reason = SKB_DROP_REASON_TCP_FILTER; goto discard_and_relse; + } th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2124,6 +2130,7 @@ int tcp_v4_rcv(struct sk_buff *skb) return ret; no_tcp_socket: + drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; @@ -2131,6 +2138,7 @@ int tcp_v4_rcv(struct sk_buff *skb) if (tcp_checksum_complete(skb)) { csum_error: + drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -2141,7 +2149,7 @@ int tcp_v4_rcv(struct sk_buff *skb) discard_it: /* Discard frame. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; discard_and_relse: From 1c7fab70df085d866a3765955f397ca2b4025b15 Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sun, 9 Jan 2022 14:36:28 +0800 Subject: [PATCH 3/3] net: skb: use kfree_skb_reason() in __udp4_lib_rcv() Replace kfree_skb() with kfree_skb_reason() in __udp4_lib_rcv. New drop reason 'SKB_DROP_REASON_UDP_CSUM' is added for udp csum error. Signed-off-by: Menglong Dong Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 1 + include/trace/events/skb.h | 1 + net/ipv4/udp.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c9c97b0d0fe9e..af64c7de9b536 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -317,6 +317,7 @@ enum skb_drop_reason { SKB_DROP_REASON_PKT_TOO_SMALL, SKB_DROP_REASON_TCP_CSUM, SKB_DROP_REASON_TCP_FILTER, + SKB_DROP_REASON_UDP_CSUM, SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index faa7d068a7bc0..3e042ca2cedb5 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -15,6 +15,7 @@ EM(SKB_DROP_REASON_PKT_TOO_SMALL, PKT_TOO_SMALL) \ EM(SKB_DROP_REASON_TCP_CSUM, TCP_CSUM) \ EM(SKB_DROP_REASON_TCP_FILTER, TCP_FILTER) \ + EM(SKB_DROP_REASON_UDP_CSUM, UDP_CSUM) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c2a4411d2b04d..464590ea922e1 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2411,6 +2411,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __be32 saddr, daddr; struct net *net = dev_net(skb->dev); bool refcounted; + int drop_reason; + + drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; /* * Validate the packet. @@ -2466,6 +2469,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp_lib_checksum_complete(skb)) goto csum_error; + drop_reason = SKB_DROP_REASON_NO_SOCKET; __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -2473,10 +2477,11 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * Hmm. We got an UDP packet to a port to which we * don't wanna listen. Ignore it. */ - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; short_packet: + drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), @@ -2489,6 +2494,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ + drop_reason = SKB_DROP_REASON_UDP_CSUM; net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", proto == IPPROTO_UDPLITE ? "Lite" : "", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), @@ -2496,7 +2502,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); drop: __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return 0; }