From 8fcb76b934daff12cde76adeab3d502eeb0734b1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Mar 2023 23:50:18 +0000 Subject: [PATCH 1/4] net: napi_schedule_rps() cleanup napi_schedule_rps() return value is ignored, remove it. Change the comment to clarify the intent. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Tested-by: Jason Xing Signed-off-by: Paolo Abeni --- net/core/dev.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 7172334a418fd..f7050b95d1250 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4582,11 +4582,18 @@ static void trigger_rx_softirq(void *data) } /* - * Check if this softnet_data structure is another cpu one - * If yes, queue it to our IPI list and return 1 - * If no, return 0 + * After we queued a packet into sd->input_pkt_queue, + * we need to make sure this queue is serviced soon. + * + * - If this is another cpu queue, link it to our rps_ipi_list, + * and make sure we will process rps_ipi_list from net_rx_action(). + * As we do not know yet if we are called from net_rx_action(), + * we have to raise NET_RX_SOFTIRQ. This might change in the future. + * + * - If this is our own queue, NAPI schedule our backlog. + * Note that this also raises NET_RX_SOFTIRQ. */ -static int napi_schedule_rps(struct softnet_data *sd) +static void napi_schedule_rps(struct softnet_data *sd) { struct softnet_data *mysd = this_cpu_ptr(&softnet_data); @@ -4596,11 +4603,10 @@ static int napi_schedule_rps(struct softnet_data *sd) mysd->rps_ipi_list = sd; __raise_softirq_irqoff(NET_RX_SOFTIRQ); - return 1; + return; } #endif /* CONFIG_RPS */ __napi_schedule_irqoff(&mysd->backlog); - return 0; } #ifdef CONFIG_NET_FLOW_LIMIT From c59647c0dc679008886756a888368da1c6d4ccd3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Mar 2023 23:50:19 +0000 Subject: [PATCH 2/4] net: add softnet_data.in_net_rx_action We want to make two optimizations in napi_schedule_rps() and ____napi_schedule() which require to know if these helpers are called from net_rx_action(), instead of being called from other contexts. sd.in_net_rx_action is only read/written by the owning cpu. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Tested-by: Jason Xing Signed-off-by: Paolo Abeni --- include/linux/netdevice.h | 1 + net/core/dev.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 18a5be6ddd0f7..c8c634091a659 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3188,6 +3188,7 @@ struct softnet_data { #ifdef CONFIG_RPS struct softnet_data *rps_ipi_list; #endif + bool in_net_rx_action; #ifdef CONFIG_NET_FLOW_LIMIT struct sd_flow_limit __rcu *flow_limit; #endif diff --git a/net/core/dev.c b/net/core/dev.c index f7050b95d1250..15331edbacf4c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6646,6 +6646,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) LIST_HEAD(list); LIST_HEAD(repoll); + sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); local_irq_enable(); @@ -6656,6 +6657,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) skb_defer_free_flush(sd); if (list_empty(&list)) { + sd->in_net_rx_action = false; if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) goto end; break; @@ -6682,6 +6684,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) __raise_softirq_irqoff(NET_RX_SOFTIRQ); + else + sd->in_net_rx_action = false; net_rps_action_and_irq_enable(sd); end:; From 821eba962d95806beb0440742c4062a9da8a386b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Mar 2023 23:50:20 +0000 Subject: [PATCH 3/4] net: optimize napi_schedule_rps() Based on initial patch from Jason Xing. Idea is to not raise NET_RX_SOFTIRQ from napi_schedule_rps() when we queued a packet into another cpu backlog. We can do this only in the context of us being called indirectly from net_rx_action(), to have the guarantee our rps_ipi_list will be processed before we exit from net_rx_action(). Link: https://lore.kernel.org/lkml/20230325152417.5403-1-kerneljasonxing@gmail.com/ Signed-off-by: Eric Dumazet Cc: Jason Xing Reviewed-by: Jason Xing Tested-by: Jason Xing Signed-off-by: Paolo Abeni --- net/core/dev.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 15331edbacf4c..f34ce93f2f02e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4587,8 +4587,6 @@ static void trigger_rx_softirq(void *data) * * - If this is another cpu queue, link it to our rps_ipi_list, * and make sure we will process rps_ipi_list from net_rx_action(). - * As we do not know yet if we are called from net_rx_action(), - * we have to raise NET_RX_SOFTIRQ. This might change in the future. * * - If this is our own queue, NAPI schedule our backlog. * Note that this also raises NET_RX_SOFTIRQ. @@ -4602,7 +4600,11 @@ static void napi_schedule_rps(struct softnet_data *sd) sd->rps_ipi_next = mysd->rps_ipi_list; mysd->rps_ipi_list = sd; - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* If not called from net_rx_action() + * we have to raise NET_RX_SOFTIRQ. + */ + if (!mysd->in_net_rx_action) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); return; } #endif /* CONFIG_RPS */ From 8b43fd3d1d7d88293eb15e92090826e6b7cc13e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 28 Mar 2023 23:50:21 +0000 Subject: [PATCH 4/4] net: optimize ____napi_schedule() to avoid extra NET_RX_SOFTIRQ ____napi_schedule() adds a napi into current cpu softnet_data poll_list, then raises NET_RX_SOFTIRQ to make sure net_rx_action() will process it. Idea of this patch is to not raise NET_RX_SOFTIRQ when being called indirectly from net_rx_action(), because we can process poll_list from this point, without going to full softirq loop. This needs a change in net_rx_action() to make sure we restart its main loop if sd->poll_list was updated without NET_RX_SOFTIRQ being raised. Signed-off-by: Eric Dumazet Cc: Jason Xing Reviewed-by: Jason Xing Tested-by: Jason Xing Signed-off-by: Paolo Abeni --- net/core/dev.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index f34ce93f2f02e..0c4b21291348d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4360,7 +4360,11 @@ static inline void ____napi_schedule(struct softnet_data *sd, } list_add_tail(&napi->poll_list, &sd->poll_list); - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + /* If not called from net_rx_action() + * we have to raise NET_RX_SOFTIRQ. + */ + if (!sd->in_net_rx_action) + __raise_softirq_irqoff(NET_RX_SOFTIRQ); } #ifdef CONFIG_RPS @@ -6648,6 +6652,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) LIST_HEAD(list); LIST_HEAD(repoll); +start: sd->in_net_rx_action = true; local_irq_disable(); list_splice_init(&sd->poll_list, &list); @@ -6659,9 +6664,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h) skb_defer_free_flush(sd); if (list_empty(&list)) { - sd->in_net_rx_action = false; - if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll)) - goto end; + if (list_empty(&repoll)) { + sd->in_net_rx_action = false; + barrier(); + /* We need to check if ____napi_schedule() + * had refilled poll_list while + * sd->in_net_rx_action was true. + */ + if (!list_empty(&sd->poll_list)) + goto start; + if (!sd_has_rps_ipi_waiting(sd)) + goto end; + } break; }