From 59f09ae8fac4a990070fc6bdc889d0e0118664ea Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 29 Sep 2021 18:03:32 -0700
Subject: [PATCH 1/2] net: snmp: inline snmp_get_cpu_field()

This trivial function is called ~90,000 times on 256 cpu hosts
when reading /proc/net/netstat, and this number keeps inflating.

Inlining it saves many cycles.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   | 6 +++++-
 net/ipv4/af_inet.c | 6 ------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 9192444f2964e..cf229a5311942 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -291,7 +291,11 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 #define NET_ADD_STATS(net, field, adnd) SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 #define __NET_ADD_STATS(net, field, adnd) __SNMP_ADD_STATS((net)->mib.net_statistics, field, adnd)
 
-u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offct);
+static inline u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt)
+{
+	return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt);
+}
+
 unsigned long snmp_fold_field(void __percpu *mib, int offt);
 #if BITS_PER_LONG==32
 u64 snmp_get_cpu_field64(void __percpu *mib, int cpu, int offct,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 2fc6074583a41..8eb428387bac2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1662,12 +1662,6 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
 }
 EXPORT_SYMBOL_GPL(inet_ctl_sock_create);
 
-u64 snmp_get_cpu_field(void __percpu *mib, int cpu, int offt)
-{
-	return *(((unsigned long *)per_cpu_ptr(mib, cpu)) + offt);
-}
-EXPORT_SYMBOL_GPL(snmp_get_cpu_field);
-
 unsigned long snmp_fold_field(void __percpu *mib, int offt)
 {
 	unsigned long res = 0;
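A note on what the inlined accessor does: below is a minimal userspace
model, not kernel code; NR_CPUS, NR_FIELDS and the flat mib[][] array
are made-up stand-ins for the kernel's per-cpu MIB blocks. Once
snmp_get_cpu_field() is a static inline, every (cpu, field) read
collapses to a single indexed load instead of a function call, and the
fold over cpus becomes a tight loop.

  /* Userspace sketch; hypothetical constants, not kernel API. */
  #include <stdio.h>

  #define NR_CPUS   4
  #define NR_FIELDS 8

  static unsigned long mib[NR_CPUS][NR_FIELDS];

  /* Model of the inlined snmp_get_cpu_field(): one indexed load. */
  static inline unsigned long get_cpu_field(int cpu, int offt)
  {
          return mib[cpu][offt];
  }

  /* Model of snmp_fold_field(): sum one field across all cpus. */
  static unsigned long fold_field(int offt)
  {
          unsigned long res = 0;
          int cpu;

          for (cpu = 0; cpu < NR_CPUS; cpu++)
                  res += get_cpu_field(cpu, offt);
          return res;
  }

  int main(void)
  {
          mib[0][3] = 5;
          mib[2][3] = 7;
          printf("field 3 total: %lu\n", fold_field(3)); /* prints 12 */
          return 0;
  }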
From acbd0c8144138b6e652240bba248910d330d71bf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 29 Sep 2021 18:03:33 -0700
Subject: [PATCH 2/2] mptcp: use batch snmp operations in mptcp_seq_show()

Using snmp_get_cpu_field_batch() allows for better cpu cache
utilization, especially on hosts with a large number of cpus.

Also remove the special handling for the case where mptcp mibs
were not yet allocated.

I chose to use temporary storage on the stack to keep this patch
simple. We might in the future use the storage allocated in
netstat_seq_show().

Combined with the prior patch (inlining snmp_get_cpu_field), the time
to fetch and output mptcp counters on a 256 cpu host [1] goes from
75 usec to 16 usec.

[1] L1 cache size is 32KB; it is not big enough to hold the whole
    dataset.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/mib.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c
index b21ff9be04c61..3240b72271a7f 100644
--- a/net/mptcp/mib.c
+++ b/net/mptcp/mib.c
@@ -72,6 +72,7 @@ bool mptcp_mib_alloc(struct net *net)
 
 void mptcp_seq_show(struct seq_file *seq)
 {
+	unsigned long sum[ARRAY_SIZE(mptcp_snmp_list) - 1];
 	struct net *net = seq->private;
 	int i;
 
@@ -81,17 +82,13 @@ void mptcp_seq_show(struct seq_file *seq)
 
 	seq_puts(seq, "\nMPTcpExt:");
 
-	if (!net->mib.mptcp_statistics) {
-		for (i = 0; mptcp_snmp_list[i].name; i++)
-			seq_puts(seq, " 0");
-
-		seq_putc(seq, '\n');
-		return;
-	}
+	memset(sum, 0, sizeof(sum));
+	if (net->mib.mptcp_statistics)
+		snmp_get_cpu_field_batch(sum, mptcp_snmp_list,
+					 net->mib.mptcp_statistics);
 
 	for (i = 0; mptcp_snmp_list[i].name; i++)
-		seq_printf(seq, " %lu",
-			   snmp_fold_field(net->mib.mptcp_statistics,
-					   mptcp_snmp_list[i].entry));
+		seq_printf(seq, " %lu", sum[i]);
+
 	seq_putc(seq, '\n');
 }
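For illustration, a minimal userspace sketch of the batching idea
follows (again with made-up NR_CPUS/NR_FIELDS constants and a flat
mib[][] array; the real snmp_get_cpu_field_batch() takes a snmp_mib
item list and a per-cpu pointer). A per-field fold re-walks every
cpu's counter block once per field; the batch walks each cpu exactly
once, streaming its counters sequentially through the cache while
accumulating all fields at the same time.

  /* Userspace sketch of the batch pattern, not the kernel helper. */
  #include <stdio.h>
  #include <string.h>

  #define NR_CPUS   4
  #define NR_FIELDS 8

  static unsigned long mib[NR_CPUS][NR_FIELDS];

  /* cpu in the outer loop, fields inner: each cpu's counter block is
   * read once, sequentially, instead of once per field. */
  static void get_fields_batch(unsigned long sum[NR_FIELDS])
  {
          int cpu, i;

          memset(sum, 0, NR_FIELDS * sizeof(sum[0]));
          for (cpu = 0; cpu < NR_CPUS; cpu++)
                  for (i = 0; i < NR_FIELDS; i++)
                          sum[i] += mib[cpu][i];
  }

  int main(void)
  {
          unsigned long sum[NR_FIELDS];

          mib[0][0] = 1;
          mib[3][0] = 2;
          get_fields_batch(sum);
          printf("field 0 total: %lu\n", sum[0]); /* prints 3 */
          return 0;
  }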