Skip to content

Commit

Permalink
Merge branch 'net-group-together-hot-data'
Browse files Browse the repository at this point in the history
Eric Dumazet says:

====================
net: group together hot data

While our recent structure reorganizations were focused
on increasing max throughput, there is still an
area where improvements are much needed.

In many cases, a cpu handles one packet at a time,
instead of a nice batch.

Hardware interrupt.
 -> Software interrupt.
   -> Network/Protocol stacks.

If the cpu was idle or busy in other layers,
it has to pull many cache lines.

This series adds a new net_hotdata structure, where
some critical (and read-mostly) data used in
rx and tx path is packed in a small number of cache lines.

Synthetic benchmarks will not see much difference,
but latency of single packet should improve.

net_hotdata current size on 64bit is 416 bytes,
but might grow in the future.

Also move RPS definitions to a new include file.
====================

Link: https://lore.kernel.org/r/20240306160031.874438-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Mar 8, 2024
2 parents d3423ed + ce7f49a commit e8bb2cc
Show file tree
Hide file tree
Showing 44 changed files with 391 additions and 320 deletions.
1 change: 1 addition & 0 deletions drivers/net/ethernet/intel/ice/ice_arfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/* Copyright (C) 2018-2020, Intel Corporation. */

#include "ice.h"
#include <net/rps.h>

/**
* ice_is_arfs_active - helper to check if aRFS is active
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlx4/en_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <net/ip.h>
#include <net/vxlan.h>
#include <net/devlink.h>
#include <net/rps.h>

#include <linux/mlx4/driver.h>
#include <linux/mlx4/device.h>
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#include <linux/mlx5/fs.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/rps.h>
#include "en.h"

#define ARFS_HASH_SHIFT BITS_PER_BYTE
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/sfc/rx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/sfc/siena/rx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "net_driver.h"
#include <linux/module.h>
#include <linux/iommu.h>
#include <net/rps.h>
#include "efx.h"
#include "nic.h"
#include "rx_common.h"
Expand Down
1 change: 1 addition & 0 deletions drivers/net/tun.c
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
#include <net/ax25.h>
#include <net/rose.h>
#include <net/6lowpan.h>
#include <net/rps.h>

#include <linux/uaccess.h>
#include <linux/proc_fs.h>
Expand Down
88 changes: 0 additions & 88 deletions include/linux/netdevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,6 @@ struct net_device_core_stats {
#include <linux/cache.h>
#include <linux/skbuff.h>

#ifdef CONFIG_RPS
#include <linux/static_key.h>
extern struct static_key_false rps_needed;
extern struct static_key_false rfs_needed;
#endif

struct neighbour;
struct neigh_parms;
struct sk_buff;
Expand Down Expand Up @@ -730,86 +724,10 @@ static inline void netdev_queue_numa_node_write(struct netdev_queue *q, int node
#endif
}

#ifdef CONFIG_RPS
/*
* This structure holds an RPS map which can be of variable length. The
* map is an array of CPUs.
*/
struct rps_map {
/* presumably the number of valid entries in cpus[] - TODO confirm against users */
unsigned int len;
/* RCU head; NOTE(review): suggests the map is released through RCU - confirm */
struct rcu_head rcu;
/* flexible array holding the CPUs of the map; its length is fixed at allocation */
u16 cpus[];
};
/* Number of bytes needed for a struct rps_map followed by _num u16 entries. */
#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))

/*
* The rps_dev_flow structure contains the mapping of a flow to a CPU, the
* tail pointer for that CPU's input queue at the time of last enqueue, and
* a hardware filter index.
*/
struct rps_dev_flow {
/* CPU this flow is mapped to */
u16 cpu;
/* hardware filter index for this flow */
u16 filter;
/* tail pointer of that CPU's input queue at the time of the last enqueue */
unsigned int last_qtail;
};
/* presumably the value kept in ->filter when no hardware filter is set - TODO confirm */
#define RPS_NO_FILTER 0xffff

/*
* The rps_dev_flow_table structure contains a table of flow mappings.
*/
struct rps_dev_flow_table {
/* presumably ANDed with a flow hash to select an entry of flows[] - TODO confirm */
unsigned int mask;
/* RCU head; NOTE(review): suggests the table is released through RCU - confirm */
struct rcu_head rcu;
/* flexible array of per-flow entries; its length is fixed at allocation */
struct rps_dev_flow flows[];
};
/* Number of bytes needed for a struct rps_dev_flow_table followed by _num flows. */
#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
((_num) * sizeof(struct rps_dev_flow)))

/*
* The rps_sock_flow_table contains mappings of flows to the last CPU
* on which they were processed by the application (set in recvmsg).
* Each entry is a 32bit value. Upper part is the high-order bits
* of flow hash, lower part is CPU number.
* rps_cpu_mask is used to partition the space, depending on number of
* possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1
* For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f,
* meaning we use 32-6=26 bits for the hash.
*/
struct rps_sock_flow_table {
/* ANDed with the flow hash to pick a slot in ents[], see rps_record_sock_flow() */
u32 mask;

/* one 32bit entry per slot, encoded as described above; the array start
* is cacheline aligned on SMP builds
*/
u32 ents[] ____cacheline_aligned_in_smp;
};
/* Size in bytes of a table with _num entries: the offset of ents[_num], which
* also accounts for any alignment padding placed in front of ents[].
*/
#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num]))

#define RPS_NO_CPU 0xffff

extern u32 rps_cpu_mask;
extern struct rps_sock_flow_table __rcu *rps_sock_flow_table;

/*
 * Note in @table that the flow identified by @hash was last handled on the
 * CPU executing this call. The stored value keeps the hash bits outside
 * rps_cpu_mask and carries the CPU number in the remaining low bits.
 * Nothing is recorded when @table is NULL or @hash is zero.
 */
static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
					u32 hash)
{
	unsigned int slot;
	u32 entry;

	if (!table || !hash)
		return;

	slot = hash & table->mask;

	/* This is only a hint: preemption may move us to another CPU. */
	entry = (hash & ~rps_cpu_mask) | raw_smp_processor_id();

	/* The READ_ONCE() below and the one in get_rps_cpu() are paired
	 * with the WRITE_ONCE() that follows. The store is skipped when
	 * the slot already holds the wanted value.
	 */
	if (READ_ONCE(table->ents[slot]) == entry)
		return;

	WRITE_ONCE(table->ents[slot], entry);
}

#ifdef CONFIG_RFS_ACCEL
bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id,
u16 filter_id);
#endif
#endif /* CONFIG_RPS */

/* XPS map type and offset of the xps map within net_device->xps_maps[]. */
enum xps_map_type {
Expand Down Expand Up @@ -4793,11 +4711,6 @@ void dev_fetch_sw_netstats(struct rtnl_link_stats64 *s,
const struct pcpu_sw_netstats __percpu *netstats);
void dev_get_tstats64(struct net_device *dev, struct rtnl_link_stats64 *s);

extern int netdev_max_backlog;
extern int dev_rx_weight;
extern int dev_tx_weight;
extern int gro_normal_batch;

enum {
NESTED_SYNC_IMM_BIT,
NESTED_SYNC_TODO_BIT,
Expand Down Expand Up @@ -5307,7 +5220,6 @@ static inline const char *netdev_reg_state(const struct net_device *dev)
#define PTYPE_HASH_SIZE (16)
#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)

extern struct list_head ptype_all __read_mostly;
extern struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;

extern struct net_device *blackhole_netdev;
Expand Down
1 change: 0 additions & 1 deletion include/linux/skbuff.h
Original file line number Diff line number Diff line change
Expand Up @@ -1271,7 +1271,6 @@ static inline void consume_skb(struct sk_buff *skb)

void __consume_stateless_skb(struct sk_buff *skb);
void __kfree_skb(struct sk_buff *skb);
extern struct kmem_cache *skbuff_cache;

void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
Expand Down
5 changes: 2 additions & 3 deletions include/net/gro.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <net/ip6_checksum.h>
#include <linux/skbuff.h>
#include <net/udp.h>
#include <net/hotdata.h>

struct napi_gro_cb {
union {
Expand Down Expand Up @@ -446,7 +447,7 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
{
list_add_tail(&skb->list, &napi->rx_list);
napi->rx_count += segs;
if (napi->rx_count >= READ_ONCE(gro_normal_batch))
if (napi->rx_count >= READ_ONCE(net_hotdata.gro_normal_batch))
gro_normal_list(napi);
}

Expand Down Expand Up @@ -493,6 +494,4 @@ static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *
#endif
}

extern struct list_head offload_base;

#endif /* _NET_IPV6_GRO_H */
52 changes: 52 additions & 0 deletions include/net/hotdata.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_HOTDATA_H
#define _NET_HOTDATA_H

#include <linux/types.h>
#include <linux/netdevice.h>
#include <net/protocol.h>

/* Read mostly data used in network fast paths.
*
* The members are grouped in a single structure; NOTE(review): the member
* order presumably matters for cache line packing - confirm before reordering.
*/
struct net_hotdata {
/* Offload and protocol descriptors, only present when CONFIG_INET is enabled. */
#if IS_ENABLED(CONFIG_INET)
struct packet_offload ip_packet_offload;
struct net_offload tcpv4_offload;
struct net_protocol tcp_protocol;
struct net_offload udpv4_offload;
struct net_protocol udp_protocol;
struct packet_offload ipv6_packet_offload;
struct net_offload tcpv6_offload;
/* The inet6_protocol members additionally require CONFIG_IPV6. */
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol tcpv6_protocol;
struct inet6_protocol udpv6_protocol;
#endif
struct net_offload udpv6_offload;
#endif
/* List heads and kmem_cache pointers; presumably the skb allocation caches
* and the packet offload/type lists - TODO confirm against their users.
*/
struct list_head offload_base;
struct list_head ptype_all;
struct kmem_cache *skbuff_cache;
struct kmem_cache *skbuff_fclone_cache;
struct kmem_cache *skb_small_head_cache;
/* RPS state, only present when CONFIG_RPS is defined. */
#ifdef CONFIG_RPS
struct rps_sock_flow_table __rcu *rps_sock_flow_table;
u32 rps_cpu_mask;
#endif
/* Integer tunables. gro_normal_batch is the threshold napi->rx_count is
* compared against in gro_normal_one(); the meaning of the others is not
* visible from this header.
*/
int gro_normal_batch;
int netdev_budget;
int netdev_budget_usecs;
int tstamp_prequeue;
int max_backlog;
int dev_tx_weight;
int dev_rx_weight;
};

/*
* Shorthand names for the "secret" member of the protocol/offload structures
* embedded in net_hotdata. Every expansion refers to a member that only exists
* when CONFIG_INET is enabled; the tcpv6_protocol and udpv6_protocol ones also
* require CONFIG_IPV6, so those names can only be used in such builds.
*/
#define inet_ehash_secret net_hotdata.tcp_protocol.secret
#define udp_ehash_secret net_hotdata.udp_protocol.secret
#define inet6_ehash_secret net_hotdata.tcpv6_protocol.secret
#define tcp_ipv6_hash_secret net_hotdata.tcpv6_offload.secret
#define udp6_ehash_secret net_hotdata.udpv6_protocol.secret
#define udp_ipv6_hash_secret net_hotdata.udpv6_offload.secret

extern struct net_hotdata net_hotdata;

#endif /* _NET_HOTDATA_H */
3 changes: 3 additions & 0 deletions include/net/protocol.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ struct net_protocol {
* socket lookup?
*/
icmp_strict_tag_validation:1;
u32 secret;
};

#if IS_ENABLED(CONFIG_IPV6)
Expand All @@ -59,6 +60,7 @@ struct inet6_protocol {
__be32 info);

unsigned int flags; /* INET6_PROTO_xxx */
u32 secret;
};

#define INET6_PROTO_NOPOLICY 0x1
Expand All @@ -68,6 +70,7 @@ struct inet6_protocol {
struct net_offload {
struct offload_callbacks callbacks;
unsigned int flags; /* Flags used by IPv6 for now */
u32 secret;
};
/* This should be set for any extension header which is compatible with GSO. */
#define INET6_PROTO_GSO_EXTHDR 0x1
Expand Down
Loading

0 comments on commit e8bb2cc

Please sign in to comment.