Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2024-10-14

The following pull-request contains BPF updates for your *net-next* tree.

We've added 21 non-merge commits during the last 18 day(s) which contain
a total of 21 files changed, 1185 insertions(+), 127 deletions(-).

The main changes are:

1) Put xsk sockets on a struct diet and add various cleanups. Overall, this helps
   to bump performance by 12% for some workloads, from Maciej Fijalkowski.

2) Extend BPF selftests to increase coverage of XDP features in combination
   with BPF cpumap, from Alexis Lothoré (eBPF Foundation).

3) Extend netkit with an option to delegate skb->{mark,priority} scrubbing to
   its BPF program, from Daniel Borkmann.

4) Make the bpf_get_netns_cookie() helper available also to tc(x) BPF programs,
   from Mahe Tardy.

5) Extend BPF selftests covering a BPF program setting socket options per MPTCP
   subflow, from Geliang Tang and Nicolas Rybowski.

bpf-next-for-netdev

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (21 commits)
  xsk: Use xsk_buff_pool directly for cq functions
  xsk: Wrap duplicated code to function
  xsk: Carry a copy of xdp_zc_max_segs within xsk_buff_pool
  xsk: Get rid of xdp_buff_xsk::orig_addr
  xsk: s/free_list_node/list_node/
  xsk: Get rid of xdp_buff_xsk::xskb_list_node
  selftests/bpf: check program redirect in xdp_cpumap_attach
  selftests/bpf: make xdp_cpumap_attach keep redirect prog attached
  selftests/bpf: fix bpf_map_redirect call for cpu map test
  selftests/bpf: add tcx netns cookie tests
  bpf: add get_netns_cookie helper to tc programs
  selftests/bpf: add missing header include for htons
  selftests/bpf: Extend netkit tests to validate skb meta data
  tools: Sync if_link.h uapi tooling header
  netkit: Add add netkit scrub support to rt_link.yaml
  netkit: Simplify netkit mode over to use NLA_POLICY_MAX
  netkit: Add option for scrubbing skb meta data
  bpf: Remove unused macro
  selftests/bpf: Add mptcp subflow subtest
  selftests/bpf: Add getsockopt to inspect mptcp subflow
  ...
====================

Link: https://patch.msgid.link/20241014211110.16562-1-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
  • Loading branch information
Paolo Abeni committed Oct 15, 2024
2 parents de306f0 + e6c4047 commit 39ab206
Show file tree
Hide file tree
Showing 21 changed files with 1,185 additions and 127 deletions.
15 changes: 15 additions & 0 deletions Documentation/netlink/specs/rt_link.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,13 @@ definitions:
- name: l2
- name: l3

-
name: netkit-scrub
type: enum
entries:
- name: none
- name: default

attribute-sets:
-
name: link-attrs
Expand Down Expand Up @@ -2151,6 +2158,14 @@ attribute-sets:
name: mode
type: u32
enum: netkit-mode
-
name: scrub
type: u32
enum: netkit-scrub
-
name: peer-scrub
type: u32
enum: netkit-scrub

sub-messages:
-
Expand Down
2 changes: 1 addition & 1 deletion MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -16300,7 +16300,7 @@ F: include/net/mptcp.h
F: include/trace/events/mptcp.h
F: include/uapi/linux/mptcp*.h
F: net/mptcp/
F: tools/testing/selftests/bpf/*/*mptcp*.c
F: tools/testing/selftests/bpf/*/*mptcp*.[ch]
F: tools/testing/selftests/net/mptcp/

NETWORKING [TCP]
Expand Down
91 changes: 57 additions & 34 deletions drivers/net/netkit.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ struct netkit {
struct net_device __rcu *peer;
struct bpf_mprog_entry __rcu *active;
enum netkit_action policy;
enum netkit_scrub scrub;
struct bpf_mprog_bundle bundle;

/* Needed in slow-path */
Expand Down Expand Up @@ -50,12 +51,24 @@ netkit_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
return ret;
}

static void netkit_prep_forward(struct sk_buff *skb, bool xnet)
static void netkit_xnet(struct sk_buff *skb)
{
skb_scrub_packet(skb, xnet);
skb->priority = 0;
skb->mark = 0;
}

static void netkit_prep_forward(struct sk_buff *skb,
bool xnet, bool xnet_scrub)
{
skb_scrub_packet(skb, false);
nf_skip_egress(skb, true);
skb_reset_mac_header(skb);
if (!xnet)
return;
ipvs_reset(skb);
skb_clear_tstamp(skb);
if (xnet_scrub)
netkit_xnet(skb);
}

static struct netkit *netkit_priv(const struct net_device *dev)
Expand All @@ -80,7 +93,8 @@ static netdev_tx_t netkit_xmit(struct sk_buff *skb, struct net_device *dev)
!pskb_may_pull(skb, ETH_HLEN) ||
skb_orphan_frags(skb, GFP_ATOMIC)))
goto drop;
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)));
netkit_prep_forward(skb, !net_eq(dev_net(dev), dev_net(peer)),
nk->scrub);
eth_skb_pkt_type(skb, peer);
skb->dev = peer;
entry = rcu_dereference(nk->active);
Expand Down Expand Up @@ -297,20 +311,6 @@ static int netkit_check_policy(int policy, struct nlattr *tb,
}
}

static int netkit_check_mode(int mode, struct nlattr *tb,
struct netlink_ext_ack *extack)
{
switch (mode) {
case NETKIT_L2:
case NETKIT_L3:
return 0;
default:
NL_SET_ERR_MSG_ATTR(extack, tb,
"Provided device mode can only be L2 or L3");
return -EINVAL;
}
}

static int netkit_validate(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack)
{
Expand All @@ -332,8 +332,10 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb, *attr;
enum netkit_action default_prim = NETKIT_PASS;
enum netkit_action default_peer = NETKIT_PASS;
enum netkit_action policy_prim = NETKIT_PASS;
enum netkit_action policy_peer = NETKIT_PASS;
enum netkit_scrub scrub_prim = NETKIT_SCRUB_DEFAULT;
enum netkit_scrub scrub_peer = NETKIT_SCRUB_DEFAULT;
enum netkit_mode mode = NETKIT_L3;
unsigned char ifname_assign_type;
struct ifinfomsg *ifmp = NULL;
Expand All @@ -344,13 +346,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
int err;

if (data) {
if (data[IFLA_NETKIT_MODE]) {
attr = data[IFLA_NETKIT_MODE];
mode = nla_get_u32(attr);
err = netkit_check_mode(mode, attr, extack);
if (err < 0)
return err;
}
if (data[IFLA_NETKIT_MODE])
mode = nla_get_u32(data[IFLA_NETKIT_MODE]);
if (data[IFLA_NETKIT_PEER_INFO]) {
attr = data[IFLA_NETKIT_PEER_INFO];
ifmp = nla_data(attr);
Expand All @@ -362,17 +359,21 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,
return err;
tbp = peer_tb;
}
if (data[IFLA_NETKIT_SCRUB])
scrub_prim = nla_get_u32(data[IFLA_NETKIT_SCRUB]);
if (data[IFLA_NETKIT_PEER_SCRUB])
scrub_peer = nla_get_u32(data[IFLA_NETKIT_PEER_SCRUB]);
if (data[IFLA_NETKIT_POLICY]) {
attr = data[IFLA_NETKIT_POLICY];
default_prim = nla_get_u32(attr);
err = netkit_check_policy(default_prim, attr, extack);
policy_prim = nla_get_u32(attr);
err = netkit_check_policy(policy_prim, attr, extack);
if (err < 0)
return err;
}
if (data[IFLA_NETKIT_PEER_POLICY]) {
attr = data[IFLA_NETKIT_PEER_POLICY];
default_peer = nla_get_u32(attr);
err = netkit_check_policy(default_peer, attr, extack);
policy_peer = nla_get_u32(attr);
err = netkit_check_policy(policy_peer, attr, extack);
if (err < 0)
return err;
}
Expand Down Expand Up @@ -409,7 +410,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

nk = netkit_priv(peer);
nk->primary = false;
nk->policy = default_peer;
nk->policy = policy_peer;
nk->scrub = scrub_peer;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);

Expand All @@ -434,7 +436,8 @@ static int netkit_new_link(struct net *src_net, struct net_device *dev,

nk = netkit_priv(dev);
nk->primary = true;
nk->policy = default_prim;
nk->policy = policy_prim;
nk->scrub = scrub_prim;
nk->mode = mode;
bpf_mprog_bundle_init(&nk->bundle);

Expand Down Expand Up @@ -874,6 +877,18 @@ static int netkit_change_link(struct net_device *dev, struct nlattr *tb[],
return -EACCES;
}

if (data[IFLA_NETKIT_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}

if (data[IFLA_NETKIT_PEER_SCRUB]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_SCRUB],
"netkit scrubbing cannot be changed after device creation");
return -EACCES;
}

if (data[IFLA_NETKIT_PEER_INFO]) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_NETKIT_PEER_INFO],
"netkit peer info cannot be changed after device creation");
Expand Down Expand Up @@ -908,8 +923,10 @@ static size_t netkit_get_size(const struct net_device *dev)
{
return nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_POLICY */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_POLICY */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_SCRUB */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_PEER_SCRUB */
nla_total_size(sizeof(u32)) + /* IFLA_NETKIT_MODE */
nla_total_size(sizeof(u8)) + /* IFLA_NETKIT_PRIMARY */
0;
}

Expand All @@ -924,21 +941,27 @@ static int netkit_fill_info(struct sk_buff *skb, const struct net_device *dev)
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_MODE, nk->mode))
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_SCRUB, nk->scrub))
return -EMSGSIZE;

if (peer) {
nk = netkit_priv(peer);
if (nla_put_u32(skb, IFLA_NETKIT_PEER_POLICY, nk->policy))
return -EMSGSIZE;
if (nla_put_u32(skb, IFLA_NETKIT_PEER_SCRUB, nk->scrub))
return -EMSGSIZE;
}

return 0;
}

static const struct nla_policy netkit_policy[IFLA_NETKIT_MAX + 1] = {
[IFLA_NETKIT_PEER_INFO] = { .len = sizeof(struct ifinfomsg) },
[IFLA_NETKIT_MODE] = NLA_POLICY_MAX(NLA_U32, NETKIT_L3),
[IFLA_NETKIT_POLICY] = { .type = NLA_U32 },
[IFLA_NETKIT_MODE] = { .type = NLA_U32 },
[IFLA_NETKIT_PEER_POLICY] = { .type = NLA_U32 },
[IFLA_NETKIT_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PEER_SCRUB] = NLA_POLICY_MAX(NLA_U32, NETKIT_SCRUB_DEFAULT),
[IFLA_NETKIT_PRIMARY] = { .type = NLA_REJECT,
.reject_message = "Primary attribute is read-only" },
};
Expand Down
14 changes: 7 additions & 7 deletions include/net/xdp_sock_drv.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ static inline void xsk_buff_free(struct xdp_buff *xdp)
if (likely(!xdp_buff_has_frags(xdp)))
goto out;

list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
list_del(&pos->xskb_list_node);
list_for_each_entry_safe(pos, tmp, xskb_list, list_node) {
list_del(&pos->list_node);
xp_free(pos);
}

Expand All @@ -140,7 +140,7 @@ static inline void xsk_buff_add_frag(struct xdp_buff *xdp)
{
struct xdp_buff_xsk *frag = container_of(xdp, struct xdp_buff_xsk, xdp);

list_add_tail(&frag->xskb_list_node, &frag->pool->xskb_list);
list_add_tail(&frag->list_node, &frag->pool->xskb_list);
}

static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
Expand All @@ -150,9 +150,9 @@ static inline struct xdp_buff *xsk_buff_get_frag(struct xdp_buff *first)
struct xdp_buff_xsk *frag;

frag = list_first_entry_or_null(&xskb->pool->xskb_list,
struct xdp_buff_xsk, xskb_list_node);
struct xdp_buff_xsk, list_node);
if (frag) {
list_del(&frag->xskb_list_node);
list_del(&frag->list_node);
ret = &frag->xdp;
}

Expand All @@ -163,7 +163,7 @@ static inline void xsk_buff_del_tail(struct xdp_buff *tail)
{
struct xdp_buff_xsk *xskb = container_of(tail, struct xdp_buff_xsk, xdp);

list_del(&xskb->xskb_list_node);
list_del(&xskb->list_node);
}

static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
Expand All @@ -172,7 +172,7 @@ static inline struct xdp_buff *xsk_buff_get_tail(struct xdp_buff *first)
struct xdp_buff_xsk *frag;

frag = list_last_entry(&xskb->pool->xskb_list, struct xdp_buff_xsk,
xskb_list_node);
list_node);
return &frag->xdp;
}

Expand Down
23 changes: 13 additions & 10 deletions include/net/xsk_buff_pool.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@ struct xdp_buff_xsk {
dma_addr_t dma;
dma_addr_t frame_dma;
struct xsk_buff_pool *pool;
u64 orig_addr;
struct list_head free_list_node;
struct list_head xskb_list_node;
struct list_head list_node;
};

#define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb))
Expand Down Expand Up @@ -78,6 +76,7 @@ struct xsk_buff_pool {
u32 chunk_size;
u32 chunk_shift;
u32 frame_len;
u32 xdp_zc_max_segs;
u8 tx_metadata_len; /* inherited from umem */
u8 cached_need_wakeup;
bool uses_need_wakeup;
Expand Down Expand Up @@ -120,7 +119,6 @@ void xp_free(struct xdp_buff_xsk *xskb);
static inline void xp_init_xskb_addr(struct xdp_buff_xsk *xskb, struct xsk_buff_pool *pool,
u64 addr)
{
xskb->orig_addr = addr;
xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
}

Expand Down Expand Up @@ -222,14 +220,19 @@ static inline void xp_release(struct xdp_buff_xsk *xskb)
xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
}

static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb)
static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb,
struct xsk_buff_pool *pool)
{
u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
u64 orig_addr = xskb->xdp.data - pool->addrs;
u64 offset;

offset += xskb->pool->headroom;
if (!xskb->pool->unaligned)
return xskb->orig_addr + offset;
return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
if (!pool->unaligned)
return orig_addr;

offset = xskb->xdp.data - xskb->xdp.data_hard_start;
orig_addr -= offset;
offset += pool->headroom;
return orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
}

static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool)
Expand Down
15 changes: 15 additions & 0 deletions include/uapi/linux/if_link.h
Original file line number Diff line number Diff line change
Expand Up @@ -1293,13 +1293,28 @@ enum netkit_mode {
NETKIT_L3,
};

/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to
* the BPF program if attached. This also means the latter can
* consume the two fields if they were populated earlier.
*
* NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before
* invoking the attached BPF program when the peer device resides
* in a different network namespace. This is the default behavior.
*/
enum netkit_scrub {
NETKIT_SCRUB_NONE,
NETKIT_SCRUB_DEFAULT,
};

enum {
IFLA_NETKIT_UNSPEC,
IFLA_NETKIT_PEER_INFO,
IFLA_NETKIT_PRIMARY,
IFLA_NETKIT_POLICY,
IFLA_NETKIT_PEER_POLICY,
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
Expand Down
Loading

0 comments on commit 39ab206

Please sign in to comment.