Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…/git/bpf/bpf

Daniel Borkmann says:

====================
pull-request: bpf 2023-04-13

We've added 6 non-merge commits during the last 1 day(s) which contain
a total of 14 files changed, 205 insertions(+), 38 deletions(-).

The main changes are:

1) One late straggler fix on the XDP hints side which fixes
   bpf_xdp_metadata_rx_hash kfunc API before the release goes out
   in order to provide information on the RSS hash type,
   from Jesper Dangaard Brouer.

* tag 'for-netdev' of https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf:
  selftests/bpf: Adjust bpf_xdp_metadata_rx_hash for new arg
  mlx4: bpf_xdp_metadata_rx_hash add xdp rss hash type
  veth: bpf_xdp_metadata_rx_hash add xdp rss hash type
  mlx5: bpf_xdp_metadata_rx_hash add xdp rss hash type
  xdp: rss hash types representation
  selftests/bpf: xdp_hw_metadata remove bpf_printk and add counters
====================

Link: https://lore.kernel.org/r/20230413192939.10202-1-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Apr 13, 2023
2 parents 0646dc3 + b65ef48 commit d0f89c4
Showing 14 changed files with 205 additions and 38 deletions.
22 changes: 20 additions & 2 deletions drivers/net/ethernet/mellanox/mlx4/en_rx.c
Original file line number Diff line number Diff line change
@@ -681,14 +681,32 @@ int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}

/*
 * mlx4_en_xdp_rx_hash - report the RX hash and RSS hash type of an XDP frame.
 * @ctx: XDP context; cast to struct mlx4_en_xdp_buff to reach the CQE.
 * @hash: output, the CQE immed_rss_invalid field in CPU byte order.
 * @rss_type: output, XDP_RSS_* bits derived from the CQE status flags.
 *
 * Return: 0 on success, -ENODATA when NETIF_F_RXHASH is not set in the
 * device features (in which case neither output is written).
 */
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
struct mlx4_en_xdp_buff *_ctx = (void *)ctx;
struct mlx4_cqe *cqe = _ctx->cqe;
enum xdp_rss_hash_type xht = 0;
__be16 status;

if (unlikely(!(_ctx->dev->features & NETIF_F_RXHASH)))
return -ENODATA;

*hash = be32_to_cpu(_ctx->cqe->immed_rss_invalid);
*hash = be32_to_cpu(cqe->immed_rss_invalid);
/* status stays big-endian; each mask below is converted to match it. */
status = cqe->status;
/*
 * L4 protocol bit. These are plain assignments, so UDP overrides TCP
 * should both status bits ever be set.
 *
 * NOTE(review): only the protocol bit is set here, not XDP_RSS_L4. The
 * comment on enum xdp_rss_hash_type in include/net/xdp.h says drivers
 * MUST set XDP_RSS_L4 in addition to the protocol specific bit when the
 * hash covers L4 - confirm whether "XDP_RSS_L4 |" is missing.
 */
if (status & cpu_to_be16(MLX4_CQE_STATUS_TCP))
xht = XDP_RSS_L4_TCP;
if (status & cpu_to_be16(MLX4_CQE_STATUS_UDP))
xht = XDP_RSS_L4_UDP;
/* L3 bits are OR-ed in on top of whatever L4 bit was chosen above. */
if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 | MLX4_CQE_STATUS_IPV4F))
xht |= XDP_RSS_L3_IPV4;
if (status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) {
xht |= XDP_RSS_L3_IPV6;
/* A non-zero ipv6_ext_mask is reported as the dynamic-header bit. */
if (cqe->ipv6_ext_mask)
xht |= XDP_RSS_L3_DYNHDR;
}
*rss_type = xht;

return 0;
}

3 changes: 2 additions & 1 deletion drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
Original file line number Diff line number Diff line change
@@ -798,7 +798,8 @@ int mlx4_en_netdev_event(struct notifier_block *this,

struct xdp_md;
int mlx4_en_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp);
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash);
int mlx4_en_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);

/*
* Functions for time stamping
63 changes: 61 additions & 2 deletions drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
Original file line number Diff line number Diff line change
@@ -34,6 +34,7 @@
#include <net/xdp_sock_drv.h>
#include "en/xdp.h"
#include "en/params.h"
#include <linux/bitfield.h>

int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk)
{
@@ -169,14 +170,72 @@ static int mlx5e_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}

static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
/*
 * Map the HW RSS type fields CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4 into a
 * 4-bit value: the L3 (IP) field in bits 3:2 and the L4 field in bits 1:0.
 */
#define RSS_TYPE_MAX_TABLE 16 /* 4-bits max 16 entries */
#define RSS_L4 GENMASK(1, 0)
#define RSS_L3 GENMASK(3, 2) /* Same as CQE_RSS_HTYPE_IP */

/*
 * Valid combinations of CQE_RSS_HTYPE_IP + CQE_RSS_HTYPE_L4, sorted
 * numerically. Each value packs the IP type into RSS_L3 and the L4 type
 * into RSS_L4, and is used as an index into mlx5_xdp_rss_type[].
 */
enum mlx5_rss_hash_type {
RSS_TYPE_NO_HASH = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IP_NONE) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L3_IPV4 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L4_IPV4_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
RSS_TYPE_L4_IPV4_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
RSS_TYPE_L4_IPV4_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV4) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
RSS_TYPE_L3_IPV6 = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_NONE)),
RSS_TYPE_L4_IPV6_TCP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_TCP)),
RSS_TYPE_L4_IPV6_UDP = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_UDP)),
RSS_TYPE_L4_IPV6_IPSEC = (FIELD_PREP_CONST(RSS_L3, CQE_RSS_IPV6) |
FIELD_PREP_CONST(RSS_L4, CQE_RSS_L4_IPSEC)),
};

/*
 * Lookup table from the 4-bit mlx5 RSS type (enum mlx5_rss_hash_type) to
 * enum xdp_rss_hash_type. The table has RSS_TYPE_MAX_TABLE (16) entries, one
 * for every possible 4-bit index, so the lookup needs no boundary check.
 * Invalid combinations simply yield XDP_RSS_TYPE_NONE (zero); those slots
 * are spelled out below even though they would be zero implicitly.
 */
static const enum xdp_rss_hash_type mlx5_xdp_rss_type[RSS_TYPE_MAX_TABLE] = {
[RSS_TYPE_NO_HASH] = XDP_RSS_TYPE_NONE,
[1] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[2] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[3] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[RSS_TYPE_L3_IPV4] = XDP_RSS_TYPE_L3_IPV4,
[RSS_TYPE_L4_IPV4_TCP] = XDP_RSS_TYPE_L4_IPV4_TCP,
[RSS_TYPE_L4_IPV4_UDP] = XDP_RSS_TYPE_L4_IPV4_UDP,
[RSS_TYPE_L4_IPV4_IPSEC] = XDP_RSS_TYPE_L4_IPV4_IPSEC,
[RSS_TYPE_L3_IPV6] = XDP_RSS_TYPE_L3_IPV6,
[RSS_TYPE_L4_IPV6_TCP] = XDP_RSS_TYPE_L4_IPV6_TCP,
[RSS_TYPE_L4_IPV6_UDP] = XDP_RSS_TYPE_L4_IPV6_UDP,
[RSS_TYPE_L4_IPV6_IPSEC] = XDP_RSS_TYPE_L4_IPV6_IPSEC,
[12] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[13] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[14] = XDP_RSS_TYPE_NONE, /* Implicit zero */
[15] = XDP_RSS_TYPE_NONE, /* Implicit zero */
};

/*
 * mlx5e_xdp_rx_hash - report the RX hash and RSS hash type of an XDP frame.
 * @ctx: XDP context; cast to struct mlx5e_xdp_buff to reach the CQE.
 * @hash: output, the CQE rss_hash_result field in CPU byte order.
 * @rss_type: output, looked up in mlx5_xdp_rss_type[] from the CQE
 *            rss_hash_type field.
 *
 * Return: 0 on success, -ENODATA when NETIF_F_RXHASH is not set in the
 * features of the RX queue's device (neither output is written then).
 */
static int mlx5e_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
const struct mlx5e_xdp_buff *_ctx = (void *)ctx;
const struct mlx5_cqe64 *cqe = _ctx->cqe;
u32 hash_type, l4_type, ip_type, lookup;

if (unlikely(!(_ctx->xdp.rxq->dev->features & NETIF_F_RXHASH)))
return -ENODATA;

*hash = be32_to_cpu(_ctx->cqe->rss_hash_result);
*hash = be32_to_cpu(cqe->rss_hash_result);

hash_type = cqe->rss_hash_type;
BUILD_BUG_ON(CQE_RSS_HTYPE_IP != RSS_L3); /* same mask */
/* The IP field is only masked, not shifted: it already sits in RSS_L3. */
ip_type = hash_type & CQE_RSS_HTYPE_IP;
/* FIELD_GET shifts the L4 field down, giving a 2-bit value for RSS_L4. */
l4_type = FIELD_GET(CQE_RSS_HTYPE_L4, hash_type);
/* 4-bit index; the table has an entry for each of the 16 values. */
lookup = ip_type | l4_type;
*rss_type = mlx5_xdp_rss_type[lookup];

return 0;
}

10 changes: 7 additions & 3 deletions drivers/net/veth.c
Original file line number Diff line number Diff line change
@@ -1648,14 +1648,18 @@ static int veth_xdp_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
return 0;
}

/*
 * veth_xdp_rx_hash - report the RX hash and RSS hash type of an XDP frame.
 * @ctx: XDP context; cast to struct veth_xdp_buff to reach the skb.
 * @hash: output, the value returned by skb_get_hash() for the skb.
 * @rss_type: output, XDP_RSS_TYPE_L4_ANY when skb->l4_hash is set,
 *            XDP_RSS_TYPE_NONE otherwise.
 *
 * Return: 0 on success, -ENODATA when the context carries no skb
 * (neither output is written then).
 */
static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash)
static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
struct veth_xdp_buff *_ctx = (void *)ctx;
struct sk_buff *skb = _ctx->skb;

if (!_ctx->skb)
if (!skb)
return -ENODATA;

*hash = skb_get_hash(_ctx->skb);
*hash = skb_get_hash(skb);
/* l4_hash is read after skb_get_hash() has run on the skb. */
*rss_type = skb->l4_hash ? XDP_RSS_TYPE_L4_ANY : XDP_RSS_TYPE_NONE;

return 0;
}

14 changes: 12 additions & 2 deletions include/linux/mlx5/device.h
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@
#include <linux/types.h>
#include <rdma/ib_verbs.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/bitfield.h>

#if defined(__LITTLE_ENDIAN)
#define MLX5_SET_HOST_ENDIANNESS 0
@@ -980,14 +981,23 @@ enum {
};

/*
 * Layout of cqe->rss_hash_type: CQE_RSS_HTYPE_* are the field masks,
 * CQE_RSS_* are the 2-bit values each field can hold.
 */
enum {
CQE_RSS_HTYPE_IP = 0x3 << 2,
CQE_RSS_HTYPE_IP = GENMASK(3, 2),
/*
 * cqe->rss_hash_type[3:2] - IP destination selected for hash
 * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved)
 */
CQE_RSS_HTYPE_L4 = 0x3 << 6,
CQE_RSS_IP_NONE = 0x0,
CQE_RSS_IPV4 = 0x1,
CQE_RSS_IPV6 = 0x2,
CQE_RSS_RESERVED = 0x3,

CQE_RSS_HTYPE_L4 = GENMASK(7, 6),
/*
 * cqe->rss_hash_type[7:6] - L4 destination selected for hash
 * (00 = none, 01 = TCP, 10 = UDP, 11 = IPSEC.SPI)
 */
CQE_RSS_L4_NONE = 0x0,
CQE_RSS_L4_TCP = 0x1,
CQE_RSS_L4_UDP = 0x2,
CQE_RSS_L4_IPSEC = 0x3,
};

enum {
3 changes: 2 additions & 1 deletion include/linux/netdevice.h
Original file line number Diff line number Diff line change
@@ -1624,7 +1624,8 @@ struct net_device_ops {

/*
 * Function pointers for reading RX metadata of an XDP frame.
 * Both take the XDP context plus pointer(s) for the returned value(s) and
 * return an int; the driver *_xdp_rx_hash() functions return 0 or -ENODATA.
 */
struct xdp_metadata_ops {
/* RX timestamp, returned through @timestamp. */
int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp);
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash);
/* RX hash through @hash, and its RSS hash type through @rss_type. */
int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type);
};

/**
47 changes: 47 additions & 0 deletions include/net/xdp.h
Original file line number Diff line number Diff line change
@@ -8,6 +8,7 @@

#include <linux/skbuff.h> /* skb_shared_info */
#include <uapi/linux/netdev.h>
#include <linux/bitfield.h>

/**
* DOC: XDP RX-queue information
@@ -425,6 +426,52 @@ XDP_METADATA_KFUNC_xxx
MAX_XDP_METADATA_KFUNC,
};

/*
 * enum xdp_rss_hash_type - which packet headers the RSS hash was computed on.
 *
 * The value is a bitmask. It can be decoded either by testing the individual
 * XDP_RSS_L3_* / XDP_RSS_L4* bits (first part) or by comparing against the
 * combined XDP_RSS_TYPE_* values (second part), which are built from those
 * bits.
 */
enum xdp_rss_hash_type {
/* First part: Individual bits for L3/L4 types */
XDP_RSS_L3_IPV4 = BIT(0),
XDP_RSS_L3_IPV6 = BIT(1),

/*
 * The fixed (L3) IPv4 and IPv6 headers can both be followed by
 * variable/dynamic headers, called Options for IPv4 and Extension
 * Headers for IPv6. The HW RSS type can contain this info.
 */
XDP_RSS_L3_DYNHDR = BIT(2),

/*
 * When the RSS hash covers L4, drivers MUST set the XDP_RSS_L4 bit in
 * addition to the protocol specific bit. This eases interaction with
 * SKBs and avoids reserving a fixed mask for future L4 protocol bits.
 */
XDP_RSS_L4 = BIT(3), /* L4 based hash, proto can be unknown */
XDP_RSS_L4_TCP = BIT(4),
XDP_RSS_L4_UDP = BIT(5),
XDP_RSS_L4_SCTP = BIT(6),
XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */

/* Second part: RSS hash type combinations used for driver HW mapping */
XDP_RSS_TYPE_NONE = 0,
/* L2 has no bit of its own; it is the same value as NONE. */
XDP_RSS_TYPE_L2 = XDP_RSS_TYPE_NONE,

XDP_RSS_TYPE_L3_IPV4 = XDP_RSS_L3_IPV4,
XDP_RSS_TYPE_L3_IPV6 = XDP_RSS_L3_IPV6,
XDP_RSS_TYPE_L3_IPV4_OPT = XDP_RSS_L3_IPV4 | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L3_IPV6_EX = XDP_RSS_L3_IPV6 | XDP_RSS_L3_DYNHDR,

/* Every L4 combination below includes the generic XDP_RSS_L4 bit. */
XDP_RSS_TYPE_L4_ANY = XDP_RSS_L4,
XDP_RSS_TYPE_L4_IPV4_TCP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,

XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP,
XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP,
XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP,
XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC,

/* IPv6 L4 combinations with the dynamic (extension) header bit added. */
XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR,
XDP_RSS_TYPE_L4_IPV6_SCTP_EX = XDP_RSS_TYPE_L4_IPV6_SCTP | XDP_RSS_L3_DYNHDR,
};

#ifdef CONFIG_NET
u32 bpf_xdp_metadata_kfunc_id(int id);
bool bpf_dev_bound_kfunc_id(u32 btf_id);
10 changes: 9 additions & 1 deletion net/core/xdp.c
Original file line number Diff line number Diff line change
@@ -734,13 +734,21 @@ __bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *tim
* bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
* @ctx: XDP context pointer.
* @hash: Return value pointer.
* @rss_type: Return value pointer for RSS type.
*
* The RSS hash type (@rss_type) specifies what portion of packet headers NIC
* hardware used when calculating RSS hash value. The RSS type can be decoded
* via &enum xdp_rss_hash_type either matching on individual L3/L4 bits
* ``XDP_RSS_L*`` or by combined traditional *RSS Hashing Types*
* ``XDP_RSS_TYPE_L*``.
*
* Return:
* * Returns 0 on success or ``-errno`` on error.
* * ``-EOPNOTSUPP`` : means device driver doesn't implement kfunc
* * ``-ENODATA`` : means no RX-hash available for this frame
*/
__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash,
enum xdp_rss_hash_type *rss_type)
{
/*
 * This body never touches @hash or @rss_type; it unconditionally returns
 * -EOPNOTSUPP, the "device driver doesn't implement kfunc" code.
 */
return -EOPNOTSUPP;
}
2 changes: 2 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
Original file line number Diff line number Diff line change
@@ -273,6 +273,8 @@ static int verify_xsk_metadata(struct xsk *xsk)
if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
return -1;

ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");

xsk_ring_cons__release(&xsk->rx, 1);
refill_rx(xsk, comp_addr);

42 changes: 24 additions & 18 deletions tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
Original file line number Diff line number Diff line change
@@ -12,10 +12,14 @@ struct {
__type(value, __u32);
} xsk SEC(".maps");

/*
 * Global packet counters, incremented in rx() with __sync_add_and_fetch():
 * pkts_skip  - packets passed up without redirect (no UDP header found, or
 *              UDP destination port is not 9091),
 * pkts_fail  - packets for which the metadata area could not be set up,
 * pkts_redir - packets that reached the bpf_redirect_map() call.
 */
__u64 pkts_skip = 0;
__u64 pkts_fail = 0;
__u64 pkts_redir = 0;

extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
__u32 *hash) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;

SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -26,7 +30,7 @@ int rx(struct xdp_md *ctx)
struct udphdr *udp = NULL;
struct iphdr *iph = NULL;
struct xdp_meta *meta;
int ret;
int err;

data = (void *)(long)ctx->data;
data_end = (void *)(long)ctx->data_end;
@@ -46,17 +50,20 @@ int rx(struct xdp_md *ctx)
udp = NULL;
}

if (!udp)
if (!udp) {
__sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
}

if (udp->dest != bpf_htons(9091))
/* Forwarding UDP:9091 to AF_XDP */
if (udp->dest != bpf_htons(9091)) {
__sync_add_and_fetch(&pkts_skip, 1);
return XDP_PASS;
}

bpf_printk("forwarding UDP:9091 to AF_XDP");

ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
if (ret != 0) {
bpf_printk("bpf_xdp_adjust_meta returned %d", ret);
err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
if (err) {
__sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}

@@ -65,20 +72,19 @@ int rx(struct xdp_md *ctx)
meta = data_meta;

if (meta + 1 > data) {
bpf_printk("bpf_xdp_adjust_meta doesn't appear to work");
__sync_add_and_fetch(&pkts_fail, 1);
return XDP_PASS;
}

if (!bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp))
bpf_printk("populated rx_timestamp with %llu", meta->rx_timestamp);
else
err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
if (err)
meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */

if (!bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash))
bpf_printk("populated rx_hash with %u", meta->rx_hash);
else
meta->rx_hash = 0; /* Used by AF_XDP as not avail signal */
err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
if (err < 0)
meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */

__sync_add_and_fetch(&pkts_redir, 1);
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}

6 changes: 3 additions & 3 deletions tools/testing/selftests/bpf/progs/xdp_metadata.c
Original file line number Diff line number Diff line change
@@ -21,8 +21,8 @@ struct {

extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx,
__u32 *hash) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
enum xdp_rss_hash_type *rss_type) __ksym;

SEC("xdp")
int rx(struct xdp_md *ctx)
@@ -56,7 +56,7 @@ int rx(struct xdp_md *ctx)
if (timestamp == 0)
meta->rx_timestamp = 1;

bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash);
bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);

return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
Loading

0 comments on commit d0f89c4

Please sign in to comment.