From 5896351ea9360072f8bdd9eee186861a9d13db6d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 8 Jan 2018 07:51:17 -0800 Subject: [PATCH 01/18] bpf: fix verifier GPF in kmalloc failure path syzbot reported the following panic in the verifier triggered by kmalloc error injection: kasan: GPF could be caused by NULL-ptr deref or user memory access RIP: 0010:copy_func_state kernel/bpf/verifier.c:403 [inline] RIP: 0010:copy_verifier_state+0x364/0x590 kernel/bpf/verifier.c:431 Call Trace: pop_stack+0x8c/0x270 kernel/bpf/verifier.c:449 push_stack kernel/bpf/verifier.c:491 [inline] check_cond_jmp_op kernel/bpf/verifier.c:3598 [inline] do_check+0x4b60/0xa050 kernel/bpf/verifier.c:4731 bpf_check+0x3296/0x58c0 kernel/bpf/verifier.c:5489 bpf_prog_load+0xa2a/0x1b00 kernel/bpf/syscall.c:1198 SYSC_bpf kernel/bpf/syscall.c:1807 [inline] SyS_bpf+0x1044/0x4420 kernel/bpf/syscall.c:1769 when copy_verifier_state() aborts in the middle due to kmalloc failure some of the frames could have been partially copied while current free_verifier_state() loop for (i = 0; i <= state->curframe; i++) assumed that all frames are non-null. Simply fix it by adding 'if (!state)' to free_func_state(). Also avoid stressing copy frame logic more if kzalloc fails in push_stack() free env->cur_state right away. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Reported-by: syzbot+32ac5a3e473f2e01cfc7@syzkaller.appspotmail.com Reported-by: syzbot+fa99e24f3c29d269a7d5@syzkaller.appspotmail.com Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a2b211262c254..d921ab387b0b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -375,6 +375,8 @@ static int realloc_func_state(struct bpf_func_state *state, int size, static void free_func_state(struct bpf_func_state *state) { + if (!state) + return; kfree(state->stack); kfree(state); } @@ -487,6 +489,8 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, } return &elem->st; err: + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; /* pop all elements and return */ while (!pop_stack(env, NULL, NULL)); return NULL; From 141b52a98ab45a835ff1ea869414faccdc255a72 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 01:20:01 -0800 Subject: [PATCH 02/18] net: use the right variant of kfree kvzalloc'ed memory should be kvfree'd. Fixes: e817f85652c1 ("xdp: generic XDP handling of xdp_rxq_info") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/dev.c b/net/core/dev.c index d7925ef8743d2..852a54c769a3b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7645,7 +7645,7 @@ static int netif_alloc_rx_queues(struct net_device *dev) /* Rollback successful reg's and free other resources */ while (i--) xdp_rxq_info_unreg(&rx[i].xdp_rxq); - kfree(dev->_rx); + kvfree(dev->_rx); dev->_rx = NULL; return err; } From 82aaff2f63443e1d6cc4a186ed9c2a5718123906 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 01:20:02 -0800 Subject: [PATCH 03/18] net: free RX queue structures Looks like commit e817f85652c1 ("xdp: generic XDP handling of xdp_rxq_info") replaced kvfree(dev->_rx) in free_netdev() with a call to netif_free_rx_queues() which doesn't actually free the rings? While at it remove the unnecessary temporary variable. Fixes: e817f85652c1 ("xdp: generic XDP handling of xdp_rxq_info") Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/core/dev.c b/net/core/dev.c index 852a54c769a3b..74e1e5d313376 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7653,16 +7653,15 @@ static int netif_alloc_rx_queues(struct net_device *dev) static void netif_free_rx_queues(struct net_device *dev) { unsigned int i, count = dev->num_rx_queues; - struct netdev_rx_queue *rx; /* netif_alloc_rx_queues alloc failed, resources have been unreg'ed */ if (!dev->_rx) return; - rx = dev->_rx; - for (i = 0; i < count; i++) - xdp_rxq_info_unreg(&rx[i].xdp_rxq); + xdp_rxq_info_unreg(&dev->_rx[i].xdp_rxq); + + kvfree(dev->_rx); } static void netdev_init_one_queue(struct net_device *dev, From a9c324be72b2243e1cddd3f9e7356849de39ae53 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:54 +0000 Subject: [PATCH 04/18] nfp: don't try to register XDP rxq structures on control queues Some RX rings are used for control messages, those will not have a netdev pointer in dp. Skip XDP rxq handling on those rings. Fixes: 7f1c684a8966 ("nfp: setup xdp_rxq_info") Signed-off-by: Jakub Kicinski Reviewed-by: Dirk van der Merwe Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 05e071b3dc5b9..104d6c520a524 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2253,7 +2253,8 @@ static void nfp_net_rx_ring_free(struct nfp_net_rx_ring *rx_ring) struct nfp_net_r_vector *r_vec = rx_ring->r_vec; struct nfp_net_dp *dp = &r_vec->nfp_net->dp; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + if (dp->netdev) + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); kfree(rx_ring->rxbufs); if (rx_ring->rxds) @@ -2279,9 +2280,12 @@ nfp_net_rx_ring_alloc(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring) { int sz, err; - err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, rx_ring->idx); - if (err < 0) - return err; + if (dp->netdev) { + err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, dp->netdev, + rx_ring->idx); + if (err < 0) + return err; + } rx_ring->cnt = dp->rxd_cnt; rx_ring->size = sizeof(*rx_ring->rxds) * rx_ring->cnt; From 8c6a6d98044dca7e54bcc108da1d8e47b409ec6a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:55 +0000 Subject: [PATCH 05/18] nfp: fix incumbent kdoc warnings We should use % instead of @ for documenting preprocessor defines. Add missing documentation of __NFP_REPR_TYPE_MAX. This gets rid of all remaining kdoc warnings in the driver. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- .../net/ethernet/netronome/nfp/nfp_net_ctrl.h | 111 +++++++++--------- .../net/ethernet/netronome/nfp/nfp_net_repr.h | 1 + 2 files changed, 57 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index 782d452e0fc22..25c36001bffa6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -91,23 +91,24 @@ #define NFP_NET_RSS_IPV6_EX_UDP 9 /** - * @NFP_NET_TXR_MAX: Maximum number of TX rings - * @NFP_NET_RXR_MAX: Maximum number of RX rings + * Ring counts + * %NFP_NET_TXR_MAX: Maximum number of TX rings + * %NFP_NET_RXR_MAX: Maximum number of RX rings */ #define NFP_NET_TXR_MAX 64 #define NFP_NET_RXR_MAX 64 /** * Read/Write config words (0x0000 - 0x002c) - * @NFP_NET_CFG_CTRL: Global control - * @NFP_NET_CFG_UPDATE: Indicate which fields are updated - * @NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings - * @NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings - * @NFP_NET_CFG_MTU: Set MTU size - * @NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) - * @NFP_NET_CFG_EXN: MSI-X table entry for exceptions - * @NFP_NET_CFG_LSC: MSI-X table entry for link state changes - * @NFP_NET_CFG_MACADDR: MAC address + * %NFP_NET_CFG_CTRL: Global control + * %NFP_NET_CFG_UPDATE: Indicate which fields are updated + * %NFP_NET_CFG_TXRS_ENABLE: Bitmask of enabled TX rings + * %NFP_NET_CFG_RXRS_ENABLE: Bitmask of enabled RX rings + * %NFP_NET_CFG_MTU: Set MTU size + * %NFP_NET_CFG_FLBUFSZ: Set freelist buffer size (must be larger than MTU) + * %NFP_NET_CFG_EXN: MSI-X table entry for exceptions + * %NFP_NET_CFG_LSC: MSI-X table entry for link state changes + * %NFP_NET_CFG_MACADDR: MAC address * * TODO: * - define Error details in UPDATE @@ -176,14 +177,14 @@ /** * Read-only words (0x0030 - 0x0050): - * @NFP_NET_CFG_VERSION: Firmware version number - * @NFP_NET_CFG_STS: Status - * @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL) - * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings - * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings - * @NFP_NET_CFG_MAX_MTU: Maximum support MTU - * @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) - * @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) + * %NFP_NET_CFG_VERSION: Firmware version number + * %NFP_NET_CFG_STS: Status + * %NFP_NET_CFG_CAP: Capabilities (same bits as %NFP_NET_CFG_CTRL) + * %NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings + * %NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings + * %NFP_NET_CFG_MAX_MTU: Maximum support MTU + * %NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only) + * %NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only) * * TODO: * - define more STS bits @@ -228,31 +229,31 @@ /** * RSS capabilities - * @NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as - * @NFP_NET_CFG_RSS_HFUNC) + * %NFP_NET_CFG_RSS_CAP_HFUNC: supported hash functions (same bits as + * %NFP_NET_CFG_RSS_HFUNC) */ #define NFP_NET_CFG_RSS_CAP 0x0054 #define NFP_NET_CFG_RSS_CAP_HFUNC 0xff000000 /** * VXLAN/UDP encap configuration - * @NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports - * @NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes + * %NFP_NET_CFG_VXLAN_PORT: Base address of table of tunnels' UDP dst ports + * %NFP_NET_CFG_VXLAN_SZ: Size of the UDP port table in bytes */ #define NFP_NET_CFG_VXLAN_PORT 0x0060 #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** * BPF section - * @NFP_NET_CFG_BPF_ABI: BPF ABI version - * @NFP_NET_CFG_BPF_CAP: BPF capabilities - * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes - * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded - * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit - * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks - * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks - * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions - * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code + * %NFP_NET_CFG_BPF_ABI: BPF ABI version + * %NFP_NET_CFG_BPF_CAP: BPF capabilities + * %NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * %NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * %NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * %NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * %NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * %NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * %NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ #define NFP_NET_CFG_BPF_ABI 0x0080 #define NFP_NET_BPF_ABI 2 @@ -278,9 +279,9 @@ /** * RSS configuration (0x0100 - 0x01ac): * Used only when NFP_NET_CFG_CTRL_RSS is enabled - * @NFP_NET_CFG_RSS_CFG: RSS configuration word - * @NFP_NET_CFG_RSS_KEY: RSS "secret" key - * @NFP_NET_CFG_RSS_ITBL: RSS indirection table + * %NFP_NET_CFG_RSS_CFG: RSS configuration word + * %NFP_NET_CFG_RSS_KEY: RSS "secret" key + * %NFP_NET_CFG_RSS_ITBL: RSS indirection table */ #define NFP_NET_CFG_RSS_BASE 0x0100 #define NFP_NET_CFG_RSS_CTRL NFP_NET_CFG_RSS_BASE @@ -305,13 +306,13 @@ /** * TX ring configuration (0x200 - 0x800) - * @NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration - * @NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) - * @NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) - * @NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) - * @NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) - * @NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet + * %NFP_NET_CFG_TXR_BASE: Base offset for TX ring configuration + * %NFP_NET_CFG_TXR_ADDR: Per TX ring DMA address (8B entries) + * %NFP_NET_CFG_TXR_WB_ADDR: Per TX ring write back DMA address (8B entries) + * %NFP_NET_CFG_TXR_SZ: Per TX ring ring size (1B entries) + * %NFP_NET_CFG_TXR_VEC: Per TX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_TXR_PRIO: Per TX ring priority (1B entries) + * %NFP_NET_CFG_TXR_IRQ_MOD: Per TX ring interrupt moderation packet */ #define NFP_NET_CFG_TXR_BASE 0x0200 #define NFP_NET_CFG_TXR_ADDR(_x) (NFP_NET_CFG_TXR_BASE + ((_x) * 0x8)) @@ -325,12 +326,12 @@ /** * RX ring configuration (0x0800 - 0x0c00) - * @NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration - * @NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) - * @NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) - * @NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) - * @NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) - * @NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) + * %NFP_NET_CFG_RXR_BASE: Base offset for RX ring configuration + * %NFP_NET_CFG_RXR_ADDR: Per RX ring DMA address (8B entries) + * %NFP_NET_CFG_RXR_SZ: Per RX ring ring size (1B entries) + * %NFP_NET_CFG_RXR_VEC: Per RX ring MSI-X table entry (1B entries) + * %NFP_NET_CFG_RXR_PRIO: Per RX ring priority (1B entries) + * %NFP_NET_CFG_RXR_IRQ_MOD: Per RX ring interrupt moderation (4B entries) */ #define NFP_NET_CFG_RXR_BASE 0x0800 #define NFP_NET_CFG_RXR_ADDR(_x) (NFP_NET_CFG_RXR_BASE + ((_x) * 0x8)) @@ -343,7 +344,7 @@ /** * Interrupt Control/Cause registers (0x0c00 - 0x0d00) * These registers are only used when MSI-X auto-masking is not - * enabled (@NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index + * enabled (%NFP_NET_CFG_CTRL_MSIXAUTO not set). The array is index * by MSI-X entry and are 1B in size. If an entry is zero, the * corresponding entry is enabled. If the FW generates an interrupt, * it writes a cause into the corresponding field. This also masks @@ -393,8 +394,8 @@ /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry - * @NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) - * @NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_TXR_STATS: TX ring statistics (Packet and Byte count) + * %NFP_NET_CFG_RXR_STATS: RX ring statistics (Packet and Byte count) */ #define NFP_NET_CFG_TXR_STATS_BASE 0x1000 #define NFP_NET_CFG_TXR_STATS(_x) (NFP_NET_CFG_TXR_STATS_BASE + \ @@ -418,10 +419,10 @@ /** * VLAN filtering using general use mailbox - * @NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox - * @NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter - * @NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter - * @NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes + * %NFP_NET_CFG_VLAN_FILTER: Base address of VLAN filter mailbox + * %NFP_NET_CFG_VLAN_FILTER_VID: VLAN ID to filter + * %NFP_NET_CFG_VLAN_FILTER_PROTO: VLAN proto to filter + * %NFP_NET_CFG_VXLAN_SZ: Size of the VLAN filter mailbox in bytes */ #define NFP_NET_CFG_VLAN_FILTER NFP_NET_CFG_MBOX_VAL #define NFP_NET_CFG_VLAN_FILTER_VID NFP_NET_CFG_VLAN_FILTER diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h index 5d4d897bc9c63..cbc7badf40a03 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h @@ -89,6 +89,7 @@ struct nfp_repr { * @NFP_REPR_TYPE_PHYS_PORT: external NIC port * @NFP_REPR_TYPE_PF: physical function * @NFP_REPR_TYPE_VF: virtual function + * @__NFP_REPR_TYPE_MAX: number of representor types */ enum nfp_repr_type { NFP_REPR_TYPE_PHYS_PORT, From c4f7730be580f8c39decc058246a83ddf7d7b3cb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:56 +0000 Subject: [PATCH 06/18] nfp: bpf: round up the size of the stack Kernel enforces the alignment of the bottom of the stack, NFP deals with positive offsets better so we should align the top of the stack. Round the stack size to NFP word size (4B). Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index fa2905e67b073..8dbf13450bab6 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -147,7 +147,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, return -EOPNOTSUPP; } - nfp_prog->stack_depth = prog->aux->stack_depth; + nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); From ccbdc596f4f6f6795956d46bb4b5f58c7e4bc3c8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:57 +0000 Subject: [PATCH 07/18] nfp: bpf: don't allow changing MTU above BPF offload limit when active When BPF offload is active we need may need to restrict the MTU changes more than just to the limitation of the kernel XDP datapath. Allow the BPF code to veto a MTU change. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 20 +++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_app.h | 13 ++++++++++++ .../ethernet/netronome/nfp/nfp_net_common.c | 5 +++++ .../net/ethernet/netronome/nfp/nfp_net_repr.c | 8 ++++++++ 4 files changed, 46 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 4b63167906ca0..978086580ca01 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -194,6 +194,24 @@ static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; } +static int +nfp_bpf_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + struct nfp_net *nn = netdev_priv(netdev); + unsigned int max_mtu; + + if (~nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF) + return 0; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (new_mtu > max_mtu) { + nn_info(nn, "BPF offload active, MTU over %u not supported\n", + max_mtu); + return -EBUSY; + } + return 0; +} + static int nfp_bpf_parse_cap_adjust_head(struct nfp_app_bpf *bpf, void __iomem *value, u32 length) @@ -311,6 +329,8 @@ const struct nfp_app_type app_bpf = { .init = nfp_bpf_init, .clean = nfp_bpf_clean, + .change_mtu = nfp_bpf_change_mtu, + .extra_cap = nfp_bpf_extra_cap, .vnic_alloc = nfp_bpf_vnic_alloc, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index 3af1943a85210..e6b59c28c4ca4 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -82,6 +82,8 @@ extern const struct nfp_app_type app_flower; * @repr_clean: representor about to be unregistered * @repr_open: representor netdev open callback * @repr_stop: representor netdev stop callback + * @change_mtu: MTU change on a netdev has been requested (veto-only, change + * is not guaranteed to be committed) * @start: start application logic * @stop: stop application logic * @ctrl_msg_rx: control message handler @@ -120,6 +122,9 @@ struct nfp_app_type { int (*repr_open)(struct nfp_app *app, struct nfp_repr *repr); int (*repr_stop)(struct nfp_app *app, struct nfp_repr *repr); + int (*change_mtu)(struct nfp_app *app, struct net_device *netdev, + int new_mtu); + int (*start)(struct nfp_app *app); void (*stop)(struct nfp_app *app); @@ -242,6 +247,14 @@ nfp_app_repr_clean(struct nfp_app *app, struct net_device *netdev) app->type->repr_clean(app, netdev); } +static inline int +nfp_app_change_mtu(struct nfp_app *app, struct net_device *netdev, int new_mtu) +{ + if (!app || !app->type->change_mtu) + return 0; + return app->type->change_mtu(app, netdev, new_mtu); +} + static inline int nfp_app_start(struct nfp_app *app, struct nfp_net *ctrl) { app->ctrl = ctrl; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 104d6c520a524..33cbb9b35e68f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3049,6 +3049,11 @@ static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu) { struct nfp_net *nn = netdev_priv(netdev); struct nfp_net_dp *dp; + int err; + + err = nfp_app_change_mtu(nn->app, netdev, new_mtu); + if (err) + return err; dp = nfp_net_clone_dp(nn); if (!dp) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c index f50aa119570ac..317f87cc3cc6d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c @@ -186,6 +186,13 @@ nfp_repr_get_offload_stats(int attr_id, const struct net_device *dev, return -EINVAL; } +static int nfp_repr_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nfp_repr *repr = netdev_priv(netdev); + + return nfp_app_change_mtu(repr->app, netdev, new_mtu); +} + static netdev_tx_t nfp_repr_xmit(struct sk_buff *skb, struct net_device *netdev) { struct nfp_repr *repr = netdev_priv(netdev); @@ -240,6 +247,7 @@ const struct net_device_ops nfp_repr_netdev_ops = { .ndo_open = nfp_repr_open, .ndo_stop = nfp_repr_stop, .ndo_start_xmit = nfp_repr_xmit, + .ndo_change_mtu = nfp_repr_change_mtu, .ndo_get_stats64 = nfp_repr_get_stats64, .ndo_has_offload_stats = nfp_repr_has_offload_stats, .ndo_get_offload_stats = nfp_repr_get_offload_stats, From a0f30c97acb685d391f40c527ddaf542c13966af Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:58 +0000 Subject: [PATCH 08/18] nfp: bpf: allow disabling TC offloads when XDP active TC BPF offload was added first, so we used to assume that the ethtool TC HW offload flag cannot be touched whenever any BPF program is loaded on the NIC. This unncessarily limits changes to the TC flag when offloaded program is XDP. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 978086580ca01..50b16b6cad0bc 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -191,7 +191,9 @@ static int nfp_bpf_setup_tc(struct nfp_app *app, struct net_device *netdev, static bool nfp_bpf_tc_busy(struct nfp_app *app, struct nfp_net *nn) { - return nn->dp.ctrl & NFP_NET_CFG_CTRL_BPF; + struct nfp_bpf_vnic *bv = nn->app_priv; + + return !!bv->tc_prog; } static int From 1549921da3e8efb6c95e39444c67ed1729a0ccaf Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:25:59 +0000 Subject: [PATCH 09/18] nfp: bpf: move jump resolution to jit.c Jump target resolution should be in jit.c not offload.c. No functional changes. Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 23 +++++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/main.h | 1 + .../net/ethernet/netronome/nfp/bpf/offload.c | 18 +-------------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 0de59f04da84f..9caff3a7505a9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2742,3 +2742,26 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); } + +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) +{ + struct nfp_insn_meta *meta; + + /* Another pass to record jump information. */ + list_for_each_entry(meta, &nfp_prog->insns, l) { + u64 code = meta->insn.code; + + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && + BPF_OP(code) != BPF_CALL) { + struct nfp_insn_meta *dst_meta; + unsigned short dst_indx; + + dst_indx = meta->n + 1 + meta->insn.off; + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, + cnt); + + meta->jmp_dst = dst_meta; + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; + } + } +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 89a9b6393882b..0b1347f2afd1a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -236,6 +236,7 @@ struct nfp_bpf_vnic { struct bpf_prog *tc_prog; }; +void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); int nfp_bpf_jit(struct nfp_prog *prog); extern const struct bpf_prog_offload_ops nfp_bpf_analyzer_ops; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 8dbf13450bab6..0ca6faaacc580 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -70,23 +70,7 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, list_add_tail(&meta->l, &nfp_prog->insns); } - /* Another pass to record jump information. */ - list_for_each_entry(meta, &nfp_prog->insns, l) { - u64 code = meta->insn.code; - - if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && - BPF_OP(code) != BPF_CALL) { - struct nfp_insn_meta *dst_meta; - unsigned short dst_indx; - - dst_indx = meta->n + 1 + meta->insn.off; - dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, - cnt); - - meta->jmp_dst = dst_meta; - dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; - } - } + nfp_bpf_jit_prepare(nfp_prog, cnt); return 0; } From 488feeaf6d2f9189bdb65f31094a8cff5fcd6c58 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:26:00 +0000 Subject: [PATCH 10/18] nfp: bpf: add helpers for modifying branch addresses In preparation for better handling of relocations move existing helper for setting branch offset to nfp_asm.c and add two more. Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 12 -------- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 30 ++++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 4 +++ 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 9caff3a7505a9..c5d1628a54146 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2110,18 +2110,6 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_EXIT] = goto_out, }; -/* --- Misc code --- */ -static void br_set_offset(u64 *instr, u16 offset) -{ - u16 addr_lo, addr_hi; - - addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); - addr_hi = offset != addr_lo; - *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); - *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); - *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); -} - /* --- Assembler logic --- */ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index d3610987fb073..9ee3a3f60cc7a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -50,6 +50,36 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, }; +u16 br_get_offset(u64 instr) +{ + u16 addr_lo, addr_hi; + + addr_lo = FIELD_GET(OP_BR_ADDR_LO, instr); + addr_hi = FIELD_GET(OP_BR_ADDR_HI, instr); + + return (addr_hi * ((OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)) + 1)) | + addr_lo; +} + +void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +void br_add_offset(u64 *instr, u16 offset) +{ + u16 addr; + + addr = br_get_offset(*instr); + br_set_offset(instr, addr + offset); +} + static u16 nfp_swreg_to_unreg(swreg reg, bool is_dst) { bool lm_id, lm_dec = false; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index a24daeab1a775..a50240ab0ce25 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -93,6 +93,10 @@ enum br_ctx_signal_state { BR_CSS_NONE = 2, }; +u16 br_get_offset(u64 instr); +void br_set_offset(u64 *instr, u16 offset); +void br_add_offset(u64 *instr, u16 offset); + #define OP_BBYTE_BASE 0x0c800000000ULL #define OP_BB_A_SRC 0x000000000ffULL #define OP_BB_BYTE 0x00000000300ULL From 2314fe9ed0a1760ceab96b81e6b7181963c93254 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:26:01 +0000 Subject: [PATCH 11/18] nfp: bpf: relocate jump targets just before the load Don't translate the program assuming it will be loaded at a given address. This will be required for sharing programs between ports of the same NIC, tail calls and subprograms. It will also make the jump targets easier to understand when dumping the program to user space. Translate the program as if it was going to be loaded at address zero. When load happens add the load offset in and set addresses of special branches. Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 149 ++++++++++-------- drivers/net/ethernet/netronome/nfp/bpf/main.c | 9 +- drivers/net/ethernet/netronome/nfp/bpf/main.h | 29 ++-- .../net/ethernet/netronome/nfp/bpf/offload.c | 15 +- 4 files changed, 115 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index c5d1628a54146..3a5c747fd12b4 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -85,7 +85,7 @@ static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) { - return nfp_prog->start_off + nfp_prog->prog_len; + return nfp_prog->prog_len; } static bool @@ -100,12 +100,6 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off); } -static unsigned int -nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) -{ - return offset - nfp_prog->start_off; -} - /* --- Emitters --- */ static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, @@ -195,22 +189,28 @@ __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, nfp_prog_push(nfp_prog, insn); } -static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +static void +emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer, + enum nfp_relo_type relo) { - if (defer > 2) { + if (mask == BR_UNC && defer > 2) { pr_err("BUG: branch defer out of bounds %d\n", defer); nfp_prog->error = -EFAULT; return; } - __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); + + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_RELO_TYPE, relo); } static void emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) { - __emit_br(nfp_prog, mask, - mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, - BR_CSS_NONE, addr, defer); + emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL); } static void @@ -515,16 +515,6 @@ static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count) emit_nop(nfp_prog); } -static void -wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, - enum br_special special) -{ - emit_br(nfp_prog, mask, 0, 0); - - nfp_prog->prog[nfp_prog->prog_len - 1] |= - FIELD_PREP(OP_BR_SPECIAL, special); -} - static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src) { emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src); @@ -749,7 +739,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, 0, 0, RELO_BR_GO_ABORT); /* Load data */ return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); @@ -762,7 +752,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Check packet length */ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); - wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, 0, 0, RELO_BR_GO_ABORT); /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); @@ -1269,7 +1259,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0); /* Skip over the -EINVAL ret code (defer 2) */ - emit_br_def(nfp_prog, end, 2); + emit_br(nfp_prog, BR_UNC, end, 2); emit_alu(nfp_prog, plen_reg(nfp_prog), plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2)); @@ -2036,7 +2026,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + emit_br_relo(nfp_prog, BR_UNC, 0, 0, RELO_BR_GO_OUT); return 0; } @@ -2125,11 +2115,9 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) continue; if (list_is_last(&meta->l, &nfp_prog->insns)) - idx = nfp_prog->last_bpf_off; + br_idx = nfp_prog->last_bpf_off; else - idx = list_next_entry(meta, l)->off - 1; - - br_idx = nfp_prog_offset_to_index(nfp_prog, idx); + br_idx = list_next_entry(meta, l)->off - 1; if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", @@ -2137,7 +2125,8 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } /* Leave special branches for later */ - if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) != + RELO_BR_REL) continue; if (!meta->jmp_dst) { @@ -2152,38 +2141,13 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } - for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); - idx <= br_idx; idx++) { + for (idx = meta->off; idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) continue; br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); } } - /* Fixup 'goto out's separately, they can be scattered around */ - for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { - enum br_special special; - - if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) - continue; - - special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); - switch (special) { - case OP_BR_NORMAL: - break; - case OP_BR_GO_OUT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_out); - break; - case OP_BR_GO_ABORT: - br_set_offset(&nfp_prog->prog[br_idx], - nfp_prog->tgt_abort); - break; - } - - nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; - } - return 0; } @@ -2211,7 +2175,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); @@ -2238,7 +2202,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); emit_shf(nfp_prog, reg_b(2), reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); @@ -2257,7 +2221,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); @@ -2278,7 +2242,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); - emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); @@ -2694,20 +2658,19 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) return 0; } -static int nfp_bpf_ustore_calc(struct nfp_prog *nfp_prog, __le64 *ustore) +static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) { + __le64 *ustore = (__force __le64 *)prog; int i; - for (i = 0; i < nfp_prog->prog_len; i++) { + for (i = 0; i < len; i++) { int err; - err = nfp_ustore_check_valid_no_ecc(nfp_prog->prog[i]); + err = nfp_ustore_check_valid_no_ecc(prog[i]); if (err) return err; - nfp_prog->prog[i] = nfp_ustore_calc_ecc_insn(nfp_prog->prog[i]); - - ustore[i] = cpu_to_le64(nfp_prog->prog[i]); + ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i])); } return 0; @@ -2728,7 +2691,7 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return -EINVAL; } - return nfp_bpf_ustore_calc(nfp_prog, (__force __le64 *)nfp_prog->prog); + return ret; } void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) @@ -2753,3 +2716,51 @@ void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt) } } } + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) +{ + unsigned int i; + u64 *prog; + int err; + + prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64), + GFP_KERNEL); + if (!prog) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < nfp_prog->prog_len; i++) { + enum nfp_relo_type special; + + special = FIELD_GET(OP_RELO_TYPE, prog[i]); + switch (special) { + case RELO_NONE: + continue; + case RELO_BR_REL: + br_add_offset(&prog[i], bv->start_off); + break; + case RELO_BR_GO_OUT: + br_set_offset(&prog[i], + nfp_prog->tgt_out + bv->start_off); + break; + case RELO_BR_GO_ABORT: + br_set_offset(&prog[i], + nfp_prog->tgt_abort + bv->start_off); + break; + case RELO_BR_NEXT_PKT: + br_set_offset(&prog[i], bv->tgt_done); + break; + } + + prog[i] &= ~OP_RELO_TYPE; + } + + err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len); + if (err) + goto err_free_prog; + + return prog; + +err_free_prog: + kfree(prog); + return ERR_PTR(err); +} diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 50b16b6cad0bc..9ed76ccdd3c12 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -87,16 +87,21 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) static int nfp_bpf_vnic_alloc(struct nfp_app *app, struct nfp_net *nn, unsigned int id) { + struct nfp_bpf_vnic *bv; int err; - nn->app_priv = kzalloc(sizeof(struct nfp_bpf_vnic), GFP_KERNEL); - if (!nn->app_priv) + bv = kzalloc(sizeof(*bv), GFP_KERNEL); + if (!bv) return -ENOMEM; + nn->app_priv = bv; err = nfp_app_nic_vnic_alloc(app, nn, id); if (err) goto err_free_priv; + bv->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + bv->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + return 0; err_free_priv: kfree(nn->app_priv); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 0b1347f2afd1a..5deba5099618f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -42,15 +42,20 @@ #include "../nfp_asm.h" -/* For branch fixup logic use up-most byte of branch instruction as scratch +/* For relocation logic use up-most byte of branch instruction as scratch * area. Remember to clear this before sending instructions to HW! */ -#define OP_BR_SPECIAL 0xff00000000000000ULL - -enum br_special { - OP_BR_NORMAL = 0, - OP_BR_GO_OUT, - OP_BR_GO_ABORT, +#define OP_RELO_TYPE 0xff00000000000000ULL + +enum nfp_relo_type { + RELO_NONE = 0, + /* standard internal jumps */ + RELO_BR_REL, + /* internal jumps to parts of the outro */ + RELO_BR_GO_OUT, + RELO_BR_GO_ABORT, + /* external jumps to fixed addresses */ + RELO_BR_NEXT_PKT, }; enum static_regs { @@ -191,11 +196,9 @@ static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) * @__prog_alloc_len: alloc size of @prog array * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type - * @start_off: address of the first instruction in the memory * @last_bpf_off: address of the last instruction translated from BPF * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) - * @tgt_done: jump target to get the next packet * @n_translated: number of successfully translated instructions (for errors) * @error: error code if something went wrong * @stack_depth: max stack depth from the verifier @@ -213,11 +216,9 @@ struct nfp_prog { enum bpf_prog_type type; - unsigned int start_off; unsigned int last_bpf_off; unsigned int tgt_out; unsigned int tgt_abort; - unsigned int tgt_done; unsigned int n_translated; int error; @@ -231,9 +232,13 @@ struct nfp_prog { /** * struct nfp_bpf_vnic - per-vNIC BPF priv structure * @tc_prog: currently loaded cls_bpf program + * @start_off: address of the first instruction in the memory + * @tgt_done: jump target to get the next packet */ struct nfp_bpf_vnic { struct bpf_prog *tc_prog; + unsigned int start_off; + unsigned int tgt_done; }; void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt); @@ -257,4 +262,6 @@ int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns); + +void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 0ca6faaacc580..5f165e1d7648b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -130,10 +130,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, prog->aux->stack_depth, stack_size); return -EOPNOTSUPP; } - nfp_prog->stack_depth = round_up(prog->aux->stack_depth, 4); - nfp_prog->start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); - nfp_prog->tgt_done = nn_readw(nn, NFP_NET_CFG_BPF_DONE); max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); @@ -161,6 +158,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int max_mtu; dma_addr_t dma_addr; + void *img; int err; max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; @@ -169,11 +167,17 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) return -EOPNOTSUPP; } - dma_addr = dma_map_single(nn->dp.dev, nfp_prog->prog, + img = nfp_bpf_relo_for_vnic(nfp_prog, nn->app_priv); + if (IS_ERR(img)) + return PTR_ERR(img); + + dma_addr = dma_map_single(nn->dp.dev, img, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); - if (dma_mapping_error(nn->dp.dev, dma_addr)) + if (dma_mapping_error(nn->dp.dev, dma_addr)) { + kfree(img); return -ENOMEM; + } nn_writew(nn, NFP_NET_CFG_BPF_SIZE, nfp_prog->prog_len); nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, dma_addr); @@ -185,6 +189,7 @@ static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) dma_unmap_single(nn->dp.dev, dma_addr, nfp_prog->prog_len * sizeof(u64), DMA_TO_DEVICE); + kfree(img); return err; } From 44a12ecc1cab7dcf4647dfef7d94f5c559c01407 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:26:02 +0000 Subject: [PATCH 12/18] nfp: bpf: don't depend on high order allocations for program image The translator pre-allocates a buffer of maximal program size. Due to HW/FW limitations the program buffer can't currently be longer than 128Kb, so we used to kmalloc() it, and then map for DMA directly. Now that the late branch resolution is copying the program image anyway, we can just kvmalloc() the buffer. While at it, after translation reallocate the buffer to save space. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 16 ++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 3a5c747fd12b4..5deebbc18cfd9 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2676,6 +2676,20 @@ static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len) return 0; } +static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog) +{ + void *prog; + + prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL); + if (!prog) + return; + + nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64); + memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len); + kvfree(nfp_prog->prog); + nfp_prog->prog = prog; +} + int nfp_bpf_jit(struct nfp_prog *nfp_prog) { int ret; @@ -2691,6 +2705,8 @@ int nfp_bpf_jit(struct nfp_prog *nfp_prog) return -EINVAL; } + nfp_bpf_prog_trim(nfp_prog); + return ret; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 5f165e1d7648b..f635605507535 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -135,7 +136,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); nfp_prog->__prog_alloc_len = max_instr * sizeof(u64); - nfp_prog->prog = kmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); + nfp_prog->prog = kvmalloc(nfp_prog->__prog_alloc_len, GFP_KERNEL); if (!nfp_prog->prog) return -ENOMEM; @@ -147,7 +148,7 @@ int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; - kfree(nfp_prog->prog); + kvfree(nfp_prog->prog); nfp_prog_free(nfp_prog); return 0; From e84797fe159c8f849edd96ab72cc21eb890656fe Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:26:03 +0000 Subject: [PATCH 13/18] nfp: bpf: use a large constant in unresolved branches To make absolute relocated branches (branches which will be completely rewritten with br_set_offset()) distinguishable in user space dumps from normal jumps add a large offset to them. Signed-off-by: Jakub Kicinski Reviewed-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 14 +++++++------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 6 ++++++ 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 5deebbc18cfd9..b4ed6bca4ea9e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -739,7 +739,7 @@ construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size) imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog)); - emit_br_relo(nfp_prog, BR_BLO, 0, 0, RELO_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ return data_ld(nfp_prog, imm_b(nfp_prog), 0, size); @@ -752,7 +752,7 @@ static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) /* Check packet length */ tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog)); emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg); - emit_br_relo(nfp_prog, BR_BLO, 0, 0, RELO_BR_GO_ABORT); + emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT); /* Load data */ tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); @@ -2026,7 +2026,7 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - emit_br_relo(nfp_prog, BR_UNC, 0, 0, RELO_BR_GO_OUT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT); return 0; } @@ -2175,7 +2175,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); @@ -2202,7 +2202,7 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); - emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); emit_shf(nfp_prog, reg_b(2), reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); @@ -2221,7 +2221,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) /* Target for aborts */ nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); - emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16); @@ -2242,7 +2242,7 @@ static void nfp_outro_xdp(struct nfp_prog *nfp_prog) emit_shf(nfp_prog, reg_b(2), reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); - emit_br_relo(nfp_prog, BR_UNC, 0, 2, RELO_BR_NEXT_PKT); + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT); wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS); emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 5deba5099618f..8d02c39b9b10d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -58,6 +58,12 @@ enum nfp_relo_type { RELO_BR_NEXT_PKT, }; +/* To make absolute relocated branches (branches other than RELO_BR_REL) + * distinguishable in user space dumps from normal jumps, add a large offset + * to them. + */ +#define BR_OFF_RELO 15000 + enum static_regs { STATIC_REG_IMM = 21, /* Bank AB */ STATIC_REG_STACK = 22, /* Bank A */ From af93d15ac6c40d097b08c18a65a0414f94110401 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 10 Jan 2018 12:26:04 +0000 Subject: [PATCH 14/18] nfp: hand over to BPF offload app at coarser granularity Instead of having an app callback per message type hand off all offload-related handling to apps with one "rest of ndo_bpf" callback. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 5 +- drivers/net/ethernet/netronome/nfp/bpf/main.h | 8 +--- .../net/ethernet/netronome/nfp/bpf/offload.c | 25 +++++++--- drivers/net/ethernet/netronome/nfp/nfp_app.h | 47 +++++-------------- .../ethernet/netronome/nfp/nfp_net_common.c | 10 +--- 5 files changed, 34 insertions(+), 61 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index 9ed76ccdd3c12..e8cfe300c8c4f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -345,9 +345,6 @@ const struct nfp_app_type app_bpf = { .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, + .bpf = nfp_ndo_bpf, .xdp_offload = nfp_bpf_xdp_offload, - - .bpf_verifier_prep = nfp_bpf_verifier_prep, - .bpf_translate = nfp_bpf_translate, - .bpf_destroy = nfp_bpf_destroy, }; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 8d02c39b9b10d..66381afee2a93 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -256,15 +256,11 @@ struct netdev_bpf; struct nfp_app; struct nfp_net; +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf); int nfp_net_bpf_offload(struct nfp_net *nn, struct bpf_prog *prog, bool old_prog); -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index f635605507535..320b2250d29a3 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -87,8 +87,9 @@ static void nfp_prog_free(struct nfp_prog *nfp_prog) kfree(nfp_prog); } -int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) +static int +nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) { struct bpf_prog *prog = bpf->verifier.prog; struct nfp_prog *nfp_prog; @@ -118,8 +119,7 @@ int nfp_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, return ret; } -int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_translate(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; unsigned int stack_size; @@ -143,8 +143,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, return nfp_bpf_jit(nfp_prog); } -int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) +static int nfp_bpf_destroy(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; @@ -154,6 +153,20 @@ int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, return 0; } +int nfp_ndo_bpf(struct nfp_app *app, struct nfp_net *nn, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case BPF_OFFLOAD_VERIFIER_PREP: + return nfp_bpf_verifier_prep(app, nn, bpf); + case BPF_OFFLOAD_TRANSLATE: + return nfp_bpf_translate(nn, bpf->offload.prog); + case BPF_OFFLOAD_DESTROY: + return nfp_bpf_destroy(nn, bpf->offload.prog); + default: + return -EINVAL; + } +} + static int nfp_net_bpf_load(struct nfp_net *nn, struct bpf_prog *prog) { struct nfp_prog *nfp_prog = prog->aux->offload->dev_priv; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_app.h b/drivers/net/ethernet/netronome/nfp/nfp_app.h index e6b59c28c4ca4..32ff46a00f703 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_app.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_app.h @@ -89,10 +89,8 @@ extern const struct nfp_app_type app_flower; * @ctrl_msg_rx: control message handler * @setup_tc: setup TC ndo * @tc_busy: TC HW offload busy (rules loaded) + * @bpf: BPF ndo offload-related calls * @xdp_offload: offload an XDP program - * @bpf_verifier_prep: verifier prep for dev-specific BPF programs - * @bpf_translate: translate call for dev-specific BPF programs - * @bpf_destroy: destroy for dev-specific BPF programs * @eswitch_mode_get: get SR-IOV eswitch mode * @sriov_enable: app-specific sriov initialisation * @sriov_disable: app-specific sriov clean-up @@ -133,14 +131,10 @@ struct nfp_app_type { int (*setup_tc)(struct nfp_app *app, struct net_device *netdev, enum tc_setup_type type, void *type_data); bool (*tc_busy)(struct nfp_app *app, struct nfp_net *nn); + int (*bpf)(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *xdp); int (*xdp_offload)(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); - int (*bpf_verifier_prep)(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf); - int (*bpf_translate)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); - int (*bpf_destroy)(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog); int (*sriov_enable)(struct nfp_app *app, int num_vfs); void (*sriov_disable)(struct nfp_app *app); @@ -316,6 +310,14 @@ static inline int nfp_app_setup_tc(struct nfp_app *app, return app->type->setup_tc(app, netdev, type, type_data); } +static inline int nfp_app_bpf(struct nfp_app *app, struct nfp_net *nn, + struct netdev_bpf *bpf) +{ + if (!app || !app->type->bpf) + return -EINVAL; + return app->type->bpf(app, nn, bpf); +} + static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog) { @@ -324,33 +326,6 @@ static inline int nfp_app_xdp_offload(struct nfp_app *app, struct nfp_net *nn, return app->type->xdp_offload(app, nn, prog); } -static inline int -nfp_app_bpf_verifier_prep(struct nfp_app *app, struct nfp_net *nn, - struct netdev_bpf *bpf) -{ - if (!app || !app->type->bpf_verifier_prep) - return -EOPNOTSUPP; - return app->type->bpf_verifier_prep(app, nn, bpf); -} - -static inline int -nfp_app_bpf_translate(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_translate) - return -EOPNOTSUPP; - return app->type->bpf_translate(app, nn, prog); -} - -static inline int -nfp_app_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, - struct bpf_prog *prog) -{ - if (!app || !app->type->bpf_destroy) - return -EOPNOTSUPP; - return app->type->bpf_destroy(app, nn, prog); -} - static inline bool nfp_app_ctrl_tx(struct nfp_app *app, struct sk_buff *skb) { trace_devlink_hwmsg(priv_to_devlink(app->pf), false, 0, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 33cbb9b35e68f..caee147fce043 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3414,16 +3414,8 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; xdp->prog_flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; - case BPF_OFFLOAD_VERIFIER_PREP: - return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); - case BPF_OFFLOAD_TRANSLATE: - return nfp_app_bpf_translate(nn->app, nn, - xdp->offload.prog); - case BPF_OFFLOAD_DESTROY: - return nfp_app_bpf_destroy(nn->app, nn, - xdp->offload.prog); default: - return -EINVAL; + return nfp_app_bpf(nn->app, nn, xdp); } } From c087aa8bbf83ddcd54e49f42cf463bcea5bd2b94 Mon Sep 17 00:00:00 2001 From: Nic Viljoen Date: Wed, 10 Jan 2018 12:26:05 +0000 Subject: [PATCH 15/18] nfp: bpf: add signed jump insns This patch adds signed jump instructions (jsgt, jsge, jslt, jsle) to the nfp jit. As well as adding the additional required raw assembler branch mask to nfp_asm.h Signed-off-by: Nic Viljoen Reviewed-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 48 ++++++++++++++++++++ drivers/net/ethernet/netronome/nfp/nfp_asm.h | 1 + 2 files changed, 49 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index b4ed6bca4ea9e..47c5224f8d6ff 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1914,6 +1914,26 @@ static int jle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); } +static int jsgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BGE, true); +} + static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; @@ -2003,6 +2023,26 @@ static int jle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); } +static int jsgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, true); +} + +static int jsge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, false); +} + +static int jslt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLT, false); +} + +static int jsle_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BGE, true); +} + static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); @@ -2087,6 +2127,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, [BPF_JMP | BPF_JLT | BPF_K] = jlt_imm, [BPF_JMP | BPF_JLE | BPF_K] = jle_imm, + [BPF_JMP | BPF_JSGT | BPF_K] = jsgt_imm, + [BPF_JMP | BPF_JSGE | BPF_K] = jsge_imm, + [BPF_JMP | BPF_JSLT | BPF_K] = jslt_imm, + [BPF_JMP | BPF_JSLE | BPF_K] = jsle_imm, [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, @@ -2094,6 +2138,10 @@ static const instr_cb_t instr_cb[256] = { [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, [BPF_JMP | BPF_JLT | BPF_X] = jlt_reg, [BPF_JMP | BPF_JLE | BPF_X] = jle_reg, + [BPF_JMP | BPF_JSGT | BPF_X] = jsgt_reg, + [BPF_JMP | BPF_JSGE | BPF_X] = jsge_reg, + [BPF_JMP | BPF_JSLT | BPF_X] = jslt_reg, + [BPF_JMP | BPF_JSLE | BPF_X] = jsle_reg, [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, [BPF_JMP | BPF_CALL] = call, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index a50240ab0ce25..20e51cb60e69b 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -81,6 +81,7 @@ enum br_mask { BR_BHS = 0x04, BR_BLO = 0x05, BR_BGE = 0x08, + BR_BLT = 0x09, BR_UNC = 0x18, }; From 430e68d10baf55e4c40d4dd1de8201c1caf5dddd Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 10 Jan 2018 12:26:06 +0000 Subject: [PATCH 16/18] bpf: export function to write into verifier log buffer Rename the BPF verifier `verbose()` to `bpf_verifier_log_write()` and export it, so that other components (in particular, drivers for BPF offload) can reuse the user buffer log to dump error messages at verification time. Renaming `verbose()` was necessary in order to avoid a name so generic to be exported to the global namespace. However to prevent too much pain for backports, the calls to `verbose()` in the kernel BPF verifier were not changed. Instead, use function aliasing to make `verbose` point to `bpf_verifier_log_write`. Another solution could consist in making a wrapper around `verbose()`, but since it is a variadic function, I don't see a clean way without creating two identical wrappers, one for the verifier and one to export. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 3 +++ kernel/bpf/verifier.c | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 2feb218c001d9..6b66cd1aa0b90 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -192,6 +192,9 @@ struct bpf_verifier_env { u32 subprog_cnt; }; +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...); + static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) { struct bpf_verifier_state *cur = env->cur_state; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d921ab387b0b0..3b2b476661803 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -169,11 +169,11 @@ struct bpf_call_arg_meta { static DEFINE_MUTEX(bpf_verifier_lock); /* log_level controls verbosity level of eBPF verifier. - * verbose() is used to dump the verification trace to the log, so the user - * can figure out what's wrong with the program + * bpf_verifier_log_write() is used to dump the verification trace to the log, + * so the user can figure out what's wrong with the program */ -static __printf(2, 3) void verbose(struct bpf_verifier_env *env, - const char *fmt, ...) +__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, + const char *fmt, ...) { struct bpf_verifer_log *log = &env->log; unsigned int n; @@ -197,6 +197,14 @@ static __printf(2, 3) void verbose(struct bpf_verifier_env *env, else log->ubuf = NULL; } +EXPORT_SYMBOL_GPL(bpf_verifier_log_write); +/* Historically bpf_verifier_log_write was called verbose, but the name was too + * generic for symbol export. The function was renamed, but not the calls in + * the verifier to avoid complicating backports. Hence the alias below. + */ +static __printf(2, 3) void verbose(struct bpf_verifier_env *env, + const char *fmt, ...) + __attribute__((alias("bpf_verifier_log_write"))); static bool type_is_pkt_pointer(enum bpf_reg_type type) { From ff627e3d07a07f7ed1105f459ee9586d4be7818e Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 10 Jan 2018 12:26:07 +0000 Subject: [PATCH 17/18] nfp: bpf: reuse verifier log for debug messages Now that `bpf_verifier_log_write()` is exported from the verifier and makes it possible to reuse the verifier log to print messages to the standard output, use this instead of the kernel logs in the nfp driver for printing error messages occurring at verification time. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- .../net/ethernet/netronome/nfp/bpf/verifier.c | 30 ++++++++++--------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index d8870c2f11f39..7890d95d4018b 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -31,8 +31,6 @@ * SOFTWARE. */ -#define pr_fmt(fmt) "NFP net bpf: " fmt - #include #include #include @@ -41,6 +39,9 @@ #include "fw.h" #include "main.h" +#define pr_vlog(env, fmt, ...) \ + bpf_verifier_log_write(env, "[nfp] " fmt, ##__VA_ARGS__) + struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) @@ -116,18 +117,18 @@ nfp_bpf_check_call(struct nfp_prog *nfp_prog, struct bpf_verifier_env *env, switch (func_id) { case BPF_FUNC_xdp_adjust_head: if (!bpf->adjust_head.off_max) { - pr_warn("adjust_head not supported by FW\n"); + pr_vlog(env, "adjust_head not supported by FW\n"); return -EOPNOTSUPP; } if (!(bpf->adjust_head.flags & NFP_BPF_ADJUST_HEAD_NO_META)) { - pr_warn("adjust_head: FW requires shifting metadata, not supported by the driver\n"); + pr_vlog(env, "adjust_head: FW requires shifting metadata, not supported by the driver\n"); return -EOPNOTSUPP; } nfp_record_adjust_head(bpf, nfp_prog, meta, reg2); break; default: - pr_warn("unsupported function id: %d\n", func_id); + pr_vlog(env, "unsupported function id: %d\n", func_id); return -EOPNOTSUPP; } @@ -150,7 +151,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, char tn_buf[48]; tnum_strn(tn_buf, sizeof(tn_buf), reg0->var_off); - pr_info("unsupported exit state: %d, var_off: %s\n", + pr_vlog(env, "unsupported exit state: %d, var_off: %s\n", reg0->type, tn_buf); return -EINVAL; } @@ -160,7 +161,7 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, imm <= TC_ACT_REDIRECT && imm != TC_ACT_SHOT && imm != TC_ACT_STOLEN && imm != TC_ACT_QUEUED) { - pr_info("unsupported exit state: %d, imm: %llx\n", + pr_vlog(env, "unsupported exit state: %d, imm: %llx\n", reg0->type, imm); return -EINVAL; } @@ -171,12 +172,13 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, - const struct bpf_reg_state *reg) + const struct bpf_reg_state *reg, + struct bpf_verifier_env *env) { s32 old_off, new_off; if (!tnum_is_const(reg->var_off)) { - pr_info("variable ptr stack access\n"); + pr_vlog(env, "variable ptr stack access\n"); return -EINVAL; } @@ -194,7 +196,7 @@ nfp_bpf_check_stack_access(struct nfp_prog *nfp_prog, if (old_off % 4 == new_off % 4) return 0; - pr_info("stack access changed location was:%d is:%d\n", + pr_vlog(env, "stack access changed location was:%d is:%d\n", old_off, new_off); return -EINVAL; } @@ -209,18 +211,18 @@ nfp_bpf_check_ptr(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, if (reg->type != PTR_TO_CTX && reg->type != PTR_TO_STACK && reg->type != PTR_TO_PACKET) { - pr_info("unsupported ptr type: %d\n", reg->type); + pr_vlog(env, "unsupported ptr type: %d\n", reg->type); return -EINVAL; } if (reg->type == PTR_TO_STACK) { - err = nfp_bpf_check_stack_access(nfp_prog, meta, reg); + err = nfp_bpf_check_stack_access(nfp_prog, meta, reg, env); if (err) return err; } if (meta->ptr.type != NOT_INIT && meta->ptr.type != reg->type) { - pr_info("ptr type changed for instruction %d -> %d\n", + pr_vlog(env, "ptr type changed for instruction %d -> %d\n", meta->ptr.type, reg->type); return -EINVAL; } @@ -241,7 +243,7 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (meta->insn.src_reg >= MAX_BPF_REG || meta->insn.dst_reg >= MAX_BPF_REG) { - pr_err("program uses extended registers - jit hardening?\n"); + pr_vlog(env, "program uses extended registers - jit hardening?\n"); return -EINVAL; } From 36e04a2d78d97cc3a02a168541dfa00c8e4b30f2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 10 Jan 2018 18:21:44 +0100 Subject: [PATCH 18/18] samples/bpf: xdp2skb_meta shows transferring info from XDP to SKB Creating a bpf sample that shows howto use the XDP 'data_meta' infrastructure, created by Daniel Borkmann. Very few drivers support this feature, but I wanted a functional sample to begin with, when working on adding driver support. XDP data_meta is about creating a communication channel between BPF programs. This can be XDP tail-progs, but also other SKB based BPF hooks, like in this case the TC clsact hook. In this sample I show that XDP can store info named "mark", and TC/clsact chooses to use this info and store it into the skb->mark. It is a bit annoying that XDP and TC samples uses different tools/libs when attaching their BPF hooks. As the XDP and TC programs need to cooperate and agree on a struct-layout, it is best/easiest if the two programs can be contained within the same BPF restricted-C file. As the bpf-loader, I choose to not use bpf_load.c (or libbpf), but instead wrote a bash shell scripted named xdp2skb_meta.sh, which demonstrate howto use the iproute cmdline tools 'tc' and 'ip' for loading BPF programs. To make it easy for first time users, the shell script have command line parsing, and support --verbose and --dry-run mode, if you just want to see/learn the tc+ip command syntax: # ./xdp2skb_meta.sh --dev ixgbe2 --dry-run # Dry-run mode: enable VERBOSE and don't call TC+IP tc qdisc del dev ixgbe2 clsact tc qdisc add dev ixgbe2 clsact tc filter add dev ixgbe2 ingress prio 1 handle 1 bpf da obj ./xdp2skb_meta_kern.o sec tc_mark # Flush XDP on device: ixgbe2 ip link set dev ixgbe2 xdp off ip link set dev ixgbe2 xdp obj ./xdp2skb_meta_kern.o sec xdp_mark Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- samples/bpf/Makefile | 1 + samples/bpf/xdp2skb_meta.sh | 220 ++++++++++++++++++++++++++++++++ samples/bpf/xdp2skb_meta_kern.c | 103 +++++++++++++++ 3 files changed, 324 insertions(+) create mode 100755 samples/bpf/xdp2skb_meta.sh create mode 100644 samples/bpf/xdp2skb_meta_kern.c diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 3ff7a05bea9a7..7f61a3d57fa71 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -142,6 +142,7 @@ always += xdp_redirect_map_kern.o always += xdp_redirect_cpu_kern.o always += xdp_monitor_kern.o always += xdp_rxq_info_kern.o +always += xdp2skb_meta_kern.o always += syscall_tp_kern.o HOSTCFLAGS += -I$(objtree)/usr/include diff --git a/samples/bpf/xdp2skb_meta.sh b/samples/bpf/xdp2skb_meta.sh new file mode 100755 index 0000000000000..b9c9549c4c272 --- /dev/null +++ b/samples/bpf/xdp2skb_meta.sh @@ -0,0 +1,220 @@ +#!/bin/bash +# +# SPDX-License-Identifier: GPL-2.0 +# Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. +# +# Bash-shell example on using iproute2 tools 'tc' and 'ip' to load +# eBPF programs, both for XDP and clsbpf. Shell script function +# wrappers and even long options parsing is illustrated, for ease of +# use. +# +# Related to sample/bpf/xdp2skb_meta_kern.c, which contains BPF-progs +# that need to collaborate between XDP and TC hooks. Thus, it is +# convenient that the same tool load both programs that need to work +# together. +# +BPF_FILE=xdp2skb_meta_kern.o +DIR=$(dirname $0) + +export TC=/usr/sbin/tc +export IP=/usr/sbin/ip + +function usage() { + echo "" + echo "Usage: $0 [-vfh] --dev ethX" + echo " -d | --dev : Network device (required)" + echo " --flush : Cleanup flush TC and XDP progs" + echo " --list : (\$LIST) List TC and XDP progs" + echo " -v | --verbose : (\$VERBOSE) Verbose" + echo " --dry-run : (\$DRYRUN) Dry-run only (echo commands)" + echo "" +} + +## -- General shell logging cmds -- +function err() { + local exitcode=$1 + shift + echo "ERROR: $@" >&2 + exit $exitcode +} + +function info() { + if [[ -n "$VERBOSE" ]]; then + echo "# $@" + fi +} + +## -- Helper function calls -- + +# Wrapper call for TC and IP +# - Will display the offending command on failure +function _call_cmd() { + local cmd="$1" + local allow_fail="$2" + shift 2 + if [[ -n "$VERBOSE" ]]; then + echo "$(basename $cmd) $@" + fi + if [[ -n "$DRYRUN" ]]; then + return + fi + $cmd "$@" + local status=$? + if (( $status != 0 )); then + if [[ "$allow_fail" == "" ]]; then + err 2 "Exec error($status) occurred cmd: \"$cmd $@\"" + fi + fi +} +function call_tc() { + _call_cmd "$TC" "" "$@" +} +function call_tc_allow_fail() { + _call_cmd "$TC" "allow_fail" "$@" +} +function call_ip() { + _call_cmd "$IP" "" "$@" +} + +## --- Parse command line arguments / parameters --- +# Using external program "getopt" to get --long-options +OPTIONS=$(getopt -o vfhd: \ + --long verbose,flush,help,list,dev:,dry-run -- "$@") +if (( $? != 0 )); then + err 4 "Error calling getopt" +fi +eval set -- "$OPTIONS" + +unset DEV +unset FLUSH +while true; do + case "$1" in + -d | --dev ) # device + DEV=$2 + info "Device set to: DEV=$DEV" >&2 + shift 2 + ;; + -v | --verbose) + VERBOSE=yes + # info "Verbose mode: VERBOSE=$VERBOSE" >&2 + shift + ;; + --dry-run ) + DRYRUN=yes + VERBOSE=yes + info "Dry-run mode: enable VERBOSE and don't call TC+IP" >&2 + shift + ;; + -f | --flush ) + FLUSH=yes + shift + ;; + --list ) + LIST=yes + shift + ;; + -- ) + shift + break + ;; + -h | --help ) + usage; + exit 0 + ;; + * ) + shift + break + ;; + esac +done + +FILE="$DIR/$BPF_FILE" +if [[ ! -e $FILE ]]; then + err 3 "Missing BPF object file ($FILE)" +fi + +if [[ -z $DEV ]]; then + usage + err 2 "Please specify network device -- required option --dev" +fi + +## -- Function calls -- + +function list_tc() +{ + local device="$1" + shift + info "Listing current TC ingress rules" + call_tc filter show dev $device ingress +} + +function list_xdp() +{ + local device="$1" + shift + info "Listing current XDP device($device) setting" + call_ip link show dev $device | grep --color=auto xdp +} + +function flush_tc() +{ + local device="$1" + shift + info "Flush TC on device: $device" + call_tc_allow_fail filter del dev $device ingress + call_tc_allow_fail qdisc del dev $device clsact +} + +function flush_xdp() +{ + local device="$1" + shift + info "Flush XDP on device: $device" + call_ip link set dev $device xdp off +} + +function attach_tc_mark() +{ + local device="$1" + local file="$2" + local prog="tc_mark" + shift 2 + + # Re-attach clsact to clear/flush existing role + call_tc_allow_fail qdisc del dev $device clsact 2> /dev/null + call_tc qdisc add dev $device clsact + + # Attach BPF prog + call_tc filter add dev $device ingress \ + prio 1 handle 1 bpf da obj $file sec $prog +} + +function attach_xdp_mark() +{ + local device="$1" + local file="$2" + local prog="xdp_mark" + shift 2 + + # Remove XDP prog in-case it's already loaded + # TODO: Need ip-link option to override/replace existing XDP prog + flush_xdp $device + + # Attach XDP/BPF prog + call_ip link set dev $device xdp obj $file sec $prog +} + +if [[ -n $FLUSH ]]; then + flush_tc $DEV + flush_xdp $DEV + exit 0 +fi + +if [[ -n $LIST ]]; then + list_tc $DEV + list_xdp $DEV + exit 0 +fi + +attach_tc_mark $DEV $FILE +attach_xdp_mark $DEV $FILE diff --git a/samples/bpf/xdp2skb_meta_kern.c b/samples/bpf/xdp2skb_meta_kern.c new file mode 100644 index 0000000000000..12e1024069c24 --- /dev/null +++ b/samples/bpf/xdp2skb_meta_kern.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 + * Copyright (c) 2018 Jesper Dangaard Brouer, Red Hat Inc. + * + * Example howto transfer info from XDP to SKB, e.g. skb->mark + * ----------------------------------------------------------- + * This uses the XDP data_meta infrastructure, and is a cooperation + * between two bpf-programs (1) XDP and (2) clsact at TC-ingress hook. + * + * Notice: This example does not use the BPF C-loader (bpf_load.c), + * but instead rely on the iproute2 TC tool for loading BPF-objects. + */ +#include +#include + +#include "bpf_helpers.h" + +/* + * This struct is stored in the XDP 'data_meta' area, which is located + * just in-front-of the raw packet payload data. The meaning is + * specific to these two BPF programs that use it as a communication + * channel. XDP adjust/increase the area via a bpf-helper, and TC use + * boundary checks to see if data have been provided. + * + * The struct must be 4 byte aligned, which here is enforced by the + * struct __attribute__((aligned(4))). + */ +struct meta_info { + __u32 mark; +} __attribute__((aligned(4))); + +SEC("xdp_mark") +int _xdp_mark(struct xdp_md *ctx) +{ + struct meta_info *meta; + void *data, *data_end; + int ret; + + /* Reserve space in-front data pointer for our meta info. + * (Notice drivers not supporting data_meta will fail here!) + */ + ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)); + if (ret < 0) + return XDP_ABORTED; + + /* For some unknown reason, these ctx pointers must be read + * after bpf_xdp_adjust_meta, else verifier will reject prog. + */ + data = (void *)(unsigned long)ctx->data; + + /* Check data_meta have room for meta_info struct */ + meta = (void *)(unsigned long)ctx->data_meta; + if (meta + 1 > data) + return XDP_ABORTED; + + meta->mark = 42; + + return XDP_PASS; +} + +SEC("tc_mark") +int _tc_mark(struct __sk_buff *ctx) +{ + void *data = (void *)(unsigned long)ctx->data; + void *data_end = (void *)(unsigned long)ctx->data_end; + void *data_meta = (void *)(unsigned long)ctx->data_meta; + struct meta_info *meta = data_meta; + + /* Check XDP gave us some data_meta */ + if (meta + 1 > data) { + ctx->mark = 41; + /* Skip "accept" if no data_meta is avail */ + return TC_ACT_OK; + } + + /* Hint: See func tc_cls_act_is_valid_access() for BPF_WRITE access */ + ctx->mark = meta->mark; /* Transfer XDP-mark to SKB-mark */ + + return TC_ACT_OK; +} + +/* Manually attaching these programs: +export DEV=ixgbe2 +export FILE=xdp2skb_meta_kern.o + +# via TC command +tc qdisc del dev $DEV clsact 2> /dev/null +tc qdisc add dev $DEV clsact +tc filter add dev $DEV ingress prio 1 handle 1 bpf da obj $FILE sec tc_mark +tc filter show dev $DEV ingress + +# XDP via IP command: +ip link set dev $DEV xdp off +ip link set dev $DEV xdp obj $FILE sec xdp_mark + +# Use iptable to "see" if SKBs are marked +iptables -I INPUT -p icmp -m mark --mark 41 # == 0x29 +iptables -I INPUT -p icmp -m mark --mark 42 # == 0x2a + +# Hint: catch XDP_ABORTED errors via +perf record -e xdp:* +perf script + +*/