Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Alexei Starovoitov says:

====================
pull-request: bpf-next 2020-12-03

The main changes are:

1) Support BTF in kernel modules, from Andrii.

2) Introduce preferred busy-polling, from Björn.

3) bpf_ima_inode_hash() and bpf_bprm_opts_set() helpers, from KP Singh.

4) Memcg-based memory accounting for bpf objects, from Roman.

5) Allow bpf_{s,g}etsockopt from cgroup bind{4,6} hooks, from Stanislav.

* https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (118 commits)
  selftests/bpf: Fix invalid use of strncat in test_sockmap
  libbpf: Use memcpy instead of strncpy to please GCC
  selftests/bpf: Add fentry/fexit/fmod_ret selftest for kernel module
  selftests/bpf: Add tp_btf CO-RE reloc test for modules
  libbpf: Support attachment of BPF tracing programs to kernel modules
  libbpf: Factor out low-level BPF program loading helper
  bpf: Allow to specify kernel module BTFs when attaching BPF programs
  bpf: Remove hard-coded btf_vmlinux assumption from BPF verifier
  selftests/bpf: Add CO-RE relocs selftest relying on kernel module BTF
  selftests/bpf: Add support for marking sub-tests as skipped
  selftests/bpf: Add bpf_testmod kernel module for testing
  libbpf: Add kernel module BTF support for CO-RE relocations
  libbpf: Refactor CO-RE relocs to not assume a single BTF object
  libbpf: Add internal helper to load BTF data by FD
  bpf: Keep module's btf_data_size intact after load
  bpf: Fix bpf_put_raw_tracepoint()'s use of __module_address()
  selftests/bpf: Add Userspace tests for TCP_WINDOW_CLAMP
  bpf: Adds support for setting window clamp
  samples/bpf: Fix spelling mistake "recieving" -> "receiving"
  bpf: Fix cold build of test_progs-no_alu32
  ...
====================

Link: https://lore.kernel.org/r/20201204021936.85653-1-alexei.starovoitov@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
  • Loading branch information
Jakub Kicinski committed Dec 4, 2020
2 parents 55fd59b + eceae70 commit a1dd1d8
Show file tree
Hide file tree
Showing 196 changed files with 4,330 additions and 2,569 deletions.
3 changes: 3 additions & 0 deletions arch/alpha/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@

#define SO_DETACH_REUSEPORT_BPF 68

#define SO_PREFER_BUSY_POLL 69
#define SO_BUSY_POLL_BUDGET 70

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
3 changes: 3 additions & 0 deletions arch/mips/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@

#define SO_DETACH_REUSEPORT_BPF 68

#define SO_PREFER_BUSY_POLL 69
#define SO_BUSY_POLL_BUDGET 70

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
3 changes: 3 additions & 0 deletions arch/parisc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@

#define SO_DETACH_REUSEPORT_BPF 0x4042

#define SO_PREFER_BUSY_POLL 0x4043
#define SO_BUSY_POLL_BUDGET 0x4044

#if !defined(__KERNEL__)

#if __BITS_PER_LONG == 64
Expand Down
3 changes: 3 additions & 0 deletions arch/sparc/include/uapi/asm/socket.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@

#define SO_DETACH_REUSEPORT_BPF 0x0047

#define SO_PREFER_BUSY_POLL 0x0048
#define SO_BUSY_POLL_BUDGET 0x0049

#if !defined(__KERNEL__)


Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/amazon/ena/ena_netdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -416,7 +416,7 @@ static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring)
{
int rc;

rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid);
rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0);

if (rc) {
netif_err(rx_ring->adapter, ifup, rx_ring->netdev,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/broadcom/bnxt/bnxt.c
Original file line number Diff line number Diff line change
Expand Up @@ -2884,7 +2884,7 @@ static int bnxt_alloc_rx_rings(struct bnxt *bp)
if (rc)
return rc;

rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i);
rc = xdp_rxq_info_reg(&rxr->xdp_rxq, bp->dev, i, 0);
if (rc < 0)
return rc;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/cavium/thunder/nicvf_queues.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
rq->caching = 1;

/* Driver have no proper error path for failed XDP RX-queue info reg */
WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx) < 0);
WARN_ON(xdp_rxq_info_reg(&rq->xdp_rxq, nic->netdev, qidx, 0) < 0);

/* Send a mailbox msg to PF to config RQ */
mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
Original file line number Diff line number Diff line change
Expand Up @@ -3334,7 +3334,7 @@ static int dpaa2_eth_setup_rx_flow(struct dpaa2_eth_priv *priv,
return 0;

err = xdp_rxq_info_reg(&fq->channel->xdp_rxq, priv->net_dev,
fq->flowid);
fq->flowid, 0);
if (err) {
dev_err(dev, "xdp_rxq_info_reg failed\n");
return err;
Expand Down
13 changes: 12 additions & 1 deletion drivers/net/ethernet/intel/i40e/i40e_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,8 @@ void i40e_free_tx_resources(struct i40e_ring *tx_ring)
i40e_clean_tx_ring(tx_ring);
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
kfree(tx_ring->xsk_descs);
tx_ring->xsk_descs = NULL;

if (tx_ring->desc) {
dma_free_coherent(tx_ring->dev, tx_ring->size,
Expand Down Expand Up @@ -1277,6 +1279,13 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
if (!tx_ring->tx_bi)
goto err;

if (ring_is_xdp(tx_ring)) {
tx_ring->xsk_descs = kcalloc(I40E_MAX_NUM_DESCRIPTORS, sizeof(*tx_ring->xsk_descs),
GFP_KERNEL);
if (!tx_ring->xsk_descs)
goto err;
}

u64_stats_init(&tx_ring->syncp);

/* round up to nearest 4K */
Expand All @@ -1300,6 +1309,8 @@ int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
return 0;

err:
kfree(tx_ring->xsk_descs);
tx_ring->xsk_descs = NULL;
kfree(tx_ring->tx_bi);
tx_ring->tx_bi = NULL;
return -ENOMEM;
Expand Down Expand Up @@ -1436,7 +1447,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
/* XDP RX-queue info only needed for RX rings exposed to XDP */
if (rx_ring->vsi->type == I40E_VSI_MAIN) {
err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index);
rx_ring->queue_index, rx_ring->q_vector->napi.napi_id);
if (err < 0)
return err;
}
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/intel/i40e/i40e_txrx.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ struct i40e_ring {
struct i40e_channel *ch;
struct xdp_rxq_info xdp_rxq;
struct xsk_buff_pool *xsk_pool;
struct xdp_desc *xsk_descs; /* For storing descriptors in the AF_XDP ZC path */
} ____cacheline_internodealigned_in_smp;

static inline bool ring_uses_build_skb(struct i40e_ring *ring)
Expand Down
123 changes: 84 additions & 39 deletions drivers/net/ethernet/intel/i40e/i40e_xsk.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
/* Copyright(c) 2018 Intel Corporation. */

#include <linux/bpf_trace.h>
#include <linux/stringify.h>
#include <net/xdp_sock_drv.h>
#include <net/xdp.h>

Expand Down Expand Up @@ -380,58 +381,102 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
return failure ? budget : (int)total_rx_packets;
}

/**
* i40e_xmit_zc - Performs zero-copy Tx AF_XDP
* @xdp_ring: XDP Tx ring
* @budget: NAPI budget
*
* Returns true if the work is finished.
**/
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
static void i40e_xmit_pkt(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
unsigned int *total_bytes)
{
unsigned int sent_frames = 0, total_bytes = 0;
struct i40e_tx_desc *tx_desc = NULL;
struct i40e_tx_buffer *tx_bi;
struct xdp_desc desc;
struct i40e_tx_desc *tx_desc;
dma_addr_t dma;

while (budget-- > 0) {
if (!xsk_tx_peek_desc(xdp_ring->xsk_pool, &desc))
break;
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len);

dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc.addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma,
desc.len);
tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use++);
tx_desc->buffer_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC | I40E_TX_DESC_CMD_EOP,
0, desc->len, 0);

tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
tx_bi->bytecount = desc.len;
*total_bytes += desc->len;
}

tx_desc = I40E_TX_DESC(xdp_ring, xdp_ring->next_to_use);
tx_desc->buffer_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz =
build_ctob(I40E_TX_DESC_CMD_ICRC
| I40E_TX_DESC_CMD_EOP,
0, desc.len, 0);
static void i40e_xmit_pkt_batch(struct i40e_ring *xdp_ring, struct xdp_desc *desc,
unsigned int *total_bytes)
{
u16 ntu = xdp_ring->next_to_use;
struct i40e_tx_desc *tx_desc;
dma_addr_t dma;
u32 i;

sent_frames++;
total_bytes += tx_bi->bytecount;
loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) {
dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc[i].addr);
xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc[i].len);

xdp_ring->next_to_use++;
if (xdp_ring->next_to_use == xdp_ring->count)
xdp_ring->next_to_use = 0;
tx_desc = I40E_TX_DESC(xdp_ring, ntu++);
tx_desc->buffer_addr = cpu_to_le64(dma);
tx_desc->cmd_type_offset_bsz = build_ctob(I40E_TX_DESC_CMD_ICRC |
I40E_TX_DESC_CMD_EOP,
0, desc[i].len, 0);

*total_bytes += desc[i].len;
}

if (tx_desc) {
/* Request an interrupt for the last frame and bump tail ptr. */
tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS <<
I40E_TXD_QW1_CMD_SHIFT);
i40e_xdp_ring_update_tail(xdp_ring);
xdp_ring->next_to_use = ntu;
}

static void i40e_fill_tx_hw_ring(struct i40e_ring *xdp_ring, struct xdp_desc *descs, u32 nb_pkts,
unsigned int *total_bytes)
{
u32 batched, leftover, i;

batched = nb_pkts & ~(PKTS_PER_BATCH - 1);
leftover = nb_pkts & (PKTS_PER_BATCH - 1);
for (i = 0; i < batched; i += PKTS_PER_BATCH)
i40e_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes);
for (i = batched; i < batched + leftover; i++)
i40e_xmit_pkt(xdp_ring, &descs[i], total_bytes);
}

xsk_tx_release(xdp_ring->xsk_pool);
i40e_update_tx_stats(xdp_ring, sent_frames, total_bytes);
static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
{
u16 ntu = xdp_ring->next_to_use ? xdp_ring->next_to_use - 1 : xdp_ring->count - 1;
struct i40e_tx_desc *tx_desc;

tx_desc = I40E_TX_DESC(xdp_ring, ntu);
tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT);
}

/**
* i40e_xmit_zc - Performs zero-copy Tx AF_XDP
* @xdp_ring: XDP Tx ring
* @budget: NAPI budget
*
* Returns true if the work is finished.
**/
static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
{
struct xdp_desc *descs = xdp_ring->xsk_descs;
u32 nb_pkts, nb_processed = 0;
unsigned int total_bytes = 0;

nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, descs, budget);
if (!nb_pkts)
return false;

if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) {
nb_processed = xdp_ring->count - xdp_ring->next_to_use;
i40e_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes);
xdp_ring->next_to_use = 0;
}

return !!budget;
i40e_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed,
&total_bytes);

/* Request an interrupt for the last frame and bump tail ptr. */
i40e_set_rs_bit(xdp_ring);
i40e_xdp_ring_update_tail(xdp_ring);

i40e_update_tx_stats(xdp_ring, nb_pkts, total_bytes);

return true;
}

/**
Expand Down
16 changes: 16 additions & 0 deletions drivers/net/ethernet/intel/i40e/i40e_xsk.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@
#ifndef _I40E_XSK_H_
#define _I40E_XSK_H_

/* This value should match the pragma in the loop_unrolled_for
* macro. Why 4? It is strictly empirical. It seems to be a good
* compromise between the advantage of having simultaneous outstanding
* reads to the DMA array that can hide each others latency and the
* disadvantage of having a larger code path.
*/
#define PKTS_PER_BATCH 4

#ifdef __clang__
#define loop_unrolled_for _Pragma("clang loop unroll_count(4)") for
#elif __GNUC__ >= 8
#define loop_unrolled_for _Pragma("GCC unroll 4") for
#else
#define loop_unrolled_for for
#endif

struct i40e_vsi;
struct xsk_buff_pool;
struct zero_copy_allocator;
Expand Down
4 changes: 2 additions & 2 deletions drivers/net/ethernet/intel/ice/ice_base.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq, ring->netdev,
ring->q_index);
ring->q_index, ring->q_vector->napi.napi_id);

ring->xsk_pool = ice_xsk_pool(ring);
if (ring->xsk_pool) {
Expand All @@ -333,7 +333,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
/* coverity[check_return] */
xdp_rxq_info_reg(&ring->xdp_rxq,
ring->netdev,
ring->q_index);
ring->q_index, ring->q_vector->napi.napi_id);

err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
MEM_TYPE_PAGE_SHARED,
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ice/ice_txrx.c
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
if (rx_ring->vsi->type == ICE_VSI_PF &&
!xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->q_index))
rx_ring->q_index, rx_ring->q_vector->napi.napi_id))
goto err;
return 0;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/igb/igb_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -4352,7 +4352,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring)

/* XDP RX-queue info */
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
rx_ring->queue_index) < 0)
rx_ring->queue_index, 0) < 0)
goto err;

return 0;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -6577,7 +6577,7 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,

/* XDP RX-queue info */
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
rx_ring->queue_index) < 0)
rx_ring->queue_index, rx_ring->q_vector->napi.napi_id) < 0)
goto err;

rx_ring->xdp_prog = adapter->xdp_prog;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -3493,7 +3493,7 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,

/* XDP RX-queue info */
if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
rx_ring->queue_index) < 0)
rx_ring->queue_index, 0) < 0)
goto err;

rx_ring->xdp_prog = adapter->xdp_prog;
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/marvell/mvneta.c
Original file line number Diff line number Diff line change
Expand Up @@ -3243,7 +3243,7 @@ static int mvneta_create_page_pool(struct mvneta_port *pp,
return err;
}

err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id);
err = xdp_rxq_info_reg(&rxq->xdp_rxq, pp->dev, rxq->id, 0);
if (err < 0)
goto err_free_pp;

Expand Down
4 changes: 2 additions & 2 deletions drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -2614,11 +2614,11 @@ static int mvpp2_rxq_init(struct mvpp2_port *port,
mvpp2_rxq_status_update(port, rxq->id, 0, rxq->size);

if (priv->percpu_pools) {
err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id);
err = xdp_rxq_info_reg(&rxq->xdp_rxq_short, port->dev, rxq->id, 0);
if (err < 0)
goto err_free_dma;

err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id);
err = xdp_rxq_info_reg(&rxq->xdp_rxq_long, port->dev, rxq->id, 0);
if (err < 0)
goto err_unregister_rxq_short;

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx4/en_rx.c
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
ring->log_stride = ffs(ring->stride) - 1;
ring->buf_size = ring->size * ring->stride + TXBB_SIZE;

if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index) < 0)
if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0)
goto err_ring;

tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
rq_xdp_ix = rq->ix;
if (xsk)
rq_xdp_ix += params->num_channels * MLX5E_RQ_GROUP_XSK;
err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix);
err = xdp_rxq_info_reg(&rq->xdp_rxq, rq->netdev, rq_xdp_ix, 0);
if (err < 0)
goto err_rq_xdp_prog;

Expand Down
Loading

0 comments on commit a1dd1d8

Please sign in to comment.