From b45d8b50b8264cac7b2f1245ca04a2f009038ac7 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 13 Feb 2017 18:41:30 +0200 Subject: [PATCH 01/17] net/mlx5e: Reorganize struct mlx5e_rq Bring fast-path fields together, and combine RX WQE mutual exclusive fields into a union. Page-reuse and XDP are mutually exclusive and cannot be used at the same time. Use a union to combine their footprints. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 16 +++++++++------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++----- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 13 ++++++------- 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 6c2abeccfa5a3..d964db286c955 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -527,20 +527,24 @@ struct mlx5e_rq { struct { struct mlx5e_wqe_frag_info *frag_info; u32 frag_sz; /* max possible skb frag_sz */ - bool page_reuse; - bool xdp_xmit; + union { + bool page_reuse; + bool xdp_xmit; + }; } wqe; struct { struct mlx5e_mpw_info *info; void *mtt_no_align; + u16 stride_sz; + u16 num_strides; } mpwqe; }; struct { - u8 page_order; u32 wqe_sz; /* wqe data buffer size */ + u16 headroom; + u8 page_order; u8 map_dir; /* dma map direction */ } buff; - __be32 mkey_be; struct device *pdev; struct net_device *netdev; @@ -555,7 +559,6 @@ struct mlx5e_rq { unsigned long state; int ix; - u16 rx_headroom; struct mlx5e_rx_am am; /* Adaptive Moderation */ @@ -565,9 +568,8 @@ struct mlx5e_rq { /* control */ struct mlx5_wq_ctrl wq_ctrl; + __be32 mkey_be; u8 wq_type; - u32 mpwqe_stride_sz; - u32 mpwqe_num_strides; u32 rqn; struct mlx5e_channel *channel; struct mlx5_core_dev *mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 85841e24c65b5..94761d0e1b333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -593,7 +593,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, } rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE; - rq->rx_headroom = params->rq_headroom; + rq->buff.headroom = params->rq_headroom; switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: @@ -615,10 +615,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe_stride_sz = BIT(params->mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(params->mpwqe_log_num_strides); + rq->mpwqe.stride_sz = BIT(params->mpwqe_log_stride_sz); + rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + rq->buff.wqe_sz = rq->mpwqe.stride_sz * rq->mpwqe.num_strides; byte_count = rq->buff.wqe_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); @@ -665,7 +665,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, byte_count = rq->buff.wqe_sz; /* calc the required page order */ - rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->rx_headroom + byte_count); + rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count); npages = DIV_ROUND_UP(rq->wqe.frag_sz, PAGE_SIZE); rq->buff.page_order = order_base_2(npages); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index be8197a75a634..1b50f1e7e48a3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -263,8 +263,7 @@ int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) wi->offset = 0; } - wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + - rq->rx_headroom); + wqe->data.addr = cpu_to_be64(wi->di.addr + wi->offset + rq->buff.headroom); return 0; } @@ -296,7 +295,7 @@ void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) { - return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; + return rq->mpwqe.num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, @@ -305,7 +304,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, u32 page_idx, u32 frag_offset, u32 len) { - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); + unsigned int truesize = ALIGN(len, rq->mpwqe.stride_sz); dma_sync_single_for_cpu(rq->pdev, wi->umr.dma_info[page_idx].addr + frag_offset, @@ -776,9 +775,9 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt) { struct mlx5e_dma_info *di = &wi->di; + u16 rx_headroom = rq->buff.headroom; struct sk_buff *skb; void *va, *data; - u16 rx_headroom = rq->rx_headroom; bool consumed; u32 frag_size; @@ -911,7 +910,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb) { u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; + u32 wqe_offset = stride_ix * rq->mpwqe.stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; u32 head_page_idx = page_idx; @@ -979,7 +978,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: - if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) + if (likely(wi->consumed_strides < rq->mpwqe.num_strides)) return; mlx5e_free_rx_mpwqe(rq, wi); From 89e89f7a9fded02b81f4ee21e30c64892116cf34 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Jul 2017 19:02:05 +0300 Subject: [PATCH 02/17] net/mlx5e: Replace multiplication by stride size with a shift In RX data-path, use shift operations instead of a regular multiplication by stride size, as it is a power of two. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index d964db286c955..44bd8df905caf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -535,8 +535,8 @@ struct mlx5e_rq { struct { struct mlx5e_mpw_info *info; void *mtt_no_align; - u16 stride_sz; u16 num_strides; + u8 log_stride_sz; } mpwqe; }; struct { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 94761d0e1b333..7a25d952c9221 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -615,10 +615,10 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->mpwqe.stride_sz = BIT(params->mpwqe_log_stride_sz); + rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe.stride_sz * rq->mpwqe.num_strides; + rq->buff.wqe_sz = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; byte_count = rq->buff.wqe_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 1b50f1e7e48a3..aa5cc15908599 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -304,7 +304,7 @@ static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, u32 page_idx, u32 frag_offset, u32 len) { - unsigned int truesize = ALIGN(len, rq->mpwqe.stride_sz); + unsigned int truesize = ALIGN(len, BIT(rq->mpwqe.log_stride_sz)); dma_sync_single_for_cpu(rq->pdev, wi->umr.dma_info[page_idx].addr + frag_offset, @@ -910,7 +910,7 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, struct sk_buff *skb) { u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); - u32 wqe_offset = stride_ix * rq->mpwqe.stride_sz; + u32 wqe_offset = stride_ix << rq->mpwqe.log_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); u32 page_idx = wqe_offset >> PAGE_SHIFT; u32 head_page_idx = page_idx; From b681c481f11bd2839d759f493b00008a6c690e61 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 3 Jul 2017 11:51:17 +0300 Subject: [PATCH 03/17] net/mlx5e: Remove unnecessary wqe_sz field from RQ buffer Field is used only locally within the RQ create function. The use of a local variable is sufficient. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 8 +++----- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 44bd8df905caf..ce8b4f648757a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -540,7 +540,6 @@ struct mlx5e_rq { } mpwqe; }; struct { - u32 wqe_sz; /* wqe data buffer size */ u16 headroom; u8 page_order; u8 map_dir; /* dma map direction */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 7a25d952c9221..591e0dca96712 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -618,8 +618,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, rq->mpwqe.log_stride_sz = params->mpwqe_log_stride_sz; rq->mpwqe.num_strides = BIT(params->mpwqe_log_num_strides); - rq->buff.wqe_sz = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - byte_count = rq->buff.wqe_sz; + byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; err = mlx5e_create_rq_umr_mkey(mdev, rq); if (err) @@ -654,15 +653,14 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, goto err_rq_wq_destroy; } - rq->buff.wqe_sz = params->lro_en ? + byte_count = params->lro_en ? params->lro_wqe_sz : MLX5E_SW2HW_MTU(c->priv, c->netdev->mtu); #ifdef CONFIG_MLX5_EN_IPSEC if (MLX5_IPSEC_DEV(mdev)) - rq->buff.wqe_sz += MLX5E_METADATA_ETHER_LEN; + byte_count += MLX5E_METADATA_ETHER_LEN; #endif rq->wqe.page_reuse = !params->xdp_prog && !params->lro_en; - byte_count = rq->buff.wqe_sz; /* calc the required page order */ rq->wqe.frag_sz = MLX5_SKB_FRAG_SZ(rq->buff.headroom + byte_count); From 9bafe2adabd16e74e28920956680b15bf529c2eb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 25 Jun 2017 15:23:35 +0300 Subject: [PATCH 04/17] net/mlx5e: Use memset to init skbs_frags array to zeros In RX data-path, use memset() instead of loop assignment to init the whole skbs_frags array. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index aa5cc15908599..db372dcecbe02 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -391,9 +391,9 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, goto err_unmap; wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); page_ref_add(dma_info->page, pg_strides); - wi->skbs_frags[i] = 0; } + memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; wqe->data.addr = cpu_to_be64(dma_offset); From 4c2af5cc2bee32685883b55328d8b5ec80e1781f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 25 Jun 2017 16:28:46 +0300 Subject: [PATCH 05/17] net/mlx5e: Small enhancements for RX MPWQE allocation and free The dma offset of a MPWQE (Multi-Packet WQE) in memory region is fixed for all rounds. Calculate it once on creation time, instead of in runtime. This also obsoletes the wqe argument in the function. In addition, optimize dma_info iterator calculation. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++++ .../net/ethernet/mellanox/mlx5/core/en_rx.c | 18 ++++++------------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 591e0dca96712..5aa4681f7c3c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -674,6 +674,12 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, for (i = 0; i < wq_sz; i++) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) { + u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, i) << PAGE_SHIFT; + + wqe->data.addr = cpu_to_be64(dma_offset); + } + wqe->data.byte_count = cpu_to_be32(byte_count); wqe->data.lkey = rq->mkey_be; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index db372dcecbe02..fb3b83609aea2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -374,18 +374,15 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) } static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int err; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { err = mlx5e_page_alloc_mapped(rq, dma_info); if (unlikely(err)) goto err_unmap; @@ -395,14 +392,12 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, memset(wi->skbs_frags, 0, sizeof(*wi->skbs_frags) * MLX5_MPWRQ_PAGES_PER_WQE); wi->consumed_strides = 0; - wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + dma_info--; page_ref_sub(dma_info->page, pg_strides); mlx5e_page_release(rq, dma_info, true); } @@ -413,11 +408,10 @@ static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[0]; int i; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; - + for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++, dma_info++) { page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); mlx5e_page_release(rq, dma_info, true); } @@ -447,7 +441,7 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; - err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); + err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); if (unlikely(err)) return err; set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); From 4cbb7558013a4f600eb1ca9ad34959d8912240e6 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 19 Jun 2017 18:36:30 +0300 Subject: [PATCH 06/17] net/mlx5e: NAPI busy-poll when UMR post is in progress If a UMR post is in progress, it means that there's a missing WQE in RQ, and that a completion will be shortly available in ICO SQ completion queue. Prefer busy-poll to handle it as soon as possible. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index fb3b83609aea2..8af6577b7501e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -456,17 +456,16 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) mlx5e_free_rx_mpwqe(rq, wi); } -#define RQ_CANNOT_POST(rq) \ - (!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \ - test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; - if (unlikely(RQ_CANNOT_POST(rq))) + if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; + if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + return true; + while (!mlx5_wq_ll_is_full(wq)) { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); int err; From 4b7dfc9925143eb4a55bbb97c033d6da03b29bff Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 19 Jun 2017 18:11:30 +0300 Subject: [PATCH 07/17] net/mlx5e: Early-return on empty completion queues NAPI context handles different kinds of completion queues (RX, TX, and others). Hence, upon a poll trial, some of them might be empty. Here we early-return upon empty completion queues, as well as full rx buffer, and save unnecessary logic and memory barriers. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 40 +++++++++++-------- .../net/ethernet/mellanox/mlx5/core/en_tx.c | 16 ++++---- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 8af6577b7501e..ab1213a3615ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -459,16 +459,19 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; + int err; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) return false; + if (mlx5_wq_ll_is_full(wq)) + return false; + if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) return true; - while (!mlx5_wq_ll_is_full(wq)) { + do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - int err; err = rq->alloc_wqe(rq, wqe, wq->head); if (err == -EBUSY) @@ -479,14 +482,14 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - } + } while (!mlx5_wq_ll_is_full(wq)); /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); mlx5_wq_ll_update_db_record(wq); - return !mlx5_wq_ll_is_full(wq); + return !!err; } static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, @@ -981,7 +984,8 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); - struct mlx5e_xdpsq *xdpsq = &rq->xdpsq; + struct mlx5e_xdpsq *xdpsq; + struct mlx5_cqe64 *cqe; int work_done = 0; if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) @@ -990,12 +994,13 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) if (cq->decmprs_left) work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget); - for (; work_done < budget; work_done++) { - struct mlx5_cqe64 *cqe = mlx5_cqwq_get_cqe(&cq->wq); + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return 0; - if (!cqe) - break; + xdpsq = &rq->xdpsq; + do { if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) { work_done += mlx5e_decompress_cqes_start(rq, cq, @@ -1006,7 +1011,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) mlx5_cqwq_pop(&cq->wq); rq->handle_rx_cqe(rq, cqe); - } + } while ((++work_done < budget) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); if (xdpsq->db.doorbell) { mlx5e_xmit_xdp_doorbell(xdpsq); @@ -1024,6 +1029,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) { struct mlx5e_xdpsq *sq; + struct mlx5_cqe64 *cqe; struct mlx5e_rq *rq; u16 sqcc; int i; @@ -1033,6 +1039,10 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + rq = container_of(sq, struct mlx5e_rq, xdpsq); /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), @@ -1040,15 +1050,11 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) */ sqcc = sq->cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -1066,7 +1072,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) /* Recycle RX page */ mlx5e_page_release(rq, di, true); } while (!last_wqe); - } + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 31353e5c3c783..80d2121643ee6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -394,6 +394,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev) bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) { struct mlx5e_txqsq *sq; + struct mlx5_cqe64 *cqe; u32 dma_fifo_cc; u32 nbytes; u16 npkts; @@ -405,6 +406,10 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) return false; + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (!cqe) + return false; + npkts = 0; nbytes = 0; @@ -416,15 +421,11 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) /* avoid dirtying sq cache line every cqe */ dma_fifo_cc = sq->dma_fifo_cc; - for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { - struct mlx5_cqe64 *cqe; + i = 0; + do { u16 wqe_counter; bool last_wqe; - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (!cqe) - break; - mlx5_cqwq_pop(&cq->wq); wqe_counter = be16_to_cpu(cqe->wqe_counter); @@ -467,7 +468,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sqcc += wi->num_wqebbs; napi_consume_skb(skb, napi_budget); } while (!last_wqe); - } + + } while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq))); mlx5_cqwq_update_db_record(&cq->wq); From 604acb193b8383a3774673c324e576e4c6b4ffcd Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 5 Jun 2017 11:17:20 +0300 Subject: [PATCH 08/17] net/mlx5e: Refactor data-path lro header function Refactor function mlx5e_lro_update_hdr() to reduce number of branches. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 45 +++++++++---------- include/linux/mlx5/device.h | 2 +- 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ab1213a3615ef..9d9c13ae6b834 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -496,56 +496,51 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); - struct iphdr *ipv4; - struct ipv6hdr *ipv6; struct tcphdr *tcp; int network_depth = 0; __be16 proto; u16 tot_len; + void *ip_p; u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - int tcp_ack = ((l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || - (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA)); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); skb->mac_len = ETH_HLEN; proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); - ipv4 = (struct iphdr *)(skb->data + network_depth); - ipv6 = (struct ipv6hdr *)(skb->data + network_depth); tot_len = cqe_bcnt - network_depth; + ip_p = skb->data + network_depth; if (proto == htons(ETH_P_IP)) { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct iphdr)); - ipv6 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - } else { - tcp = (struct tcphdr *)(skb->data + network_depth + - sizeof(struct ipv6hdr)); - ipv4 = NULL; - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; - } - - if (get_cqe_lro_tcppsh(cqe)) - tcp->psh = 1; + struct iphdr *ipv4 = ip_p; - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; - } + tcp = ip_p + sizeof(struct iphdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; - if (ipv4) { ipv4->ttl = cqe->lro_min_ttl; ipv4->tot_len = cpu_to_be16(tot_len); ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); } else { + struct ipv6hdr *ipv6 = ip_p; + + tcp = ip_p + sizeof(struct ipv6hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + ipv6->hop_limit = cqe->lro_min_ttl; ipv6->payload_len = cpu_to_be16(tot_len - sizeof(struct ipv6hdr)); } + + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } } static inline void mlx5e_skb_set_hash(struct mlx5_cqe64 *cqe, diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 3c7442b564604..7031d655ec32c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -709,7 +709,7 @@ static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe) return (cqe->op_own >> 2) & 0x3; } -static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) +static inline u8 get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe) { return (cqe->lro_tcppsh_abort_dupack >> 6) & 1; } From a1eaba4c5c29b6b5196b2237ce3a8d7d97622b2f Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 3 Jul 2017 11:27:20 +0300 Subject: [PATCH 09/17] net/mlx5e: Non-atomic indicator for ring enabled state Rings enabled state change occurs in control path only, and is always followed by a napi_sychronize(), so that following NAPIs read the new value. This read does not need to be atomic. The RQ auto-moderation bit is not set/cleared in data-path. No need for atomic read, a regular read operation is sufficient. In RQ creation time as well, there's no multiple threads trying to access it yet, hence a regular read can be used. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 8 ++++---- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 4 ++-- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index ce8b4f648757a..0c4f1f30085aa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -295,6 +295,8 @@ enum { MLX5E_RQ_STATE_AM, }; +#define MLX5E_TEST_BIT(state, nr) (state & BIT(nr)) + struct mlx5e_cq { /* data path - accessed per cqe */ struct mlx5_cqwq wq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 5aa4681f7c3c2..411fb68794bcd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -929,7 +929,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, goto err_destroy_rq; if (params->rx_am_enabled) - set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); + c->rq.state |= BIT(MLX5E_RQ_STATE_AM); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 9d9c13ae6b834..a5522c3992a25 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -424,7 +424,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) { + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) { mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } @@ -461,7 +461,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) struct mlx5_wq_ll *wq = &rq->wq; int err; - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return false; if (mlx5_wq_ll_is_full(wq)) @@ -983,7 +983,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) struct mlx5_cqe64 *cqe; int work_done = 0; - if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) return 0; if (cq->decmprs_left) @@ -1031,7 +1031,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq) sq = container_of(cq, struct mlx5e_xdpsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) return false; cqe = mlx5_cqwq_get_cqe(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 80d2121643ee6..fee43e40fa16a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -403,7 +403,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) sq = container_of(cq, struct mlx5e_txqsq, cq); - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) return false; cqe = mlx5_cqwq_get_cqe(&cq->wq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 92db28a9ed43a..7311b937e4345 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -69,7 +69,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) struct mlx5_cqe64 *cqe; u16 sqcc; - if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state))) + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) return; cqe = mlx5_cqwq_get_cqe(&cq->wq); @@ -129,7 +129,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); - if (test_bit(MLX5E_RQ_STATE_AM, &c->rq.state)) + if (MLX5E_TEST_BIT(c->rq.state, MLX5E_RQ_STATE_AM)) mlx5e_rx_am(&c->rq); mlx5e_cq_arm(&c->rq.cq); From a071cb9f25bef32ec309ca9d123022c52175341b Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 3 Jul 2017 10:18:19 +0300 Subject: [PATCH 10/17] net/mlx5e: Non-atomic RQ state indicator for UMR WQE in progress The indication for a UMR WQE in progress is needed only within the NAPI context, and hence no races possible and no need for the use of atomic operations. The only place the flag is read outside of NAPI context is in closure flow, after RQ is disabled flag is no more accessed in NAPI. Use a boolean instead of a bit in ring state, so that its non-atomic set operations do not race with the atomic sets of the other bits. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 10 +++++----- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0c4f1f30085aa..bce2080eb86a1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -291,7 +291,6 @@ struct mlx5e_tstamp { enum { MLX5E_RQ_STATE_ENABLED, - MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, MLX5E_RQ_STATE_AM, }; @@ -539,6 +538,7 @@ struct mlx5e_rq { void *mtt_no_align; u16 num_strides; u8 log_stride_sz; + bool umr_in_progress; } mpwqe; }; struct { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 411fb68794bcd..2767a3ee81bc4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -886,7 +886,8 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) u16 wqe_ix; /* UMR WQE (if in progress) is always at wq->head */ - if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) + if (rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ && + rq->mpwqe.umr_in_progress) mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index a5522c3992a25..11dba99400290 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -422,7 +422,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + rq->mpwqe.umr_in_progress = false; if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) { mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); @@ -441,10 +441,13 @@ int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; + if (rq->mpwqe.umr_in_progress) + return -EBUSY; + err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); if (unlikely(err)) return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + rq->mpwqe.umr_in_progress = true; mlx5e_post_umr_wqe(rq, ix); return -EBUSY; } @@ -467,9 +470,6 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) if (mlx5_wq_ll_is_full(wq)) return false; - if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - return true; - do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); From 7cc6d77bb56de3a428ed7cde91a09fffbdbef794 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 17 Jul 2017 12:27:26 +0300 Subject: [PATCH 11/17] net/mlx5e: Type-specific optimizations for RX post WQEs function Separate the RX post WQEs function of the different RQ types. This enables RQ type-specific optimizations in data-path. Poll the ICOSQ completion queue only for Striding RQ, and only when a UMR post completion could be possibly available. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 10 +- .../net/ethernet/mellanox/mlx5/core/en_main.c | 4 +- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 101 +++++++++++++++--- .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 64 +---------- 4 files changed, 92 insertions(+), 87 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index bce2080eb86a1..8d29a6eb94061 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -517,7 +517,7 @@ struct mlx5e_page_cache { struct mlx5e_rq; typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq*, struct mlx5_cqe64*); -typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq*, struct mlx5e_rx_wqe*, u16); +typedef bool (*mlx5e_fp_post_rx_wqes)(struct mlx5e_rq *rq); typedef void (*mlx5e_fp_dealloc_wqe)(struct mlx5e_rq*, u16); struct mlx5e_rq { @@ -547,6 +547,7 @@ struct mlx5e_rq { u8 map_dir; /* dma map direction */ } buff; + struct mlx5e_channel *channel; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; @@ -555,7 +556,7 @@ struct mlx5e_rq { struct mlx5e_page_cache page_cache; mlx5e_fp_handle_rx_cqe handle_rx_cqe; - mlx5e_fp_alloc_wqe alloc_wqe; + mlx5e_fp_post_rx_wqes post_wqes; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; @@ -572,7 +573,6 @@ struct mlx5e_rq { __be32 mkey_be; u8 wq_type; u32 rqn; - struct mlx5e_channel *channel; struct mlx5_core_dev *mdev; struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; @@ -853,11 +853,9 @@ void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); void mlx5e_rx_am(struct mlx5e_rq *rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2767a3ee81bc4..162ba6ab749a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -598,7 +598,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->post_wqes = mlx5e_post_rx_mpwqes; rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; rq->handle_rx_cqe = c->priv->profile->rx_handlers.handle_rx_cqe_mpwqe; @@ -637,7 +637,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, err = -ENOMEM; goto err_rq_wq_destroy; } - rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->post_wqes = mlx5e_post_rx_wqes; rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; #ifdef CONFIG_MLX5_EN_IPSEC diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 11dba99400290..b236dfd71c182 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -252,7 +252,7 @@ static inline bool mlx5e_page_reuse(struct mlx5e_rq *rq, !mlx5e_page_is_reserved(wi->di.page); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { struct mlx5e_wqe_frag_info *wi = &rq->wqe.frag_info[ix]; @@ -417,18 +417,13 @@ void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) } } -void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) +static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); rq->mpwqe.umr_in_progress = false; - if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) { - mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); - return; - } - mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); /* ensure wqes are visible to device before updating doorbell record */ @@ -437,19 +432,18 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { int err; - if (rq->mpwqe.umr_in_progress) - return -EBUSY; - err = mlx5e_alloc_rx_umr_mpwqe(rq, ix); - if (unlikely(err)) + if (unlikely(err)) { + rq->stats.buff_alloc_err++; return err; + } rq->mpwqe.umr_in_progress = true; mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; + return 0; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) @@ -473,9 +467,7 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) do { struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); - err = rq->alloc_wqe(rq, wqe, wq->head); - if (err == -EBUSY) - return true; + err = mlx5e_alloc_rx_wqe(rq, wqe, wq->head); if (unlikely(err)) { rq->stats.buff_alloc_err++; break; @@ -492,6 +484,83 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) return !!err; } +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5e_rq *rq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + +static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) +{ + struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); + struct mlx5_cqe64 *cqe; + u16 sqcc; + + if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) + return; + + cqe = mlx5_cqwq_get_cqe(&cq->wq); + if (likely(!cqe)) + return; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe, &sqcc); + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; +} + +bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) +{ + struct mlx5_wq_ll *wq = &rq->wq; + + if (unlikely(!MLX5E_TEST_BIT(rq->state, MLX5E_RQ_STATE_ENABLED))) + return false; + + mlx5e_poll_ico_cq(&rq->channel->icosq.cq, rq); + + if (mlx5_wq_ll_is_full(wq)) + return false; + + if (!rq->mpwqe.umr_in_progress) + mlx5e_alloc_rx_mpwqe(rq, wq->head); + + return true; +} + static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 7311b937e4345..439ba1f2ffbc3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -32,66 +32,6 @@ #include "en.h" -static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, - struct mlx5e_icosq *sq, - struct mlx5_cqe64 *cqe, - u16 *sqcc) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - struct mlx5e_rq *rq = &sq->channel->rq; - - prefetch(rq); - mlx5_cqwq_pop(&cq->wq); - *sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - return; - } - - if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { - mlx5e_post_rx_mpwqe(rq); - return; - } - - if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); -} - -static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) -{ - struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_cqe64 *cqe; - u16 sqcc; - - if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) - return; - - cqe = mlx5_cqwq_get_cqe(&cq->wq); - if (likely(!cqe)) - return; - - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); - - mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; -} - int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -111,9 +51,7 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; - mlx5e_poll_ico_cq(&c->icosq.cq); - - busy |= mlx5e_post_rx_wqes(&c->rq); + busy |= c->rq.post_wqes(&c->rq); if (busy) return budget; From 3b56f7b2af86033f421d0f0fd10cf5c3f74872e0 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Mon, 17 Jul 2017 14:31:39 +0300 Subject: [PATCH 12/17] net/mlx5e: Remove unnecessary fields in ICO SQ As of current design, in each NAPI, only a single UMR WQE completion could be available in the completion queue of the the internal control operations (ICO) send queue, in addition to nop operations that require no actions upon completion. This renders the consume index obsolete, as the wqe_counter field in CQE is sufficient. This helps removing a memory barrier, and obsoletes the need for tracking the num_wqebbs to update the consumer counter. In addition, remove other unused fields in icosq struct: pdev, dma_fifo_pc, and prev_cc. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 7 ------- .../net/ethernet/mellanox/mlx5/core/en_main.c | 2 -- .../net/ethernet/mellanox/mlx5/core/en_rx.c | 19 ++----------------- 3 files changed, 2 insertions(+), 26 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 8d29a6eb94061..e55d9439bc12b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -343,7 +343,6 @@ enum { struct mlx5e_sq_wqe_info { u8 opcode; - u8 num_wqebbs; }; struct mlx5e_txqsq { @@ -419,13 +418,8 @@ struct mlx5e_xdpsq { struct mlx5e_icosq { /* data path */ - /* dirtied @completion */ - u16 cc; - /* dirtied @xmit */ u16 pc ____cacheline_aligned_in_smp; - u32 dma_fifo_pc; - u16 prev_cc; struct mlx5e_cq cq; @@ -439,7 +433,6 @@ struct mlx5e_icosq { void __iomem *uar_map; u32 sqn; u16 edge; - struct device *pdev; __be32 mkey_be; unsigned long state; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 162ba6ab749a6..a4c9a0a2c4089 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -950,7 +950,6 @@ static void mlx5e_activate_rq(struct mlx5e_rq *rq) set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; nopwqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc); mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &nopwqe->ctrl); } @@ -1052,7 +1051,6 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, struct mlx5_core_dev *mdev = c->mdev; int err; - sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; sq->uar_map = mdev->mlx5e_res.bfreg.map; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b236dfd71c182..88a8749c67d64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -357,7 +357,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; - sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_post_nop(wq, sq->sqn, &sq->pc); } @@ -368,7 +367,6 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) MLX5_OPCODE_UMR); sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; - sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl); } @@ -487,15 +485,13 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, struct mlx5e_icosq *sq, struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 *sqcc) + struct mlx5_cqe64 *cqe) { struct mlx5_wq_cyc *wq = &sq->wq; u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); - *sqcc += icowi->num_wqebbs; if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", @@ -518,7 +514,6 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); struct mlx5_cqe64 *cqe; - u16 sqcc; if (unlikely(!MLX5E_TEST_BIT(sq->state, MLX5E_SQ_STATE_ENABLED))) return; @@ -527,20 +522,10 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq, struct mlx5e_rq *rq) if (likely(!cqe)) return; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), - * otherwise a cq overrun may occur - */ - sqcc = sq->cc; - /* by design, there's only a single cqe */ - mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe, &sqcc); + mlx5e_poll_ico_single_cqe(cq, sq, rq, cqe); mlx5_cqwq_update_db_record(&cq->wq); - - /* ensure cq space is freed before enabling more cqes */ - wmb(); - - sq->cc = sqcc; } bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) From 70871f1ec4a7e2d52683a53a5ee596790080a2c3 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 13 Jul 2017 18:26:40 +0300 Subject: [PATCH 13/17] net/mlx5e: Don't recycle page if moved to far NUMA Avoid recycling an RX page if it moved to another NUMA node. Add an ethtool counter to count such events. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 6 ++++-- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 4 ++++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a4c9a0a2c4089..2da2ea222aaa8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -208,6 +208,7 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_cache_full += rq_stats->cache_full; s->rx_cache_empty += rq_stats->cache_empty; s->rx_cache_busy += rq_stats->cache_busy; + s->rx_cache_waive += rq_stats->cache_waive; for (j = 0; j < priv->channels.params.num_tc; j++) { sq_stats = &c->sq[j].stats; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 88a8749c67d64..f1dd638384d38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -163,7 +163,7 @@ static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq, static inline bool mlx5e_page_is_reserved(struct page *page) { - return page_is_pfmemalloc(page) || page_to_nid(page) != numa_node_id(); + return page_is_pfmemalloc(page) || page_to_nid(page) != numa_mem_id(); } static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, @@ -177,8 +177,10 @@ static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, return false; } - if (unlikely(page_is_pfmemalloc(dma_info->page))) + if (unlikely(mlx5e_page_is_reserved(dma_info->page))) { + rq->stats.cache_waive++; return false; + } cache->page_cache[cache->tail] = *dma_info; cache->tail = tail_next; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 6761796e803cd..6d199ffb1c0b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -84,6 +84,7 @@ struct mlx5e_sw_stats { u64 rx_cache_full; u64 rx_cache_empty; u64 rx_cache_busy; + u64 rx_cache_waive; /* Special handling counters */ u64 link_down_events_phy; @@ -123,6 +124,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_waive) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -354,6 +356,7 @@ struct mlx5e_rq_stats { u64 cache_full; u64 cache_empty; u64 cache_busy; + u64 cache_waive; }; static const struct counter_desc rq_stats_desc[] = { @@ -377,6 +380,7 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_waive) }, }; struct mlx5e_sq_stats { From 29c2849e0d204bdc892842e2aae744d2219f07f2 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Jul 2017 17:33:59 +0300 Subject: [PATCH 14/17] net/mlx5e: Slightly increase RX page-cache size In XDP_TX flow, we now get back quicker to each page in page-cache, and on some occasions refcount does not get back to 1 on time, causing some costly page allocations. Slightly increase the size of RX page-cache to significantly decrease the chances for this to happen. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e55d9439bc12b..9c0c07ffe5575 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -501,7 +501,7 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ */ #define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) -#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +#define MLX5E_CACHE_SIZE (4 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) struct mlx5e_page_cache { u32 head; u32 tail; From 7b33aaeaae1acfec01cbb0cf41fa8c07aea55609 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Jul 2017 12:56:12 +0300 Subject: [PATCH 15/17] net/mlx5e: Use kernel's mechanism to avoid missing NAPIs We used a channel state bit MLX5E_CHANNEL_NAPI_SCHED to make sure no NAPI is missed when a channel's napi_schedule() is called for completion events of the different channel's resources/rings while NAPI is currently running. Now, as similar mechanism is implemented in kernel, ("39e6c8208d7b net: solve a NAPI race"), we obsolete our own implementation and rely on the return value of napi_complete_done(). This patch removes a redundant overhead of atomic bit operations. Signed-off-by: Tariq Toukan Cc: Eric Dumazet Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 5 ----- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 1 - drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 10 +--------- 3 files changed, 1 insertion(+), 15 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 9c0c07ffe5575..7c046ae8b18ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -570,10 +570,6 @@ struct mlx5e_rq { struct mlx5_core_mkey umr_mkey; } ____cacheline_aligned_in_smp; -enum channel_flags { - MLX5E_CHANNEL_NAPI_SCHED = 1, -}; - struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; @@ -585,7 +581,6 @@ struct mlx5e_channel { struct net_device *netdev; __be32 mkey_be; u8 num_tc; - unsigned long flags; /* control */ struct mlx5e_priv *priv; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2da2ea222aaa8..b2f689ec0d729 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3697,7 +3697,6 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state); /* napi_schedule in case we have missed anything */ - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); napi_schedule(&c->napi); if (old_prog) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 439ba1f2ffbc3..92d9aa1cddd64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -40,8 +40,6 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) int work_done; int i; - clear_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); - for (i = 0; i < c->num_tc; i++) busy |= mlx5e_poll_tx_cq(&c->sq[i].cq, budget); @@ -56,13 +54,8 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) if (busy) return budget; - napi_complete_done(napi, work_done); - - /* avoid losing completion event during/after polling cqs */ - if (test_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags)) { - napi_schedule(napi); + if (unlikely(!napi_complete_done(napi, work_done))) return work_done; - } for (i = 0; i < c->num_tc; i++) mlx5e_cq_arm(&c->sq[i].cq); @@ -81,7 +74,6 @@ void mlx5e_completion_event(struct mlx5_core_cq *mcq) struct mlx5e_cq *cq = container_of(mcq, struct mlx5e_cq, mcq); cq->event_ctr++; - set_bit(MLX5E_CHANNEL_NAPI_SCHED, &cq->channel->flags); napi_schedule(cq->napi); } From a8c2eb15797a0f0bf17734d365da7bdc3e263155 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Sun, 2 Jul 2017 13:17:42 +0300 Subject: [PATCH 16/17] net/mlx5e: Stop NAPI when irq balancer changes affinity NAPI context keeps rescheduling on same CPU as long as it's busy. This doesn't give the oppurtunity for changes in irq affinities to take effect. Fix that by calling napi_complete_done() upon a change in affinity. This would stop the NAPI and reschedule it on the new CPU. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +++ .../net/ethernet/mellanox/mlx5/core/en_main.c | 5 +++++ .../net/ethernet/mellanox/mlx5/core/en_txrx.c | 20 +++++++++++++++++-- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 7c046ae8b18ee..1388a1e2f835f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -582,6 +582,9 @@ struct mlx5e_channel { __be32 mkey_be; u8 num_tc; + /* data path - accessed per napi poll */ + struct irq_desc *irq_desc; + /* control */ struct mlx5e_priv *priv; struct mlx5_core_dev *mdev; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index b2f689ec0d729..20f34131d4e55 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1761,7 +1761,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct net_device *netdev = priv->netdev; int cpu = mlx5e_get_cpu(priv, ix); struct mlx5e_channel *c; + unsigned int irq; int err; + int eqn; c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu)); if (!c) @@ -1778,6 +1780,9 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->num_tc = params->num_tc; c->xdp = !!params->xdp_prog; + mlx5_vector2eqn(priv->mdev, ix, &eqn, &irq); + c->irq_desc = irq_to_desc(irq); + netif_napi_add(netdev, &c->napi, mlx5e_napi_poll, 64); err = mlx5e_open_cq(c, icocq_moder, &cparam->icosq_cq, &c->icosq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 92d9aa1cddd64..e906b754415c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -30,8 +30,20 @@ * SOFTWARE. */ +#include #include "en.h" +static inline bool mlx5e_channel_no_affinity_change(struct mlx5e_channel *c) +{ + int current_cpu = smp_processor_id(); + const struct cpumask *aff; + struct irq_data *idata; + + idata = irq_desc_get_irq_data(c->irq_desc); + aff = irq_data_get_affinity_mask(idata); + return cpumask_test_cpu(current_cpu, aff); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -51,8 +63,12 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) busy |= c->rq.post_wqes(&c->rq); - if (busy) - return budget; + if (busy) { + if (likely(mlx5e_channel_no_affinity_change(c))) + return budget; + if (work_done == budget) + work_done--; + } if (unlikely(!napi_complete_done(napi, work_done))) return work_done; From d4b6c48800dda97f5a0824305d7c8175a127d414 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Wed, 7 Jun 2017 13:55:34 +0300 Subject: [PATCH 17/17] net/mlx5e: Distribute RSS table among all RX rings In default, uniformly distribute the RSS indirection table entries among all RX rings, rather than restricting this only to the rings on the close NUMA node. irqbalancer would anyway dynamically override the default affinities set to the RX rings. This gives better multi-stream performance and CPU util. Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 3 +-- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 15 ++------------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 1388a1e2f835f..8b7d83bcc11a5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -925,8 +925,7 @@ void mlx5e_switch_priv_channels(struct mlx5e_priv *priv, void mlx5e_activate_priv_channels(struct mlx5e_priv *priv); void mlx5e_deactivate_priv_channels(struct mlx5e_priv *priv); -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels); int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 6127e0d2f310c..d12e9fc0d76b8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -663,8 +663,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, new_channels.params = priv->channels.params; new_channels.params.num_channels = count; if (!netif_is_rxfh_configured(priv->netdev)) - mlx5e_build_default_indir_rqt(priv->mdev, - new_channels.params.indirection_rqt, + mlx5e_build_default_indir_rqt(new_channels.params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, count); if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 20f34131d4e55..77068609a153d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3833,22 +3833,11 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev) 2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/; } -void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, - u32 *indirection_rqt, int len, +void mlx5e_build_default_indir_rqt(u32 *indirection_rqt, int len, int num_channels) { - int node = mdev->priv.numa_node; - int node_num_of_cores; int i; - if (node == -1) - node = first_online_node; - - node_num_of_cores = cpumask_weight(cpumask_of_node(node)); - - if (node_num_of_cores) - num_channels = min_t(int, num_channels, node_num_of_cores); - for (i = 0; i < len; i++) indirection_rqt[i] = i % num_channels; } @@ -3987,7 +3976,7 @@ void mlx5e_build_nic_params(struct mlx5_core_dev *mdev, /* RSS */ params->rss_hfunc = ETH_RSS_HASH_XOR; netdev_rss_key_fill(params->toeplitz_hash_key, sizeof(params->toeplitz_hash_key)); - mlx5e_build_default_indir_rqt(mdev, params->indirection_rqt, + mlx5e_build_default_indir_rqt(params->indirection_rqt, MLX5E_INDIR_RQT_SIZE, max_channels); }