From 1c1b522808a18402f043c1418b4e48c7355480cc Mon Sep 17 00:00:00 2001
From: Tariq Toukan
Date: Wed, 30 Nov 2016 17:59:37 +0200
Subject: [PATCH 1/7] net/mlx5e: Implement Fragmented Work Queue (WQ)

Add a new type, struct mlx5_frag_buf, which is used to allocate
fragmented buffers rather than contiguous ones, and make the
Completion Queues (CQs) use it, as they are big (2MB per CQ by
default in Striding RQ).

This fixes failures of the type:
"mlx5e_open_locked: mlx5e_open_channels failed, -12"
which occur when dma_zalloc_coherent cannot find enough contiguous
coherent memory to satisfy the driver's request, e.g. when the user
tries to set up more or larger rings.

Signed-off-by: Tariq Toukan
Reported-by: Sebastian Ott
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 .../net/ethernet/mellanox/mlx5/core/alloc.c   | 66 +++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en.h  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 10 +--
 drivers/net/ethernet/mellanox/mlx5/core/wq.c  | 26 +++++---
 drivers/net/ethernet/mellanox/mlx5/core/wq.h  | 18 ++++-
 include/linux/mlx5/driver.h                   | 11 ++++
 6 files changed, 116 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 2c6e3c7b74179..44791de5afe6e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -106,6 +106,63 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+                             struct mlx5_frag_buf *buf, int node)
+{
+        int i;
+
+        buf->size = size;
+        buf->npages = 1 << get_order(size);
+        buf->page_shift = PAGE_SHIFT;
+        buf->frags = kcalloc(buf->npages, sizeof(struct mlx5_buf_list),
+                             GFP_KERNEL);
+        if (!buf->frags)
+                goto err_out;
+
+        for (i = 0; i < buf->npages; i++) {
+                struct mlx5_buf_list *frag = &buf->frags[i];
+                int frag_sz = min_t(int, size, PAGE_SIZE);
+
+                frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+                                                          &frag->map, node);
+                if (!frag->buf)
+                        goto err_free_buf;
+                if (frag->map & ((1 << buf->page_shift) - 1)) {
+                        dma_free_coherent(&dev->pdev->dev, frag_sz,
+                                          buf->frags[i].buf, buf->frags[i].map);
+                        mlx5_core_warn(dev, "unexpected map alignment: 0x%p, page_shift=%d\n",
+                                       (void *)frag->map, buf->page_shift);
+                        goto err_free_buf;
+                }
+                size -= frag_sz;
+        }
+
+        return 0;
+
+err_free_buf:
+        while (i--)
+                dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+                                  buf->frags[i].map);
+        kfree(buf->frags);
+err_out:
+        return -ENOMEM;
+}
+
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+        int size = buf->size;
+        int i;
+
+        for (i = 0; i < buf->npages; i++) {
+                int frag_sz = min_t(int, size, PAGE_SIZE);
+
+                dma_free_coherent(&dev->pdev->dev, frag_sz, buf->frags[i].buf,
+                                  buf->frags[i].map);
+                size -= frag_sz;
+        }
+        kfree(buf->frags);
+}
+
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
                                                  int node)
 {
@@ -230,3 +287,12 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
         }
 }
 EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+        int i;
+
+        for (i = 0; i < buf->npages; i++)
+                pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 442dbc3e6be4a..f16f7fbd20447 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -286,7 +286,7 @@ struct mlx5e_cq {
         u16                        decmprs_wqe_counter;
 
         /* control */
-        struct mlx5_wq_ctrl        wq_ctrl;
+        struct mlx5_frag_wq_ctrl   wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_rq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 6b492ca17d7ee..ba25cd361bb22 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1201,7 +1201,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
 
 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
-        mlx5_wq_destroy(&cq->wq_ctrl);
+        mlx5_cqwq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1218,7 +1218,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
         int err;
 
         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-                sizeof(u64) * cq->wq_ctrl.buf.npages;
+                sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
         in = mlx5_vzalloc(inlen);
         if (!in)
                 return -ENOMEM;
@@ -1227,15 +1227,15 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
         memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-        mlx5_fill_page_array(&cq->wq_ctrl.buf,
-                             (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+        mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
         mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
         MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
         MLX5_SET(cqc,   cqc, c_eqn,         eqn);
         MLX5_SET(cqc,   cqc, uar_page,      mcq->uar->index);
-        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+        MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
                                             MLX5_ADAPTER_PAGE_SHIFT);
         MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 821a087c7ae22..921673c42bc98 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -101,13 +101,15 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *cqc, struct mlx5_cqwq *wq,
-                     struct mlx5_wq_ctrl *wq_ctrl)
+                     struct mlx5_frag_wq_ctrl *wq_ctrl)
 {
         int err;
 
-        wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
-        wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
-        wq->sz_m1 = (1 << wq->log_sz) - 1;
+        wq->log_stride  = 6 + MLX5_GET(cqc, cqc, cqe_sz);
+        wq->log_sz      = MLX5_GET(cqc, cqc, log_cq_size);
+        wq->sz_m1       = (1 << wq->log_sz) - 1;
+        wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
+        wq->frag_sz_m1  = (1 << wq->log_frag_strides) - 1;
 
         err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
         if (err) {
@@ -115,14 +117,16 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                 return err;
         }
 
-        err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
-                                  &wq_ctrl->buf, param->buf_numa_node);
+        err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+                                       &wq_ctrl->frag_buf,
+                                       param->buf_numa_node);
         if (err) {
-                mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+                mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+                               err);
                 goto err_db_free;
         }
 
-        wq->buf = wq_ctrl->buf.direct.buf;
+        wq->frag_buf = wq_ctrl->frag_buf;
         wq->db  = wq_ctrl->db.db;
 
         wq_ctrl->mdev = mdev;
@@ -184,3 +188,9 @@ void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
         mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
         mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
 }
+
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl)
+{
+        mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->frag_buf);
+        mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 6c2a8f95093c6..d8afed898c31d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -47,6 +47,12 @@ struct mlx5_wq_ctrl {
         struct mlx5_db          db;
 };
 
+struct mlx5_frag_wq_ctrl {
+        struct mlx5_core_dev    *mdev;
+        struct mlx5_frag_buf    frag_buf;
+        struct mlx5_db          db;
+};
+
 struct mlx5_wq_cyc {
         void                    *buf;
         __be32                  *db;
@@ -55,12 +61,14 @@ struct mlx5_wq_cyc {
 };
 
 struct mlx5_cqwq {
-        void                    *buf;
+        struct mlx5_frag_buf    frag_buf;
         __be32                  *db;
         u32                     sz_m1;
+        u32                     frag_sz_m1;
         u32                     cc; /* consumer counter */
         u8                      log_sz;
         u8                      log_stride;
+        u8                      log_frag_strides;
 };
 
 struct mlx5_wq_ll {
@@ -81,7 +89,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
                      void *cqc, struct mlx5_cqwq *wq,
-                     struct mlx5_wq_ctrl *wq_ctrl);
+                     struct mlx5_frag_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
@@ -90,6 +98,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl);
 
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
@@ -116,7 +125,10 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-        return wq->buf + (ix << wq->log_stride);
+        unsigned int frag = (ix >> wq->log_frag_strides);
+
+        return wq->frag_buf.frags[frag].buf +
+                ((wq->frag_sz_m1 & ix) << wq->log_stride);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 68b85efc3908e..0ae55361e674b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -318,6 +318,13 @@ struct mlx5_buf {
         u8                      page_shift;
 };
 
+struct mlx5_frag_buf {
+        struct mlx5_buf_list    *frags;
+        int                     npages;
+        int                     size;
+        u8                      page_shift;
+};
+
 struct mlx5_eq_tasklet {
         struct list_head        list;
         struct list_head        process_list;
@@ -822,6 +829,9 @@ int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size,
                         struct mlx5_buf *buf, int node);
 int mlx5_buf_alloc(struct mlx5_core_dev *dev, int size, struct mlx5_buf *buf);
 void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf);
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+                             struct mlx5_frag_buf *buf, int node);
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf);
 struct mlx5_cmd_mailbox *mlx5_alloc_cmd_mailbox_chain(struct mlx5_core_dev *dev,
                                                       gfp_t flags, int npages);
 void mlx5_free_cmd_mailbox_chain(struct mlx5_core_dev *dev,
@@ -866,6 +876,7 @@ void mlx5_unregister_debugfs(void);
 int mlx5_eq_init(struct mlx5_core_dev *dev);
 void mlx5_eq_cleanup(struct mlx5_core_dev *dev);
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *frag_buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
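The heart of patch 1 is the arithmetic in mlx5_cqwq_get_wqe(): a linear CQE index is split into a fragment (page) number and a byte offset inside that page. The standalone sketch below replays that math with illustrative values (4KB pages, 64B CQEs); it is not driver code, just the same shifts and mask in plain C:

```c
/* Sketch of the mlx5_cqwq_get_wqe() index split, with assumed values:
 * PAGE_SHIFT = 12 (4KB pages) and log_stride = 6 (64B CQEs), so each
 * page fragment holds 2^(12-6) = 64 CQEs.
 */
#include <stdio.h>

#define PAGE_SHIFT 12
#define LOG_STRIDE 6

int main(void)
{
        unsigned int log_frag_strides = PAGE_SHIFT - LOG_STRIDE;
        unsigned int frag_sz_m1 = (1u << log_frag_strides) - 1;
        unsigned int ix;

        for (ix = 0; ix < 256; ix += 63) {
                unsigned int frag = ix >> log_frag_strides;          /* which page */
                unsigned int off  = (ix & frag_sz_m1) << LOG_STRIDE; /* byte offset */

                printf("cqe %3u -> frag %u, offset %4u\n", ix, frag, off);
        }
        return 0;
}
```

Because each fragment is a single page, the driver only ever asks dma_zalloc_coherent() for page-sized chunks, which is exactly what avoids the -ENOMEM (-12) failure quoted in the commit message.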
From 3608ae77c098dfe134103a9dec4c78687896708e Mon Sep 17 00:00:00 2001
From: Tariq Toukan
Date: Wed, 30 Nov 2016 17:59:38 +0200
Subject: [PATCH 2/7] net/mlx5e: Move function mlx5e_create_umr_mkey

In the next patch we are going to create a UMR MKey per RQ, so we need
mlx5e_create_umr_mkey declared before mlx5e_create_rq.

Signed-off-by: Tariq Toukan
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 74 +++++++++----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index ba25cd361bb22..49ca30bf1b6f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -471,6 +471,43 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
         kfree(rq->mpwqe.info);
 }
 
+static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
+{
+        struct mlx5_core_dev *mdev = priv->mdev;
+        u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
+                                         BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
+        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+        void *mkc;
+        u32 *in;
+        int err;
+
+        in = mlx5_vzalloc(inlen);
+        if (!in)
+                return -ENOMEM;
+
+        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+        npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
+
+        MLX5_SET(mkc, mkc, free, 1);
+        MLX5_SET(mkc, mkc, umr_en, 1);
+        MLX5_SET(mkc, mkc, lw, 1);
+        MLX5_SET(mkc, mkc, lr, 1);
+        MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+        MLX5_SET(mkc, mkc, qpn, 0xffffff);
+        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+        MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
+        MLX5_SET(mkc, mkc, translations_octword_size,
+                 MLX5_MTT_OCTW(npages));
+        MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+
+        err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
+
+        kvfree(in);
+        return err;
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
                            struct mlx5e_rq_param *param,
                            struct mlx5e_rq *rq)
@@ -3625,43 +3662,6 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
         mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
-{
-        struct mlx5_core_dev *mdev = priv->mdev;
-        u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
-                                         BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
-        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-        void *mkc;
-        u32 *in;
-        int err;
-
-        in = mlx5_vzalloc(inlen);
-        if (!in)
-                return -ENOMEM;
-
-        mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
-
-        npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
-
-        MLX5_SET(mkc, mkc, free, 1);
-        MLX5_SET(mkc, mkc, umr_en, 1);
-        MLX5_SET(mkc, mkc, lw, 1);
-        MLX5_SET(mkc, mkc, lr, 1);
-        MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
-
-        MLX5_SET(mkc, mkc, qpn, 0xffffff);
-        MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
-        MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
-        MLX5_SET(mkc, mkc, translations_octword_size,
-                 MLX5_MTT_OCTW(npages));
-        MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
-
-        err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
-
-        kvfree(in);
-        return err;
-}
-
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
                            struct net_device *netdev,
                            const struct mlx5e_profile *profile,
From ec8b9981ad3f3eeb5dcc4f237266e897c363f896 Mon Sep 17 00:00:00 2001
From: Tariq Toukan
Date: Wed, 30 Nov 2016 17:59:39 +0200
Subject: [PATCH 3/7] net/mlx5e: Create UMR MKey per RQ

In the Striding RQ implementation, we used a single UMR (User-Mode
Memory Registration) memory key for all RQs. When the product of the
number of RQs and their size gets high, we hit the u16 field size
limitation in FW.

Here we move to using a UMR memory key per RQ, so we can scale to any
number of rings, each with the maximum buffer size.

Signed-off-by: Tariq Toukan
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h  | 12 ++---
 .../ethernet/mellanox/mlx5/core/en_ethtool.c  | 12 +----
 .../net/ethernet/mellanox/mlx5/core/en_main.c | 53 ++++++++++---------
 3 files changed, 35 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index f16f7fbd20447..63dd6390b1615 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -77,9 +77,9 @@
                                                  MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
-        (rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+#define MLX5E_REQUIRED_MTTS(wqes)               \
+        (wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
 
 #define MLX5_UMR_ALIGN                          (2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD       (128)
@@ -347,7 +347,6 @@ struct mlx5e_rq {
         struct {
                 struct {
                         struct mlx5e_mpw_info *info;
                         void                  *mtt_no_align;
-                        u32                    mtt_offset;
                 } mpwqe;
         };
         struct {
@@ -382,6 +381,7 @@ struct mlx5e_rq {
         u32                    rqn;
         struct mlx5e_channel  *channel;
         struct mlx5e_priv     *priv;
+        struct mlx5_core_mkey  umr_mkey;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_umr_dma_info {
@@ -689,7 +689,6 @@ struct mlx5e_priv {
         unsigned long              state;
         struct mutex               state_lock; /* Protects Interface state */
-        struct mlx5_core_mkey      umr_mkey;
         struct mlx5e_rq            drop_rq;
 
         struct mlx5e_channel     **channel;
@@ -838,8 +837,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-        return rq->mpwqe.mtt_offset +
-                wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
+        return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index aa963d74e14d3..352462af8d51a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -499,8 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
                 return -EINVAL;
         }
 
-        num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
-                                       rx_pending_wqes);
+        num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
         if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
             !MLX5E_VALID_NUM_MTTS(num_mtts)) {
                 netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
@@ -565,7 +564,6 @@ static int mlx5e_set_channels(struct net_device *dev,
         unsigned int count = ch->combined_count;
         bool arfs_enabled;
         bool was_opened;
-        u32 num_mtts;
         int err = 0;
 
         if (!count) {
@@ -584,14 +582,6 @@ static int mlx5e_set_channels(struct net_device *dev,
                 return -EINVAL;
         }
 
-        num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
-        if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
-            !MLX5E_VALID_NUM_MTTS(num_mtts)) {
-                netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n",
-                            __func__, count);
-                return -EINVAL;
-        }
-
         if (priv->params.num_channels == count)
                 return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 49ca30bf1b6f1..84a4adb7bbb0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -471,24 +471,25 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
         kfree(rq->mpwqe.info);
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
+static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv,
+                                 u64 npages, u8 page_shift,
+                                 struct mlx5_core_mkey *umr_mkey)
 {
         struct mlx5_core_dev *mdev = priv->mdev;
-        u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
-                                         BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
         int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
         void *mkc;
         u32 *in;
         int err;
 
+        if (!MLX5E_VALID_NUM_MTTS(npages))
+                return -EINVAL;
+
         in = mlx5_vzalloc(inlen);
         if (!in)
                 return -ENOMEM;
 
         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-        npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
-
         MLX5_SET(mkc, mkc, free, 1);
         MLX5_SET(mkc, mkc, umr_en, 1);
         MLX5_SET(mkc, mkc, lw, 1);
@@ -497,17 +498,25 @@ static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
 
         MLX5_SET(mkc, mkc, qpn, 0xffffff);
         MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
-        MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
+        MLX5_SET64(mkc, mkc, len, npages << page_shift);
         MLX5_SET(mkc, mkc, translations_octword_size,
                  MLX5_MTT_OCTW(npages));
-        MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
+        MLX5_SET(mkc, mkc, log_page_size, page_shift);
 
-        err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
+        err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
 
         kvfree(in);
         return err;
 }
 
+static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq)
+{
+        struct mlx5e_priv *priv = rq->priv;
+        u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size));
+
+        return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
+}
+
 static int mlx5e_create_rq(struct mlx5e_channel *c,
                            struct mlx5e_rq_param *param,
                            struct mlx5e_rq *rq)
@@ -564,18 +573,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                 rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
                 rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-                rq->mpwqe.mtt_offset = c->ix *
-                        MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
-
                 rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
                 rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
 
                 rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                 byte_count = rq->buff.wqe_sz;
-                rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
-                err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+
+                err = mlx5e_create_rq_umr_mkey(rq);
                 if (err)
                         goto err_rq_wq_destroy;
+                rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+
+                err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+                if (err)
+                        goto err_destroy_umr_mkey;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
@@ -626,6 +637,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
         return 0;
 
+err_destroy_umr_mkey:
+        mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+
 err_rq_wq_destroy:
         if (rq->xdp_prog)
                 bpf_prog_put(rq->xdp_prog);
@@ -644,6 +658,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
         switch (rq->wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 mlx5e_rq_free_mpwqe_info(rq);
+                mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey);
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 kfree(rq->dma_info);
@@ -3868,15 +3883,9 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
         profile = priv->profile;
         clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
-        err = mlx5e_create_umr_mkey(priv);
-        if (err) {
-                mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-                goto out;
-        }
-
         err = profile->init_tx(priv);
         if (err)
-                goto err_destroy_umr_mkey;
+                goto out;
 
         err = mlx5e_open_drop_rq(priv);
         if (err) {
@@ -3916,9 +3925,6 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 err_cleanup_tx:
         profile->cleanup_tx(priv);
 
-err_destroy_umr_mkey:
-        mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
-
 out:
         return err;
 }
@@ -3967,7 +3973,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
         profile->cleanup_rx(priv);
         mlx5e_close_drop_rq(priv);
         profile->cleanup_tx(priv);
-        mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
         cancel_delayed_work_sync(&priv->update_stats_work);
 }
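Concrete numbers make the new per-RQ MTT budget easier to check. The sketch below mirrors MLX5E_REQUIRED_MTTS() and MLX5E_VALID_NUM_MTTS() in plain C; note that MLX5_MPWRQ_PAGES_PER_WQE is assumed to be 16 here (a typical value for 4KB pages), since the series does not restate it:

```c
/* Per-RQ MTT sizing sketch. PAGES_PER_WQE = 16 is an assumption; the
 * real constant comes from the MPWQE geometry in en.h. Shows how the
 * validity check (octwords - 1 <= U16_MAX) behaves near the limit.
 */
#include <stdint.h>
#include <stdio.h>

#define ALIGN8(x)           (((x) + 7u) & ~7u)
#define PAGES_PER_WQE       16u                        /* assumed */
#define REQUIRED_MTTS(wqes) ((wqes) * ALIGN8(PAGES_PER_WQE))
#define MTT_OCTW(n)         (ALIGN8(n) / 2)
#define VALID_NUM_MTTS(n)   (MTT_OCTW(n) - 1 <= UINT16_MAX)

int main(void)
{
        uint32_t log_rq_size;

        for (log_rq_size = 10; log_rq_size <= 14; log_rq_size++) {
                uint32_t wqes = 1u << log_rq_size;
                uint32_t mtts = REQUIRED_MTTS(wqes);

                printf("2^%-2u WQEs -> %6u MTTs, %6u octwords, valid: %s\n",
                       log_rq_size, mtts, MTT_OCTW(mtts),
                       VALID_NUM_MTTS(mtts) ? "yes" : "no");
        }
        return 0;
}
```

With one mkey per RQ, the u16 translations_octword_size field only has to cover a single ring, so the old num-of-RQs factor, and with it the scaling limit, disappears.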
From 53636068d8d6a08ca99949e2094b2b04ececa2d8 Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Wed, 30 Nov 2016 17:59:40 +0200
Subject: [PATCH 4/7] net/mlx5e: Remove redundant hashtable lookup in configure flower

We will never find a flow with the same cookie, as cls_flower always
allocates a new flow and the cookie is the address of the allocated
memory.

Fixes: e3a2b7ed018e ("net/mlx5e: Support offload cls_flower with drop action")
Signed-off-by: Roi Dayan
Reviewed-by: Hadar Hen Zion
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c | 26 +++++--------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4d06fab842d65..dd6d95473ade4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -915,25 +915,17 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
         u32 flow_tag, action;
         struct mlx5e_tc_flow *flow;
         struct mlx5_flow_spec *spec;
-        struct mlx5_flow_handle *old = NULL;
-        struct mlx5_esw_flow_attr *old_attr = NULL;
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
         if (esw && esw->mode == SRIOV_OFFLOADS)
                 fdb_flow = true;
 
-        flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
-                                      tc->ht_params);
-        if (flow) {
-                old = flow->rule;
-                old_attr = flow->attr;
-        } else {
-                if (fdb_flow)
-                        flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr),
-                                       GFP_KERNEL);
-                else
-                        flow = kzalloc(sizeof(*flow), GFP_KERNEL);
-        }
+        if (fdb_flow)
+                flow = kzalloc(sizeof(*flow) +
+                               sizeof(struct mlx5_esw_flow_attr),
+                               GFP_KERNEL);
+        else
+                flow = kzalloc(sizeof(*flow), GFP_KERNEL);
 
         spec = mlx5_vzalloc(sizeof(*spec));
         if (!spec || !flow) {
@@ -970,17 +962,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
         if (err)
                 goto err_del_rule;
 
-        if (old)
-                mlx5e_tc_del_flow(priv, old, old_attr);
-
         goto out;
 
 err_del_rule:
         mlx5_del_flow_rules(flow->rule);
 
 err_free:
-        if (!old)
-                kfree(flow);
+        kfree(flow);
 
 out:
         kvfree(spec);
         return err;
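Patch 4's argument is purely about cookie identity: cls_flower keys offloaded filters by the address of a freshly allocated object, so looking the new cookie up before insertion can never produce a hit. A hedged, userspace stand-in for that reasoning (not the kernel rhashtable API):

```c
/* Two live heap objects can never share an address, so a hash lookup
 * keyed by the address of a brand-new allocation must miss. Plain C
 * illustration only; the driver uses an rhashtable keyed on f->cookie.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
        void *existing = malloc(64); /* stands in for an already-offloaded filter */
        void *incoming = malloc(64); /* stands in for the new cls_flower filter */

        unsigned long existing_cookie = (unsigned long)existing;
        unsigned long incoming_cookie = (unsigned long)incoming;

        printf("cookies collide: %s\n",
               existing_cookie == incoming_cookie ? "yes" : "no"); /* always "no" */

        free(existing);
        free(incoming);
        return 0;
}
```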
From 86a33ae1ca06a58629c4d0d80ac6d099ff932ae0 Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Wed, 30 Nov 2016 17:59:41 +0200
Subject: [PATCH 5/7] net/mlx5e: Correct cleanup order when deleting offloaded TC rules

According to the reverse unwinding principle, at delete time we should
first handle deletion of the steering rule and only later handle the
vlan deletion from the eswitch.

Fixes: 8b32580df1cb ("net/mlx5e: Add TC vlan action for SRIOV offloads")
Signed-off-by: Roi Dayan
Reviewed-by: Or Gerlitz
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index dd6d95473ade4..4d71445d4a91a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -151,11 +151,11 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 
         counter = mlx5_flow_rule_counter(rule);
 
+        mlx5_del_flow_rules(rule);
+
         if (esw && esw->mode == SRIOV_OFFLOADS)
                 mlx5_eswitch_del_vlan_action(esw, attr);
 
-        mlx5_del_flow_rules(rule);
-
         mlx5_fc_destroy(priv->mdev, counter);
 
         if (!mlx5e_tc_num_filters(priv) && (priv->fs.tc.t)) {

From 961e8979ec3578ef4c441b69a5b7b9febc89fd18 Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Wed, 30 Nov 2016 17:59:42 +0200
Subject: [PATCH 6/7] net/mlx5e: Refactor tc del flow to accept mlx5e_tc_flow instance

Change the function that deletes an offloaded TC rule to take a
struct mlx5e_tc_flow instance, which contains both the flow handle
and the flow attributes. This is a cleanup needed for downstream
patches; it doesn't change any functionality.

Signed-off-by: Roi Dayan
Reviewed-by: Or Gerlitz
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 4d71445d4a91a..3875c1cf52fd8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -143,18 +143,17 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 }
 
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-                              struct mlx5_flow_handle *rule,
-                              struct mlx5_esw_flow_attr *attr)
+                              struct mlx5e_tc_flow *flow)
 {
         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
         struct mlx5_fc *counter = NULL;
 
-        counter = mlx5_flow_rule_counter(rule);
+        counter = mlx5_flow_rule_counter(flow->rule);
 
-        mlx5_del_flow_rules(rule);
+        mlx5_del_flow_rules(flow->rule);
 
         if (esw && esw->mode == SRIOV_OFFLOADS)
-                mlx5_eswitch_del_vlan_action(esw, attr);
+                mlx5_eswitch_del_vlan_action(esw, flow->attr);
 
         mlx5_fc_destroy(priv->mdev, counter);
 
@@ -1005,7 +1004,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
         rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-        mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+        mlx5e_tc_del_flow(priv, flow);
 
         if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
                 mlx5e_detach_encap(priv, flow);
@@ -1065,7 +1064,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
         struct mlx5e_tc_flow *flow = ptr;
         struct mlx5e_priv *priv = arg;
 
-        mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+        mlx5e_tc_del_flow(priv, flow);
 
         kfree(flow);
 }
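The "reverse unwinding principle" patches 5 and 6 lean on is the usual kernel teardown rule: release resources in the opposite order of acquisition, so nothing is destroyed while something created later still refers to it. A minimal sketch with hypothetical helper names:

```c
/* Hypothetical setup/teardown pair illustrating reverse unwinding:
 * the steering rule is created after the vlan action, so it must be
 * deleted before it. Not driver code; names are invented.
 */
#include <stdio.h>

static void add_vlan_action(void)   { printf("setup  1: eswitch vlan action\n"); }
static void add_steering_rule(void) { printf("setup  2: steering rule\n"); }
static void del_steering_rule(void) { printf("delete 2: steering rule\n"); }
static void del_vlan_action(void)   { printf("delete 1: eswitch vlan action\n"); }

int main(void)
{
        /* create path */
        add_vlan_action();
        add_steering_rule();

        /* delete path: strictly the reverse */
        del_steering_rule();
        del_vlan_action();
        return 0;
}
```

Folding the handle and attributes into one struct mlx5e_tc_flow (patch 6) is what lets the delete path make such ordering decisions in a single place.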
From 5067b6020770ef7c8102f47079c9e577d175ef2c Mon Sep 17 00:00:00 2001
From: Roi Dayan
Date: Wed, 30 Nov 2016 17:59:43 +0200
Subject: [PATCH 7/7] net/mlx5e: Remove flow encap entry in the correct place

Handling of the flow encap entry should be done inside tc del flow,
and it is only relevant for offloaded eswitch TC rules.

Fixes: 11a457e9b6c1 ("net/mlx5e: Add basic TC tunnel set action for SRIOV offloads")
Signed-off-by: Roi Dayan
Reviewed-by: Or Gerlitz
Signed-off-by: Saeed Mahameed
Signed-off-by: David S. Miller
---
 .../net/ethernet/mellanox/mlx5/core/en_tc.c | 43 ++++++++++---------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 3875c1cf52fd8..f07ef8c7da559 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -142,6 +142,24 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
         return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+                               struct mlx5e_tc_flow *flow) {
+        struct list_head *next = flow->encap.next;
+
+        list_del(&flow->encap);
+        if (list_empty(next)) {
+                struct mlx5_encap_entry *e;
+
+                e = list_entry(next, struct mlx5_encap_entry, flows);
+                if (e->n) {
+                        mlx5_encap_dealloc(priv->mdev, e->encap_id);
+                        neigh_release(e->n);
+                }
+                hlist_del_rcu(&e->encap_hlist);
+                kfree(e);
+        }
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                               struct mlx5e_tc_flow *flow)
 {
@@ -152,8 +170,11 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 
         mlx5_del_flow_rules(flow->rule);
 
-        if (esw && esw->mode == SRIOV_OFFLOADS)
+        if (esw && esw->mode == SRIOV_OFFLOADS) {
                 mlx5_eswitch_del_vlan_action(esw, flow->attr);
+                if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+                        mlx5e_detach_encap(priv, flow);
+        }
 
         mlx5_fc_destroy(priv->mdev, counter);
 
@@ -973,24 +994,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
         return err;
 }
 
-static void mlx5e_detach_encap(struct mlx5e_priv *priv,
-                               struct mlx5e_tc_flow *flow) {
-        struct list_head *next = flow->encap.next;
-
-        list_del(&flow->encap);
-        if (list_empty(next)) {
-                struct mlx5_encap_entry *e;
-
-                e = list_entry(next, struct mlx5_encap_entry, flows);
-                if (e->n) {
-                        mlx5_encap_dealloc(priv->mdev, e->encap_id);
-                        neigh_release(e->n);
-                }
-                hlist_del_rcu(&e->encap_hlist);
-                kfree(e);
-        }
-}
-
 int mlx5e_delete_flower(struct mlx5e_priv *priv,
                         struct tc_cls_flower_offload *f)
 {
@@ -1006,8 +1009,6 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
         mlx5e_tc_del_flow(priv, flow);
 
-        if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
-                mlx5e_detach_encap(priv, flow);
         kfree(flow);
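The lifetime rule mlx5e_detach_encap() enforces is reference counting by list membership: every offloaded flow sharing an encap entry is linked on e->flows, and whichever flow unlinks last tears the entry down. A userspace sketch of that pattern, with invented names (the real code additionally releases the neighbour and the encap_id):

```c
/* Shared-object teardown via list membership, sketched with a tiny
 * doubly linked list. detach() mimics mlx5e_detach_encap(): unlink
 * the flow, and free the entry once no flows remain.
 */
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };

static void list_init(struct node *h)     { h->prev = h->next = h; }
static int  list_is_empty(struct node *h) { return h->next == h; }
static void list_add(struct node *h, struct node *n)
{
        n->next = h->next; n->prev = h;
        h->next->prev = n; h->next = n;
}
static void list_del(struct node *n)
{
        n->prev->next = n->next;
        n->next->prev = n->prev;
}

struct encap_entry { struct node flows; };

static void detach(struct encap_entry *e, struct node *flow_link)
{
        list_del(flow_link);
        if (list_is_empty(&e->flows)) { /* last user is gone */
                printf("freeing shared encap entry\n");
                free(e);
        }
}

int main(void)
{
        struct encap_entry *e = malloc(sizeof(*e));
        struct node f1, f2;

        list_init(&e->flows);
        list_add(&e->flows, &f1);
        list_add(&e->flows, &f2);

        detach(e, &f1); /* entry survives, f2 is still attached */
        detach(e, &f2); /* last flow detaches, entry is freed */
        return 0;
}
```

Moving this call under the SRIOV_OFFLOADS branch of mlx5e_tc_del_flow() also guarantees it only runs for eswitch rules, the only ones that can carry an encap action.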