net/mlx5: Use order-0 allocations for all WQ types
Complete the transition of all WQ types to use fragmented
order-0 coherent memory instead of high-order allocations.

CQ-WQ already uses order-0.
Here we do the same for cyclic and linked-list WQs.

This allows the driver to load cleanly on systems with highly
fragmented coherent memory.

Performance tests:
ConnectX-5 100Gbps, CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
Packet rate of 64B packets, single transmit ring, size 8K.

No degradation was observed.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
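
For readers unfamiliar with the scheme: instead of backing a work queue with one
large physically contiguous (high-order) DMA-coherent buffer, the WQ is backed by
an array of order-0 (single-page) coherent fragments, and a WQE index is translated
into a fragment plus an offset inside it. Below is a minimal, hedged sketch of that
idea; all names (frag_wq, FRAG_SHIFT, frag_wq_alloc, frag_wq_get_wqe) are
hypothetical and this is not the driver's mlx5_wq_ctrl/mlx5_frag_buf code.

/*
 * Minimal sketch (not the mlx5 implementation): back a work queue with an
 * array of order-0 DMA-coherent pages instead of a single high-order buffer.
 */
#include <linux/dma-mapping.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/types.h>

#define FRAG_SHIFT	PAGE_SHIFT		/* one order-0 page per fragment */
#define FRAG_SIZE	(1UL << FRAG_SHIFT)

struct frag_wq {
	void		**frags;	/* kernel addresses, one per fragment */
	dma_addr_t	*dma;		/* bus addresses, one per fragment */
	int		nfrags;
	u8		log_stride;	/* WQE stride, a power of two */
};

static int frag_wq_alloc(struct device *dev, struct frag_wq *wq,
			 int nwqes, u8 log_stride)
{
	size_t bytes = (size_t)nwqes << log_stride;
	int i;

	wq->log_stride = log_stride;
	wq->nfrags = DIV_ROUND_UP(bytes, FRAG_SIZE);
	wq->frags = kcalloc(wq->nfrags, sizeof(*wq->frags), GFP_KERNEL);
	wq->dma = kcalloc(wq->nfrags, sizeof(*wq->dma), GFP_KERNEL);
	if (!wq->frags || !wq->dma)
		return -ENOMEM;	/* error unwinding omitted for brevity */

	/* Each fragment is a single page, so no physically contiguous
	 * high-order coherent allocation is ever requested.
	 */
	for (i = 0; i < wq->nfrags; i++) {
		wq->frags[i] = dma_alloc_coherent(dev, FRAG_SIZE, &wq->dma[i],
						  GFP_KERNEL);
		if (!wq->frags[i])
			return -ENOMEM;
	}
	return 0;
}

/* A WQE index is split into a fragment index and an offset within it. */
static void *frag_wq_get_wqe(struct frag_wq *wq, u16 ix)
{
	size_t off = (size_t)ix << wq->log_stride;

	return wq->frags[off >> FRAG_SHIFT] + (off & (FRAG_SIZE - 1));
}

The real driver additionally hands the per-fragment bus addresses to the device
(the mlx5_fill_page_frag_array() calls in the diffs below) and ensures that no
single WQE crosses a fragment boundary.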
Tariq Toukan authored and Saeed Mahameed committed May 25, 2018
1 parent 549322f commit 3a2f703
Showing 9 changed files with 123 additions and 94 deletions.
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -314,7 +314,7 @@ struct mlx5e_cq {
 
        /* control */
        struct mlx5_core_dev *mdev;
-       struct mlx5_frag_wq_ctrl wq_ctrl;
+       struct mlx5_wq_ctrl wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_tx_wqe_info {
15 changes: 8 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -646,8 +646,8 @@ static int mlx5e_create_rq(struct mlx5e_rq *rq,
                                          MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, rq->wq_ctrl.db.dma);
 
-       mlx5_fill_page_array(&rq->wq_ctrl.buf,
-                            (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+       mlx5_fill_page_frag_array(&rq->wq_ctrl.buf,
+                                 (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
        err = mlx5_core_create_rq(mdev, in, inlen, &rq->rqn);
 
@@ -1096,7 +1096,8 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev,
                                          MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma);
 
-       mlx5_fill_page_array(&csp->wq_ctrl->buf, (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
+       mlx5_fill_page_frag_array(&csp->wq_ctrl->buf,
+                                 (__be64 *)MLX5_ADDR_OF(wq, wq, pas));
 
        err = mlx5_core_create_sq(mdev, in, inlen, sqn);
 
@@ -1538,7 +1539,7 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
 
 static void mlx5e_free_cq(struct mlx5e_cq *cq)
 {
-       mlx5_cqwq_destroy(&cq->wq_ctrl);
+       mlx5_wq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1554,7 +1555,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
        int err;
 
        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-               sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
+               sizeof(u64) * cq->wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
                return -ENOMEM;
@@ -1563,15 +1564,15 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
        memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-       mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+       mlx5_fill_page_frag_array(&cq->wq_ctrl.buf,
                                  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
        mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
        MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
-       MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
+       MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
                                          MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
 
17 changes: 9 additions & 8 deletions drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -383,16 +383,16 @@ static inline u16 mlx5e_icosq_wrap_cnt(struct mlx5e_icosq *sq)
        return sq->pc >> MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
 }
 
-static inline void mlx5e_fill_icosq_edge(struct mlx5e_icosq *sq,
-                                        struct mlx5_wq_cyc *wq,
-                                        u16 pi)
+static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
+                                             struct mlx5_wq_cyc *wq,
+                                             u16 pi, u16 frag_pi)
 {
        struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
-       u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+       u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
        edge_wi = wi + nnops;
 
-       /* fill sq edge with nops to avoid wqe wrapping two pages */
+       /* fill sq frag edge with nops to avoid wqe wrapping two pages */
        for (; wi < edge_wi; wi++) {
                wi->opcode = MLX5_OPCODE_NOP;
                mlx5e_post_nop(wq, sq->sqn, &sq->pc);
@@ -407,14 +407,15 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *umr_wqe;
        u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-       u16 pi;
+       u16 pi, frag_pi;
        int err;
        int i;
 
        pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
 
-       if (unlikely(pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_size(wq))) {
-               mlx5e_fill_icosq_edge(sq, wq, pi);
+       if (unlikely(frag_pi + MLX5E_UMR_WQEBBS > mlx5_wq_cyc_get_frag_size(wq))) {
+               mlx5e_fill_icosq_frag_edge(sq, wq, pi, frag_pi);
                pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
        }
 
24 changes: 13 additions & 11 deletions drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -296,16 +296,16 @@ mlx5e_txwqe_build_dsegs(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        return -ENOMEM;
 }
 
-static inline void mlx5e_fill_sq_edge(struct mlx5e_txqsq *sq,
-                                     struct mlx5_wq_cyc *wq,
-                                     u16 pi)
+static inline void mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq,
+                                          struct mlx5_wq_cyc *wq,
+                                          u16 pi, u16 frag_pi)
 {
        struct mlx5e_tx_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-       u8 nnops = mlx5_wq_cyc_get_size(wq) - pi;
+       u8 nnops = mlx5_wq_cyc_get_frag_size(wq) - frag_pi;
 
        edge_wi = wi + nnops;
 
-       /* fill sq edge with nops to avoid wqe wrap around */
+       /* fill sq frag edge with nops to avoid wqe wrapping two pages */
        for (; wi < edge_wi; wi++) {
                wi->skb = NULL;
                wi->num_wqebbs = 1;
@@ -358,8 +358,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        unsigned char *skb_data = skb->data;
        unsigned int skb_len = skb->len;
        u16 ds_cnt, ds_cnt_inl = 0;
+       u16 headlen, ihs, frag_pi;
        u8 num_wqebbs, opcode;
-       u16 headlen, ihs;
        u32 num_bytes;
        int num_dma;
        __be16 mss;
@@ -395,8 +395,9 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 
        num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-       if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-               mlx5e_fill_sq_edge(sq, wq, pi);
+       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+       if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+               mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
                mlx5e_sq_fetch_wqe(sq, &wqe, &pi);
        }
 
@@ -642,9 +643,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 
        unsigned char *skb_data = skb->data;
        unsigned int skb_len = skb->len;
+       u16 headlen, ihs, pi, frag_pi;
        u16 ds_cnt, ds_cnt_inl = 0;
        u8 num_wqebbs, opcode;
-       u16 headlen, ihs, pi;
        u32 num_bytes;
        int num_dma;
        __be16 mss;
@@ -680,8 +681,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
        }
 
        num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-       if (unlikely(pi + num_wqebbs > mlx5_wq_cyc_get_size(wq))) {
-               mlx5e_fill_sq_edge(sq, wq, pi);
+       frag_pi = mlx5_wq_cyc_ctr2fragix(wq, sq->pc);
+       if (unlikely(frag_pi + num_wqebbs > mlx5_wq_cyc_get_frag_size(wq))) {
+               mlx5e_fill_sq_frag_edge(sq, wq, pi, frag_pi);
                mlx5i_sq_fetch_wqe(sq, &wqe, &pi);
        }
 
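The en_rx.c and en_tx.c hunks above share one pattern: a WQE that needs several
WQEBBs must not straddle two non-contiguous fragments, so when it would cross a
fragment edge the remaining slots are filled with NOPs and the WQE is placed at
the start of the next fragment. A rough sketch of that check, with hypothetical
names and assumed sizes (not driver constants):

/*
 * Sketch of the frag-edge check, with hypothetical names. Returns how
 * many NOP WQEBBs must be posted so that a WQE of num_wqebbs basic
 * blocks does not straddle a fragment boundary.
 */
#include <linux/types.h>

static u16 wqebbs_to_pad(u16 frag_pi, u16 frag_size, u8 num_wqebbs)
{
	if (frag_pi + num_wqebbs > frag_size)
		return frag_size - frag_pi;	/* NOPs to post */
	return 0;
}

/*
 * Example with assumed sizes: a 4 KB fragment holds 64 WQEBBs of 64 B.
 * With frag_pi = 62 and a 4-WQEBB WQE, 62 + 4 > 64, so 2 NOPs are posted
 * and the WQE starts at the top of the next fragment.
 */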
14 changes: 7 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -454,7 +454,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
        }
 
        inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-               sizeof(u64) * conn->cq.wq_ctrl.frag_buf.npages;
+               sizeof(u64) * conn->cq.wq_ctrl.buf.npages;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
@@ -469,12 +469,12 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
        MLX5_SET(cqc, cqc, log_cq_size, ilog2(cq_size));
        MLX5_SET(cqc, cqc, c_eqn, eqn);
        MLX5_SET(cqc, cqc, uar_page, fdev->conn_res.uar->index);
-       MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.frag_buf.page_shift -
+       MLX5_SET(cqc, cqc, log_page_size, conn->cq.wq_ctrl.buf.page_shift -
                 MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, conn->cq.wq_ctrl.db.dma);
 
        pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
-       mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.frag_buf, pas);
+       mlx5_fill_page_frag_array(&conn->cq.wq_ctrl.buf, pas);
 
        err = mlx5_core_create_cq(mdev, &conn->cq.mcq, in, inlen);
        kvfree(in);
@@ -500,7 +500,7 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
        goto out;
 
 err_cqwq:
-       mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+       mlx5_wq_destroy(&conn->cq.wq_ctrl);
 out:
        return err;
 }
@@ -510,7 +510,7 @@ static void mlx5_fpga_conn_destroy_cq(struct mlx5_fpga_conn *conn)
        tasklet_disable(&conn->cq.tasklet);
        tasklet_kill(&conn->cq.tasklet);
        mlx5_core_destroy_cq(conn->fdev->mdev, &conn->cq.mcq);
-       mlx5_cqwq_destroy(&conn->cq.wq_ctrl);
+       mlx5_wq_destroy(&conn->cq.wq_ctrl);
 }
 
 static int mlx5_fpga_conn_create_wq(struct mlx5_fpga_conn *conn, void *qpc)
@@ -591,8 +591,8 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
 
-       mlx5_fill_page_array(&conn->qp.wq_ctrl.buf,
-                            (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
+       mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
+                                 (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
 
        err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
        if (err)
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -54,7 +54,7 @@ struct mlx5_fpga_conn {
        /* CQ */
        struct {
                struct mlx5_cqwq wq;
-               struct mlx5_frag_wq_ctrl wq_ctrl;
+               struct mlx5_wq_ctrl wq_ctrl;
                struct mlx5_core_cq mcq;
                struct tasklet_struct tasklet;
        } cq;