Skip to content

Commit

Permalink
net/mlx4_en: using non collapsed CQ on TX
Browse files Browse the repository at this point in the history
Moving to the regular Completion Queue implementation (not collapsed):
a completion for each transmitted packet is written to a new CQ entry.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Yevgeny Petrilin authored and David S. Miller committed Nov 27, 2011
1 parent 0d9fdaa commit f0ab34f
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 38 deletions.
7 changes: 2 additions & 5 deletions drivers/net/ethernet/mellanox/mlx4/en_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
int err;

cq->size = entries;
if (mode == RX)
cq->buf_size = cq->size * sizeof(struct mlx4_cqe);
else
cq->buf_size = sizeof(struct mlx4_cqe);
cq->buf_size = cq->size * sizeof(struct mlx4_cqe);

cq->ring = ring;
cq->is_tx = mode;
Expand Down Expand Up @@ -120,7 +117,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
cq->size = priv->rx_ring[cq->ring].actual_size;

err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt, &mdev->priv_uar,
cq->wqres.db.dma, &cq->mcq, cq->vector, cq->is_tx);
cq->wqres.db.dma, &cq->mcq, cq->vector, 0);
if (err)
return err;

Expand Down
67 changes: 34 additions & 33 deletions drivers/net/ethernet/mellanox/mlx4/en_tx.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,59 +307,60 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
return cnt;
}


static void mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq)
{
struct mlx4_en_priv *priv = netdev_priv(dev);
struct mlx4_cq *mcq = &cq->mcq;
struct mlx4_en_tx_ring *ring = &priv->tx_ring[cq->ring];
struct mlx4_cqe *cqe = cq->buf;
struct mlx4_cqe *cqe;
u16 index;
u16 new_index;
u16 new_index, ring_index;
u32 txbbs_skipped = 0;
u32 cq_last_sav;

/* index always points to the first TXBB of the last polled descriptor */
index = ring->cons & ring->size_mask;
new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
if (index == new_index)
return;
u32 cons_index = mcq->cons_index;
int size = cq->size;
u32 size_mask = ring->size_mask;
struct mlx4_cqe *buf = cq->buf;

if (!priv->port_up)
return;

/*
* We use a two-stage loop:
* - the first samples the HW-updated CQE
* - the second frees TXBBs until the last sample
* This lets us amortize CQE cache misses, while still polling the CQ
* until is quiescent.
*/
cq_last_sav = mcq->cons_index;
do {
index = cons_index & size_mask;
cqe = &buf[index];
ring_index = ring->cons & size_mask;

/* Process all completed CQEs */
while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
cons_index & size)) {
/*
* make sure we read the CQE after we read the
* ownership bit
*/
rmb();

/* Skip over last polled CQE */
new_index = be16_to_cpu(cqe->wqe_index) & size_mask;

do {
/* Skip over last polled CQE */
index = (index + ring->last_nr_txbb) & ring->size_mask;
txbbs_skipped += ring->last_nr_txbb;

/* Poll next CQE */
ring_index = (ring_index + ring->last_nr_txbb) & size_mask;
/* free next descriptor */
ring->last_nr_txbb = mlx4_en_free_tx_desc(
priv, ring, index,
!!((ring->cons + txbbs_skipped) &
ring->size));
++mcq->cons_index;

} while (index != new_index);
priv, ring, ring_index,
!!((ring->cons + txbbs_skipped) &
ring->size));
} while (ring_index != new_index);

++cons_index;
index = cons_index & size_mask;
cqe = &buf[index];
}

new_index = be16_to_cpu(cqe->wqe_index) & ring->size_mask;
} while (index != new_index);
AVG_PERF_COUNTER(priv->pstats.tx_coal_avg,
(u32) (mcq->cons_index - cq_last_sav));

/*
* To prevent CQ overflow we first update CQ consumer and only then
* the ring consumer.
*/
mcq->cons_index = cons_index;
mlx4_cq_set_ci(mcq);
wmb();
ring->cons += txbbs_skipped;
Expand Down

0 comments on commit f0ab34f

Please sign in to comment.