Skip to content

Commit

Permalink
net/mlx5e: Support RX multi-packet WQE (Striding RQ)
Browse files Browse the repository at this point in the history
Introduce the feature of multi-packet WQE (RX Work Queue Element)
referred to as (MPWQE or Striding RQ), in which WQEs are larger
and serve multiple packets each.

Every WQE consists of many strides of the same size, every received
packet is aligned to a beginning of a stride and is written to
consecutive strides within a WQE.

In the regular approach, each regular WQE is big enough to be capable
of serving one received packet of any size up to MTU or 64K in case of
device LRO is enabled, making it very wasteful when dealing with
small packets or device LRO is enabled.

For its flexibility, MPWQE allows a better memory utilization
(implying improvements in CPU utilization and packet rate) as packets
consume strides according to their size, preserving the rest of
the WQE to be available for other packets.

MPWQE default configuration:
	Num of WQEs	= 16
	Strides Per WQE = 2048
	Stride Size	= 64 byte

The default WQEs memory footprint went from 1024*mtu (~1.5MB) to
16 * 2048 * 64 = 2MB per ring.
However, HW LRO can now be supported at no additional cost in memory
footprint, and hence we turn it on by default and get an even better
performance.

Performance tested on ConnectX4-Lx 50G.
To isolate the feature under test, the numbers below were measured with
HW LRO turned off. We verified that the performance just improves when
LRO is turned back on.

* Netperf single TCP stream:
- BW raised by 10-15% for representative packet sizes:
  default, 64B, 1024B, 1478B, 65536B.

* Netperf multi TCP stream:
- No degradation, line rate reached.

* Pktgen: packet rate raised by 2-10% for traffic of different message
sizes: 64B, 128B, 256B, 1024B, and 1500B.

* Pktgen: packet loss in bursts of small messages (64byte),
single stream:
- | num packets | packets loss before | packets loss after
  |     2K      |       ~ 1K          |       0
  |     8K      |       ~ 6K          |       0
  |     16K     |       ~13K          |       0
  |     32K     |       ~28K          |       0
  |     64K     |       ~57K          |     ~24K

As expected as the driver can receive as many small packets (<=64B) as
the number of total strides in the ring (default = 2048 * 16) vs. 1024
(default ring size regardless of packets size) before this feature.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Achiad Shochat <achiad@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Tariq Toukan authored and David S. Miller committed Apr 21, 2016
1 parent 2f48af1 commit 461017c
Show file tree
Hide file tree
Showing 5 changed files with 349 additions and 44 deletions.
77 changes: 74 additions & 3 deletions drivers/net/ethernet/mellanox/mlx5/core/en.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,30 @@
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE 0xa
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd

#define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1
#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4
#define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6

#define MLX5_MPWRQ_LOG_NUM_STRIDES 11 /* >= 9, HW restriction */
#define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */
#define MLX5_MPWRQ_NUM_STRIDES BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
#define MLX5_MPWRQ_STRIDE_SIZE BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_LOG_WQE_SZ (MLX5_MPWRQ_LOG_NUM_STRIDES +\
MLX5_MPWRQ_LOG_STRIDE_SIZE)
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
#define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MPWRQ_STRIDES_PER_PAGE (MLX5_MPWRQ_NUM_STRIDES >> \
MLX5_MPWRQ_WQE_PAGE_ORDER)
#define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD (128)

#define MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ (64 * 1024)
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_PKTS 0x20
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_USEC 0x10
#define MLX5E_PARAMS_DEFAULT_TX_CQ_MODERATION_PKTS 0x20
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES 0x80
#define MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW 0x2

#define MLX5E_LOG_INDIR_RQT_SIZE 0x7
#define MLX5E_INDIR_RQT_SIZE BIT(MLX5E_LOG_INDIR_RQT_SIZE)
Expand All @@ -74,6 +92,38 @@
#define MLX5E_NUM_MAIN_GROUPS 9
#define MLX5E_NET_IP_ALIGN 2

static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size)
{
switch (wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES_MPW,
wq_size / 2);
default:
return min_t(u16, MLX5E_PARAMS_DEFAULT_MIN_RX_WQES,
wq_size / 2);
}
}

static inline int mlx5_min_log_rq_size(int wq_type)
{
switch (wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW;
default:
return MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE;
}
}

static inline int mlx5_max_log_rq_size(int wq_type)
{
switch (wq_type) {
case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW;
default:
return MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
}
}

struct mlx5e_tx_wqe {
struct mlx5_wqe_ctrl_seg ctrl;
struct mlx5_wqe_eth_seg eth;
Expand Down Expand Up @@ -128,6 +178,7 @@ static const char vport_strings[][ETH_GSTRING_LEN] = {
"tx_queue_wake",
"tx_queue_dropped",
"rx_wqe_err",
"rx_mpwqe_filler",
};

struct mlx5e_vport_stats {
Expand Down Expand Up @@ -169,8 +220,9 @@ struct mlx5e_vport_stats {
u64 tx_queue_wake;
u64 tx_queue_dropped;
u64 rx_wqe_err;
u64 rx_mpwqe_filler;

#define NUM_VPORT_COUNTERS 35
#define NUM_VPORT_COUNTERS 36
};

static const char pport_strings[][ETH_GSTRING_LEN] = {
Expand Down Expand Up @@ -263,7 +315,8 @@ static const char rq_stats_strings[][ETH_GSTRING_LEN] = {
"csum_sw",
"lro_packets",
"lro_bytes",
"wqe_err"
"wqe_err",
"mpwqe_filler",
};

struct mlx5e_rq_stats {
Expand All @@ -274,7 +327,8 @@ struct mlx5e_rq_stats {
u64 lro_packets;
u64 lro_bytes;
u64 wqe_err;
#define NUM_RQ_STATS 7
u64 mpwqe_filler;
#define NUM_RQ_STATS 8
};

static const char sq_stats_strings[][ETH_GSTRING_LEN] = {
Expand Down Expand Up @@ -318,6 +372,7 @@ struct mlx5e_stats {

struct mlx5e_params {
u8 log_sq_size;
u8 rq_wq_type;
u8 log_rq_size;
u16 num_channels;
u8 num_tc;
Expand Down Expand Up @@ -374,11 +429,23 @@ typedef void (*mlx5e_fp_handle_rx_cqe)(struct mlx5e_rq *rq,
typedef int (*mlx5e_fp_alloc_wqe)(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe,
u16 ix);

struct mlx5e_dma_info {
struct page *page;
dma_addr_t addr;
};

struct mlx5e_mpw_info {
struct mlx5e_dma_info dma_info;
u16 consumed_strides;
u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE];
};

struct mlx5e_rq {
/* data path */
struct mlx5_wq_ll wq;
u32 wqe_sz;
struct sk_buff **skb;
struct mlx5e_mpw_info *wqe_info;

struct device *pdev;
struct net_device *netdev;
Expand All @@ -393,6 +460,7 @@ struct mlx5e_rq {

/* control */
struct mlx5_wq_ctrl wq_ctrl;
u8 wq_type;
u32 rqn;
struct mlx5e_channel *channel;
struct mlx5e_priv *priv;
Expand Down Expand Up @@ -649,9 +717,12 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event);
int mlx5e_napi_poll(struct napi_struct *napi, int budget);
bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget);
int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget);

void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq);
int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix);
struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq);

void mlx5e_update_stats(struct mlx5e_priv *priv);
Expand Down
15 changes: 8 additions & 7 deletions drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
Original file line number Diff line number Diff line change
Expand Up @@ -273,8 +273,9 @@ static void mlx5e_get_ringparam(struct net_device *dev,
struct ethtool_ringparam *param)
{
struct mlx5e_priv *priv = netdev_priv(dev);
int rq_wq_type = priv->params.rq_wq_type;

param->rx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE;
param->rx_max_pending = 1 << mlx5_max_log_rq_size(rq_wq_type);
param->tx_max_pending = 1 << MLX5E_PARAMS_MAXIMUM_LOG_SQ_SIZE;
param->rx_pending = 1 << priv->params.log_rq_size;
param->tx_pending = 1 << priv->params.log_sq_size;
Expand All @@ -285,6 +286,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
{
struct mlx5e_priv *priv = netdev_priv(dev);
bool was_opened;
int rq_wq_type = priv->params.rq_wq_type;
u16 min_rx_wqes;
u8 log_rq_size;
u8 log_sq_size;
Expand All @@ -300,16 +302,16 @@ static int mlx5e_set_ringparam(struct net_device *dev,
__func__);
return -EINVAL;
}
if (param->rx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE)) {
if (param->rx_pending < (1 << mlx5_min_log_rq_size(rq_wq_type))) {
netdev_info(dev, "%s: rx_pending (%d) < min (%d)\n",
__func__, param->rx_pending,
1 << MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE);
1 << mlx5_min_log_rq_size(rq_wq_type));
return -EINVAL;
}
if (param->rx_pending > (1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE)) {
if (param->rx_pending > (1 << mlx5_max_log_rq_size(rq_wq_type))) {
netdev_info(dev, "%s: rx_pending (%d) > max (%d)\n",
__func__, param->rx_pending,
1 << MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE);
1 << mlx5_max_log_rq_size(rq_wq_type));
return -EINVAL;
}
if (param->tx_pending < (1 << MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE)) {
Expand All @@ -327,8 +329,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,

log_rq_size = order_base_2(param->rx_pending);
log_sq_size = order_base_2(param->tx_pending);
min_rx_wqes = min_t(u16, param->rx_pending - 1,
MLX5E_PARAMS_DEFAULT_MIN_RX_WQES);
min_rx_wqes = mlx5_min_rx_wqes(rq_wq_type, param->rx_pending);

if (log_rq_size == priv->params.log_rq_size &&
log_sq_size == priv->params.log_sq_size &&
Expand Down
Loading

0 comments on commit 461017c

Please sign in to comment.