Skip to content

Commit

Permalink
RDMA/mlx5: Set lag tx affinity according to slave
Browse files Browse the repository at this point in the history
The patch sets the lag tx affinity of the data QPs and the GSI QPs
according to the LAG xmit slave.

For GSI QPs, in case the link layer is Ethernet (RoCE), we create two GSI
QPs, one for each physical port. When the driver selects the GSI QP, it
will consider the port affinity result.  For connected QPs, the driver
sets the affinity of the xmit slave.

The above ensures that an RC QP and its corresponding GSI QP will transmit
from the same physical port.

Link: https://lore.kernel.org/r/20200430192146.12863-17-maorg@mellanox.com
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
  • Loading branch information
Maor Gottlieb authored and Jason Gunthorpe committed May 2, 2020
1 parent 5163b27 commit cfc1a89
Show file tree
Hide file tree
Showing 7 changed files with 76 additions and 27 deletions.
9 changes: 7 additions & 2 deletions drivers/infiniband/hw/mlx5/ah.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@
#include "mlx5_ib.h"

static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
struct rdma_ah_attr *ah_attr)
struct rdma_ah_init_attr *init_attr)
{
struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
enum ib_gid_type gid_type;

if (rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH) {
Expand All @@ -51,6 +52,10 @@ static void create_ib_ah(struct mlx5_ib_dev *dev, struct mlx5_ib_ah *ah,
ah->av.stat_rate_sl = (rdma_ah_get_static_rate(ah_attr) << 4);

if (ah_attr->type == RDMA_AH_ATTR_TYPE_ROCE) {
if (init_attr->xmit_slave)
ah->xmit_port =
mlx5_lag_get_slave_port(dev->mdev,
init_attr->xmit_slave);
gid_type = ah_attr->grh.sgid_attr->gid_type;

memcpy(ah->av.rmac, ah_attr->roce.dmac,
Expand Down Expand Up @@ -98,7 +103,7 @@ int mlx5_ib_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr,
return err;
}

create_ib_ah(dev, ah, ah_attr);
create_ib_ah(dev, ah, init_attr);
return 0;
}

Expand Down
33 changes: 26 additions & 7 deletions drivers/infiniband/hw/mlx5/gsi.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,10 +119,17 @@ struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
struct mlx5_ib_gsi_qp *gsi;
struct ib_qp_init_attr hw_init_attr = *init_attr;
const u8 port_num = init_attr->port_num;
const int num_pkeys = pd->device->attrs.max_pkeys;
const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
int num_qps = 0;
int ret;

if (mlx5_ib_deth_sqpn_cap(dev)) {
if (MLX5_CAP_GEN(dev->mdev,
port_type) == MLX5_CAP_PORT_TYPE_IB)
num_qps = pd->device->attrs.max_pkeys;
else if (dev->lag_active)
num_qps = MLX5_MAX_PORTS;
}

gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
if (!gsi)
return ERR_PTR(-ENOMEM);
Expand Down Expand Up @@ -261,7 +268,7 @@ static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
}

static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
u16 qp_index)
u16 pkey_index)
{
struct mlx5_ib_dev *dev = to_mdev(qp->device);
struct ib_qp_attr attr;
Expand All @@ -270,7 +277,7 @@ static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,

mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
attr.qp_state = IB_QPS_INIT;
attr.pkey_index = qp_index;
attr.pkey_index = pkey_index;
attr.qkey = IB_QP1_QKEY;
attr.port_num = gsi->port_num;
ret = ib_modify_qp(qp, &attr, mask);
Expand Down Expand Up @@ -304,12 +311,17 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
struct ib_device *device = gsi->rx_qp->device;
struct mlx5_ib_dev *dev = to_mdev(device);
int pkey_index = qp_index;
struct mlx5_ib_qp *mqp;
struct ib_qp *qp;
unsigned long flags;
u16 pkey;
int ret;

ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
if (MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_IB)
pkey_index = 0;

ret = ib_query_pkey(device, gsi->port_num, pkey_index, &pkey);
if (ret) {
mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
gsi->port_num, qp_index);
Expand Down Expand Up @@ -338,7 +350,10 @@ static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
return;
}

ret = modify_to_rts(gsi, qp, qp_index);
mqp = to_mqp(qp);
if (dev->lag_active)
mqp->gsi_lag_port = qp_index + 1;
ret = modify_to_rts(gsi, qp, pkey_index);
if (ret)
goto err_destroy_qp;

Expand Down Expand Up @@ -457,11 +472,15 @@ static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
struct mlx5_ib_ah *ah = to_mah(wr->ah);
int qp_index = wr->pkey_index;

if (!mlx5_ib_deth_sqpn_cap(dev))
if (!gsi->num_qps)
return gsi->rx_qp;

if (dev->lag_active && ah->xmit_port)
qp_index = ah->xmit_port - 1;

if (qp_index >= gsi->num_qps)
return NULL;

Expand Down
2 changes: 2 additions & 0 deletions drivers/infiniband/hw/mlx5/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
#include <linux/list.h>
#include <rdma/ib_smi.h>
#include <rdma/ib_umem.h>
#include <rdma/lag.h>
#include <linux/in.h>
#include <linux/etherdevice.h>
#include "mlx5_ib.h"
Expand Down Expand Up @@ -6567,6 +6568,7 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
dev->ib_dev.phys_port_cnt = dev->num_ports;
dev->ib_dev.num_comp_vectors = mlx5_comp_vectors_count(mdev);
dev->ib_dev.dev.parent = mdev->device;
dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES;

mutex_init(&dev->cap_mask_mutex);
INIT_LIST_HEAD(&dev->qp_list);
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,7 @@ struct mlx5_ib_qp {
* but not take effective
*/
u32 counter_pending;
u16 gsi_lag_port;
};

struct mlx5_ib_cq_buf {
Expand Down
52 changes: 35 additions & 17 deletions drivers/infiniband/hw/mlx5/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3218,28 +3218,33 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_PRI_PORT,
MLX5_QP_OPTPAR_PRI_PORT |
MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_PRI_PORT,
MLX5_QP_OPTPAR_PRI_PORT |
MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY |
MLX5_QP_OPTPAR_PRI_PORT,
[MLX5_QP_ST_XRC] = MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_PRI_PORT,
MLX5_QP_OPTPAR_PRI_PORT |
MLX5_QP_OPTPAR_LAG_TX_AFF,
},
[MLX5_QP_STATE_RTR] = {
[MLX5_QP_ST_RC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UC] = MLX5_QP_OPTPAR_ALT_ADDR_PATH |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_LAG_TX_AFF,
[MLX5_QP_ST_UD] = MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_Q_KEY,
[MLX5_QP_ST_MLX] = MLX5_QP_OPTPAR_PKEY_INDEX |
Expand All @@ -3248,7 +3253,8 @@ static enum mlx5_qp_optpar opt_mask[MLX5_QP_NUM_STATE][MLX5_QP_NUM_STATE][MLX5_Q
MLX5_QP_OPTPAR_RRE |
MLX5_QP_OPTPAR_RAE |
MLX5_QP_OPTPAR_RWE |
MLX5_QP_OPTPAR_PKEY_INDEX,
MLX5_QP_OPTPAR_PKEY_INDEX |
MLX5_QP_OPTPAR_LAG_TX_AFF,
},
},
[MLX5_QP_STATE_RTR] = {
Expand Down Expand Up @@ -3601,11 +3607,8 @@ static unsigned int get_tx_affinity_rr(struct mlx5_ib_dev *dev,

static bool qp_supports_affinity(struct ib_qp *qp)
{
struct mlx5_ib_qp *mqp = to_mqp(qp);

if ((qp->qp_type == IB_QPT_RC) ||
(qp->qp_type == IB_QPT_UD &&
!(mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)) ||
(qp->qp_type == IB_QPT_UD) ||
(qp->qp_type == IB_QPT_UC) ||
(qp->qp_type == IB_QPT_RAW_PACKET) ||
(qp->qp_type == IB_QPT_XRC_INI) ||
Expand All @@ -3614,7 +3617,9 @@ static bool qp_supports_affinity(struct ib_qp *qp)
return false;
}

static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init,
static unsigned int get_tx_affinity(struct ib_qp *qp,
const struct ib_qp_attr *attr,
int attr_mask, u8 init,
struct ib_udata *udata)
{
struct mlx5_ib_ucontext *ucontext = rdma_udata_to_drv_context(
Expand All @@ -3624,10 +3629,18 @@ static unsigned int get_tx_affinity(struct ib_qp *qp, u8 init,
struct mlx5_ib_qp_base *qp_base;
unsigned int tx_affinity;

if (!(dev->lag_active && init && qp_supports_affinity(qp)))
if (!(dev->lag_active && qp_supports_affinity(qp)))
return 0;

tx_affinity = get_tx_affinity_rr(dev, udata);
if (mqp->flags & MLX5_IB_QP_CREATE_SQPN_QP1)
tx_affinity = mqp->gsi_lag_port;
else if (init)
tx_affinity = get_tx_affinity_rr(dev, udata);
else if ((attr_mask & IB_QP_AV) && attr->xmit_slave)
tx_affinity =
mlx5_lag_get_slave_port(dev->mdev, attr->xmit_slave);
else
return 0;

qp_base = &mqp->trans_qp.base;
if (ucontext)
Expand Down Expand Up @@ -3712,7 +3725,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
struct mlx5_qp_context *context;
struct mlx5_ib_pd *pd;
enum mlx5_qp_state mlx5_cur, mlx5_new;
enum mlx5_qp_optpar optpar;
enum mlx5_qp_optpar optpar = 0;
u32 set_id = 0;
int mlx5_st;
int err;
Expand Down Expand Up @@ -3746,10 +3759,15 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}
}

tx_affinity = get_tx_affinity(ibqp,
tx_affinity = get_tx_affinity(ibqp, attr, attr_mask,
cur_state == IB_QPS_RESET &&
new_state == IB_QPS_INIT, udata);
context->flags |= cpu_to_be32(tx_affinity << 24);
if (tx_affinity) {
context->flags |= cpu_to_be32(tx_affinity << 24);
if (new_state == IB_QPS_RTR &&
MLX5_CAP_GEN(dev->mdev, init2_lag_tx_port_affinity))
optpar |= MLX5_QP_OPTPAR_LAG_TX_AFF;
}

if (is_sqp(ibqp->qp_type)) {
context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
Expand Down Expand Up @@ -3886,7 +3904,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
}

op = optab[mlx5_cur][mlx5_new];
optpar = ib_mask_to_mlx5_opt(attr_mask);
optpar |= ib_mask_to_mlx5_opt(attr_mask);
optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];

if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET ||
Expand Down
4 changes: 3 additions & 1 deletion include/linux/mlx5/mlx5_ifc.h
Original file line number Diff line number Diff line change
Expand Up @@ -1321,7 +1321,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 stat_rate_support[0x10];
u8 reserved_at_1f0[0x1];
u8 pci_sync_for_fw_update_event[0x1];
u8 reserved_at_1f2[0xa];
u8 reserved_at_1f2[0x6];
u8 init2_lag_tx_port_affinity[0x1];
u8 reserved_at_1fa[0x3];
u8 cqe_version[0x4];

u8 compact_address_vector[0x1];
Expand Down
2 changes: 2 additions & 0 deletions include/linux/mlx5/qp.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ enum mlx5_qp_optpar {
MLX5_QP_OPTPAR_RETRY_COUNT = 1 << 12,
MLX5_QP_OPTPAR_RNR_RETRY = 1 << 13,
MLX5_QP_OPTPAR_ACK_TIMEOUT = 1 << 14,
MLX5_QP_OPTPAR_LAG_TX_AFF = 1 << 15,
MLX5_QP_OPTPAR_PRI_PORT = 1 << 16,
MLX5_QP_OPTPAR_SRQN = 1 << 18,
MLX5_QP_OPTPAR_CQN_RCV = 1 << 19,
Expand Down Expand Up @@ -321,6 +322,7 @@ struct mlx5_av {
/*
 * mlx5 driver address handle: wraps the core ib_ah together with the
 * device address vector that create_ib_ah() fills in (e.g. av.rmac,
 * av.stat_rate_sl).
 */
struct mlx5_ib_ah {
struct ib_ah ibah;
struct mlx5_av av;
/*
 * LAG transmit port for this AH. create_ib_ah() sets it for RoCE AHs from
 * mlx5_lag_get_slave_port() when init_attr->xmit_slave is given; otherwise
 * it is not written there. When LAG is active, get_tx_qp() treats a
 * non-zero value as the selector for the GSI TX QP at index xmit_port - 1.
 */
u8 xmit_port;
};

static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah)
Expand Down

0 comments on commit cfc1a89

Please sign in to comment.