Skip to content

Commit

Permalink
Delay mlx5_ib internal resources allocations
Browse files Browse the repository at this point in the history
From: Leon Romanovsky <leonro@nvidia.com>

Internal mlx5_ib resources are created during mlx5_ib module load. This
behavior is not optimal because it consumes resources that are not
needed when SFs are created. This patch series delays the creation of
mlx5_ib internal resources to the stage when they are actually used.

Signed-off-by: Leon Romanovsky <leon@kernel.org>
  • Loading branch information
Leon Romanovsky committed Jun 16, 2024
2 parents ef55135 + d98995b commit ae6f6dd
Show file tree
Hide file tree
Showing 8 changed files with 451 additions and 234 deletions.
19 changes: 3 additions & 16 deletions drivers/infiniband/hw/mlx5/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1810,7 +1810,7 @@ static int set_ucontext_resp(struct ib_ucontext *uctx,
}

resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
if (dev->wc_support)
if (mlx5_wc_support_get(dev->mdev))
resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
log_bf_reg_size);
resp->cache_line_size = cache_line_size();
Expand Down Expand Up @@ -2337,7 +2337,7 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vm
switch (command) {
case MLX5_IB_MMAP_WC_PAGE:
case MLX5_IB_MMAP_ALLOC_WC:
if (!dev->wc_support)
if (!mlx5_wc_support_get(dev->mdev))
return -EPERM;
fallthrough;
case MLX5_IB_MMAP_NC_PAGE:
Expand Down Expand Up @@ -3612,7 +3612,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_UAR_OBJ_ALLOC)(
alloc_type != MLX5_IB_UAPI_UAR_ALLOC_TYPE_NC)
return -EOPNOTSUPP;

if (!to_mdev(c->ibucontext.device)->wc_support &&
if (!mlx5_wc_support_get(to_mdev(c->ibucontext.device)->mdev) &&
alloc_type == MLX5_IB_UAPI_UAR_ALLOC_TYPE_BF)
return -EOPNOTSUPP;

Expand Down Expand Up @@ -3766,18 +3766,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev)
return err;
}

/*
 * ib_device_ops.enable_driver callback: run the write-combining self-test
 * and log its outcome.
 *
 * Returns whatever mlx5_ib_test_wc() returned. The debug message is emitted
 * whether or not that call failed, and reports the current wc_support bit.
 */
static int mlx5_ib_enable_driver(struct ib_device *dev)
{
	struct mlx5_ib_dev *mdev = to_mdev(dev);
	int ret;

	ret = mlx5_ib_test_wc(mdev);
	/* Terminate the message with '\n' so it is logged as a complete line */
	mlx5_ib_dbg(mdev, "Write-Combining %s\n",
		    mdev->wc_support ? "supported" : "not supported");

	return ret;
}

static const struct ib_device_ops mlx5_ib_dev_ops = {
.owner = THIS_MODULE,
.driver_id = RDMA_DRIVER_MLX5,
Expand Down Expand Up @@ -3808,7 +3796,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
.drain_rq = mlx5_ib_drain_rq,
.drain_sq = mlx5_ib_drain_sq,
.device_group = &mlx5_attr_group,
.enable_driver = mlx5_ib_enable_driver,
.get_dev_fw_str = get_dev_fw_str,
.get_dma_mr = mlx5_ib_get_dma_mr,
.get_link_layer = mlx5_ib_port_link_layer,
Expand Down
198 changes: 0 additions & 198 deletions drivers/infiniband/hw/mlx5/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,8 @@
* SOFTWARE.
*/

#include <linux/io.h>
#include <rdma/ib_umem_odp.h>
#include "mlx5_ib.h"
#include <linux/jiffies.h>

/*
* Fill in a physical address list. ib_umem_num_dma_blocks() entries will be
Expand Down Expand Up @@ -95,199 +93,3 @@ unsigned long __mlx5_umem_find_best_quantized_pgoff(
return 0;
return page_size;
}

/* wr_id attached to the unsignaled NOPs that test_wc_do_send() posts in its loop */
#define WR_ID_BF 0xBF
/* wr_id attached to the final, signaled NOP that test_wc_do_send() posts */
#define WR_ID_END 0xBAD
/* Number of WR_ID_BF NOPs posted per test; also used to size the test QP and CQ */
#define TEST_WC_NUM_WQES 255
/* Upper bound (100 ms, expressed in jiffies) on CQ polling in test_wc_poll_cq_result() */
#define TEST_WC_POLLING_MAX_TIME_JIFFIES msecs_to_jiffies(100)
/*
 * Post a single NOP work request on the send queue of @ibqp and push it to
 * the device through the QP's BlueFlame register.
 *
 * @dev:      device owning the QP; only its core-device state is inspected
 * @ibqp:     QP to post on
 * @wr_id:    work-request id recorded for the posted slot
 * @signaled: when true, the WQE written to the send queue buffer requests a
 *            completion (MLX5_WQE_CTRL_CQ_UPDATE)
 *
 * Two copies of the control segment exist: the one in the send queue buffer,
 * whose completion flag follows @signaled, and the one written over MMIO,
 * which always has the completion flag set.
 * NOTE(review): together with test_wc_poll_cq_result(), which reports success
 * only for a completion carrying WR_ID_BF (posted unsignaled), this looks
 * designed so that such a completion can only come from the MMIO copy --
 * confirm against the hardware documentation.
 *
 * Returns 0 on success, or -EIO if the device is in internal error state.
 */
static int post_send_nop(struct mlx5_ib_dev *dev, struct ib_qp *ibqp, u64 wr_id,
bool signaled)
{
struct mlx5_ib_qp *qp = to_mqp(ibqp);
struct mlx5_wqe_ctrl_seg *ctrl;
struct mlx5_bf *bf = &qp->bf;
/* Zero-initialized staging buffer (16 x __be32) for the MMIO copy */
__be32 mmio_wqe[16] = {};
unsigned long flags;
unsigned int idx;

if (unlikely(dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR))
return -EIO;

spin_lock_irqsave(&qp->sq.lock, flags);

/* Slot index; the mask assumes wqe_cnt is a power of two -- TODO confirm */
idx = qp->sq.cur_post & (qp->sq.wqe_cnt - 1);
ctrl = mlx5_frag_buf_get_wqe(&qp->sq.fbc, idx);

/* Build a NOP consisting of a control segment only */
memset(ctrl, 0, sizeof(struct mlx5_wqe_ctrl_seg));
ctrl->fm_ce_se = signaled ? MLX5_WQE_CTRL_CQ_UPDATE : 0;
ctrl->opmod_idx_opcode =
cpu_to_be32(((u32)(qp->sq.cur_post) << 8) | MLX5_OPCODE_NOP);
ctrl->qpn_ds = cpu_to_be32((sizeof(struct mlx5_wqe_ctrl_seg) / 16) |
(qp->trans_qp.base.mqp.qpn << 8));

/* Software bookkeeping for the slot, then advance the producer state */
qp->sq.wrid[idx] = wr_id;
qp->sq.w_list[idx].opcode = MLX5_OPCODE_NOP;
qp->sq.wqe_head[idx] = qp->sq.head + 1;
qp->sq.cur_post += DIV_ROUND_UP(sizeof(struct mlx5_wqe_ctrl_seg),
MLX5_SEND_WQE_BB);
qp->sq.w_list[idx].next = qp->sq.cur_post;
qp->sq.head++;

/* The MMIO copy always requests a completion, regardless of @signaled */
memcpy(mmio_wqe, ctrl, sizeof(*ctrl));
((struct mlx5_wqe_ctrl_seg *)&mmio_wqe)->fm_ce_se |=
MLX5_WQE_CTRL_CQ_UPDATE;

/* Make sure that descriptors are written before
* updating doorbell record and ringing the doorbell
*/
wmb();

qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);

/* Make sure doorbell record is visible to the HCA before
* we hit doorbell
*/
wmb();
/* Copy the whole staging buffer; the count is given in 64-bit words */
__iowrite64_copy(bf->bfreg->map + bf->offset, mmio_wqe,
sizeof(mmio_wqe) / 8);

/* XOR toggles the offset by buf_size, alternating the target of the next write */
bf->offset ^= bf->buf_size;

spin_unlock_irqrestore(&qp->sq.lock, flags);

return 0;
}

/*
 * Busy-poll @cq for one completion, giving up once
 * TEST_WC_POLLING_MAX_TIME_JIFFIES have elapsed.
 *
 * Returns:
 *   > 0         a completion carrying WR_ID_BF was reaped
 *   0           a completion was reaped but it carried a different wr_id
 *   -EINVAL     the work completion reported a non-zero status
 *   -ETIMEDOUT  nothing completed before the deadline
 *   < 0         any error returned by ib_poll_cq()
 */
static int test_wc_poll_cq_result(struct mlx5_ib_dev *dev, struct ib_cq *cq)
{
	unsigned long deadline = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES;
	struct ib_wc wc = {};
	int polled;

	for (;;) {
		polled = ib_poll_cq(cq, 1, &wc);
		if (polled < 0)
			return polled;
		if (wc.status)
			return -EINVAL;
		if (polled > 0)
			break;
		/* Nothing reaped yet: retry until the deadline has passed */
		if (time_after(jiffies, deadline))
			return -ETIMEDOUT;
	}

	return wc.wr_id == WR_ID_BF ? polled : 0;
}

/*
 * Post TEST_WC_NUM_WQES unsignaled NOPs tagged WR_ID_BF, followed by one
 * signaled NOP tagged WR_ID_END.
 *
 * Returns 0 on success, or the first error reported by post_send_nop().
 */
static int test_wc_do_send(struct mlx5_ib_dev *dev, struct ib_qp *qp)
{
	int remaining = TEST_WC_NUM_WQES;
	int err;

	while (remaining-- > 0) {
		err = post_send_nop(dev, qp, WR_ID_BF, false);
		if (err)
			return err;
	}

	return post_send_nop(dev, qp, WR_ID_END, true);
}

/*
 * Probe whether write-combining works on this device and record the result
 * in dev->wc_support.
 *
 * Flow:
 *   - Without the "bf" general capability nothing is tested and 0 is
 *     returned with wc_support left untouched.
 *   - With RoCE disabled on an Ethernet port no test is run; on a PF,
 *     wc_support is taken from arch_can_pci_mmap_wc(), and 0 is returned.
 *   - Otherwise a bfreg, PD, CQ and a UD QP flagged MLX5_IB_QP_CREATE_WC_TEST
 *     are created, the QP is moved INIT -> RTR -> RTS, NOPs are posted and
 *     the CQ is polled. wc_support is set only when the poll helper returns
 *     a positive value.
 *
 * All temporary resources are released before returning, in reverse order
 * of creation.
 *
 * Returns 0 when the probe finished (whether or not write-combining was
 * detected), or a negative errno if setup, posting or polling failed; any
 * non-zero result is also logged.
 */
int mlx5_ib_test_wc(struct mlx5_ib_dev *dev)
{
/* One CQ entry more than TEST_WC_NUM_WQES, matching the extra WR_ID_END NOP */
struct ib_cq_init_attr cq_attr = { .cqe = TEST_WC_NUM_WQES + 1 };
int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
struct ib_qp_init_attr qp_init_attr = {
.cap = { .max_send_wr = TEST_WC_NUM_WQES },
.qp_type = IB_QPT_UD,
.sq_sig_type = IB_SIGNAL_REQ_WR,
.create_flags = MLX5_IB_QP_CREATE_WC_TEST,
};
struct ib_qp_attr qp_attr = { .port_num = 1 };
struct ib_device *ibdev = &dev->ib_dev;
struct ib_qp *qp;
struct ib_cq *cq;
struct ib_pd *pd;
int ret;

if (!MLX5_CAP_GEN(dev->mdev, bf))
return 0;

if (!dev->mdev->roce.roce_en &&
port_type_cap == MLX5_CAP_PORT_TYPE_ETH) {
/* No QP-based test on this path; only a PF gets the arch-level answer */
if (mlx5_core_is_pf(dev->mdev))
dev->wc_support = arch_can_pci_mmap_wc();
return 0;
}

/* NOTE(review): the true/false arguments presumably request a WC mapping -- confirm */
ret = mlx5_alloc_bfreg(dev->mdev, &dev->wc_bfreg, true, false);
if (ret)
goto print_err;

/* bfreg is not marked wc: skip the test; ret is 0 here, so no error is reported */
if (!dev->wc_bfreg.wc)
goto out1;

pd = ib_alloc_pd(ibdev, 0);
if (IS_ERR(pd)) {
ret = PTR_ERR(pd);
goto out1;
}

cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_attr);
if (IS_ERR(cq)) {
ret = PTR_ERR(cq);
goto out2;
}

/* The same CQ serves as both send and receive CQ */
qp_init_attr.recv_cq = cq;
qp_init_attr.send_cq = cq;
qp = ib_create_qp(pd, &qp_init_attr);
if (IS_ERR(qp)) {
ret = PTR_ERR(qp);
goto out3;
}

/* Walk the QP through INIT -> RTR -> RTS so that it can send */
qp_attr.qp_state = IB_QPS_INIT;
ret = ib_modify_qp(qp, &qp_attr,
IB_QP_STATE | IB_QP_PORT | IB_QP_PKEY_INDEX |
IB_QP_QKEY);
if (ret)
goto out4;

qp_attr.qp_state = IB_QPS_RTR;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
if (ret)
goto out4;

qp_attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
if (ret)
goto out4;

ret = test_wc_do_send(dev, qp);
if (ret < 0)
goto out4;

/*
 * > 0: write-combining detected; 0: completed without detecting it;
 * < 0: polling error, propagated to the caller.
 */
ret = test_wc_poll_cq_result(dev, cq);
if (ret > 0) {
dev->wc_support = true;
ret = 0;
}

/* Unwind in reverse order of creation */
out4:
ib_destroy_qp(qp);
out3:
ib_destroy_cq(cq);
out2:
ib_dealloc_pd(pd);
out1:
mlx5_free_bfreg(dev->mdev, &dev->wc_bfreg);
print_err:
if (ret)
mlx5_ib_err(
dev,
"Error %d while trying to test write-combining support\n",
ret);
return ret;
}
3 changes: 0 additions & 3 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,6 @@ struct mlx5_ib_flow_db {
* rely on the range reserved for that use in the ib_qp_create_flags enum.
*/
#define MLX5_IB_QP_CREATE_SQPN_QP1 IB_QP_CREATE_RESERVED_START
#define MLX5_IB_QP_CREATE_WC_TEST (IB_QP_CREATE_RESERVED_START << 1)

struct wr_list {
u16 opcode;
Expand Down Expand Up @@ -1123,7 +1122,6 @@ struct mlx5_ib_dev {
u8 ib_active:1;
u8 is_rep:1;
u8 lag_active:1;
u8 wc_support:1;
u8 fill_delay;
struct umr_common umrc;
/* sync used page count stats
Expand All @@ -1149,7 +1147,6 @@ struct mlx5_ib_dev {
/* Array with num_ports elements */
struct mlx5_ib_port *port;
struct mlx5_sq_bfreg bfreg;
struct mlx5_sq_bfreg wc_bfreg;
struct mlx5_sq_bfreg fp_bfreg;
struct mlx5_ib_delay_drop delay_drop;
const struct mlx5_ib_profile *profile;
Expand Down
16 changes: 0 additions & 16 deletions drivers/infiniband/hw/mlx5/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1107,8 +1107,6 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,

if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
qp->bf.bfreg = &dev->fp_bfreg;
else if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
qp->bf.bfreg = &dev->wc_bfreg;
else
qp->bf.bfreg = &dev->bfreg;

Expand Down Expand Up @@ -2959,14 +2957,6 @@ static void process_create_flag(struct mlx5_ib_dev *dev, int *flags, int flag,
return;
}

if (flag == MLX5_IB_QP_CREATE_WC_TEST) {
/*
* Special case, if condition didn't meet, it won't be error,
* just different in-kernel flow.
*/
*flags &= ~MLX5_IB_QP_CREATE_WC_TEST;
return;
}
mlx5_ib_dbg(dev, "Verbs create QP flag 0x%X is not supported\n", flag);
}

Expand Down Expand Up @@ -3027,8 +3017,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
IB_QP_CREATE_PCI_WRITE_END_PADDING,
MLX5_CAP_GEN(mdev, end_pad), qp);

process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_WC_TEST,
qp_type != MLX5_IB_QPT_REG_UMR, qp);
process_create_flag(dev, &create_flags, MLX5_IB_QP_CREATE_SQPN_QP1,
true, qp);

Expand Down Expand Up @@ -4609,10 +4597,6 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
if (qp->type == IB_QPT_RAW_PACKET || qp->type == MLX5_IB_QPT_REG_UMR)
return true;

/* Internal QP used for wc testing, with NOPs in wq */
if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST)
return true;

return false;
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/net/ethernet/mellanox/mlx5/core/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \
lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \
fw_reset.o qos.o lib/tout.o lib/aso.o
fw_reset.o qos.o lib/tout.o lib/aso.o wc.o

#
# Netdev basic
Expand Down
2 changes: 2 additions & 0 deletions drivers/net/ethernet/mellanox/mlx5/core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -1819,6 +1819,7 @@ int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
mutex_init(&dev->intf_state_mutex);
lockdep_set_class(&dev->intf_state_mutex, &dev->lock_key);
mutex_init(&dev->mlx5e_res.uplink_netdev_lock);
mutex_init(&dev->wc_state_lock);

mutex_init(&priv->bfregs.reg_head.lock);
mutex_init(&priv->bfregs.wc_head.lock);
Expand Down Expand Up @@ -1916,6 +1917,7 @@ void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
mutex_destroy(&priv->alloc_mutex);
mutex_destroy(&priv->bfregs.wc_head.lock);
mutex_destroy(&priv->bfregs.reg_head.lock);
mutex_destroy(&dev->wc_state_lock);
mutex_destroy(&dev->mlx5e_res.uplink_netdev_lock);
mutex_destroy(&dev->intf_state_mutex);
lockdep_unregister_key(&dev->lock_key);
Expand Down
Loading

0 comments on commit ae6f6dd

Please sign in to comment.