Skip to content

Commit

Permalink
RDMA/mana_ib: Process QP error events in mana_ib
Browse files Browse the repository at this point in the history
Process QP fatal events from the error event queue.
For that, find the QP, using QPN from the event, and then call its
event_handler. To find the QPs, store created RC QPs in an xarray.

Signed-off-by: Konstantin Taranov <kotaranov@microsoft.com>
Link: https://lore.kernel.org/r/1717754897-19858-1-git-send-email-kotaranov@linux.microsoft.com
Reviewed-by: Wei Hu <weh@microsoft.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
  • Loading branch information
Konstantin Taranov authored and Leon Romanovsky committed Jun 9, 2024
1 parent aee2424 commit 2a1251e
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 2 deletions.
3 changes: 3 additions & 0 deletions drivers/infiniband/hw/mana/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
if (ret)
goto destroy_eqs;

xa_init_flags(&dev->qp_table_wq, XA_FLAGS_LOCK_IRQ);
ret = mana_ib_gd_config_mac(dev, ADDR_OP_ADD, mac_addr);
if (ret) {
ibdev_err(&dev->ib_dev, "Failed to add Mac address, ret %d",
Expand All @@ -143,6 +144,7 @@ static int mana_ib_probe(struct auxiliary_device *adev,
return 0;

destroy_rnic:
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
destroy_eqs:
mana_ib_destroy_eqs(dev);
Expand All @@ -158,6 +160,7 @@ static void mana_ib_remove(struct auxiliary_device *adev)
struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);

ib_unregister_device(&dev->ib_dev);
xa_destroy(&dev->qp_table_wq);
mana_ib_gd_destroy_rnic_adapter(dev);
mana_ib_destroy_eqs(dev);
mana_gd_deregister_device(dev->gdma_dev);
Expand Down
31 changes: 29 additions & 2 deletions drivers/infiniband/hw/mana/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -667,6 +667,33 @@ int mana_ib_gd_query_adapter_caps(struct mana_ib_dev *dev)
return 0;
}

/*
 * Event-queue callback for the mana_ib device.
 *
 * @ctx:   the struct mana_ib_dev that was supplied as the EQ context
 * @q:     the queue the event arrived on (unused here)
 * @event: the event to process
 *
 * Only GDMA_EQE_RNIC_QP_FATAL is acted upon: the QP number is read from
 * event->details[0], the QP is looked up (taking a reference), and, if the
 * QP has an event_handler installed, an IB_EVENT_QP_FATAL event is passed to
 * it. The reference is dropped before returning. Every other event type, and
 * a QP number that is not found, is silently ignored.
 */
static void
mana_ib_event_handler(void *ctx, struct gdma_queue *q, struct gdma_event *event)
{
	struct mana_ib_dev *mdev = ctx;
	struct mana_ib_qp *qp;
	struct ib_event ev;
	u32 qpn;

	/* Nothing to do for events other than a fatal QP error. */
	if (event->type != GDMA_EQE_RNIC_QP_FATAL)
		return;

	qpn = event->details[0];
	qp = mana_get_qp_ref(mdev, qpn);
	if (!qp)
		return;

	if (qp->ibqp.event_handler) {
		ev.event = IB_EVENT_QP_FATAL;
		ev.element.qp = &qp->ibqp;
		ev.device = qp->ibqp.device;
		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
	}

	/* Balance the reference taken by mana_get_qp_ref(). */
	mana_put_qp_ref(qp);
}

int mana_ib_create_eqs(struct mana_ib_dev *mdev)
{
struct gdma_context *gc = mdev_to_gc(mdev);
Expand All @@ -676,7 +703,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
spec.type = GDMA_EQ;
spec.monitor_avl_buf = false;
spec.queue_size = EQ_SIZE;
spec.eq.callback = NULL;
spec.eq.callback = mana_ib_event_handler;
spec.eq.context = mdev;
spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;
spec.eq.msix_index = 0;
Expand All @@ -691,7 +718,7 @@ int mana_ib_create_eqs(struct mana_ib_dev *mdev)
err = -ENOMEM;
goto destroy_fatal_eq;
}

spec.eq.callback = NULL;
for (i = 0; i < mdev->ib_dev.num_comp_vectors; i++) {
spec.eq.msix_index = (i + 1) % gc->num_msix_usable;
err = mana_gd_create_mana_eq(mdev->gdma_dev, &spec, &mdev->eqs[i]);
Expand Down
24 changes: 24 additions & 0 deletions drivers/infiniband/hw/mana/mana_ib.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ struct mana_ib_dev {
mana_handle_t adapter_handle;
struct gdma_queue *fatal_err_eq;
struct gdma_queue **eqs;
struct xarray qp_table_wq;
struct mana_ib_adapter_caps adapter_caps;
};

Expand Down Expand Up @@ -124,6 +125,9 @@ struct mana_ib_qp {

/* The port on the IB device, starting with 1 */
u32 port;

refcount_t refcount;
struct completion free;
};

struct mana_ib_ucontext {
Expand Down Expand Up @@ -333,6 +337,26 @@ static inline struct gdma_context *mdev_to_gc(struct mana_ib_dev *mdev)
return mdev->gdma_dev->gdma_context;
}

/*
 * Look up a QP by id in mdev->qp_table_wq and take a reference on it.
 *
 * The lookup and the refcount increment are both performed while holding the
 * xarray lock with interrupts saved/disabled.
 *
 * Returns the QP with its refcount incremented, or NULL when no entry is
 * stored under @qid. A non-NULL result must be released with
 * mana_put_qp_ref().
 */
static inline struct mana_ib_qp *mana_get_qp_ref(struct mana_ib_dev *mdev,
						 uint32_t qid)
{
	struct mana_ib_qp *found;
	unsigned long irq_flags;

	xa_lock_irqsave(&mdev->qp_table_wq, irq_flags);

	found = xa_load(&mdev->qp_table_wq, qid);
	if (!found)
		goto unlock;

	refcount_inc(&found->refcount);

unlock:
	xa_unlock_irqrestore(&mdev->qp_table_wq, irq_flags);
	return found;
}

/*
 * Drop one reference on @qp. When the refcount reaches zero, signal the
 * qp->free completion.
 */
static inline void mana_put_qp_ref(struct mana_ib_qp *qp)
{
	/* Other references remain; nothing to signal yet. */
	if (!refcount_dec_and_test(&qp->refcount))
		return;

	complete(&qp->free);
}

static inline struct net_device *mana_ib_get_netdev(struct ib_device *ibdev, u32 port)
{
struct mana_ib_dev *mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
Expand Down
22 changes: 22 additions & 0 deletions drivers/infiniband/hw/mana/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,22 @@ static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
return err;
}

/*
 * Prepare @qp's reference tracking and insert it into mdev->qp_table_wq,
 * keyed by qp->ibqp.qp_num.
 *
 * The refcount starts at 1 and the "free" completion is initialised before
 * the QP is inserted into the table.
 *
 * Returns the result of xa_insert_irq().
 */
static int mana_table_store_qp(struct mana_ib_dev *mdev, struct mana_ib_qp *qp)
{
	int ret;

	init_completion(&qp->free);
	refcount_set(&qp->refcount, 1);

	ret = xa_insert_irq(&mdev->qp_table_wq, qp->ibqp.qp_num, qp,
			    GFP_KERNEL);
	return ret;
}

/*
 * Remove @qp from mdev->qp_table_wq and wait until it is no longer
 * referenced.
 *
 * The entry is erased first, then one reference is dropped, and finally the
 * caller blocks on qp->free, which mana_put_qp_ref() completes once the
 * refcount reaches zero.
 */
static void mana_table_remove_qp(struct mana_ib_dev *mdev,
				 struct mana_ib_qp *qp)
{
	unsigned long index = qp->ibqp.qp_num;

	xa_erase_irq(&mdev->qp_table_wq, index);

	mana_put_qp_ref(qp);
	wait_for_completion(&qp->free);
}

static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
struct ib_qp_init_attr *attr, struct ib_udata *udata)
{
Expand Down Expand Up @@ -460,6 +476,10 @@ static int mana_ib_create_rc_qp(struct ib_qp *ibqp, struct ib_pd *ibpd,
}
}

err = mana_table_store_qp(mdev, qp);
if (err)
goto destroy_qp;

return 0;

destroy_qp:
Expand Down Expand Up @@ -620,6 +640,8 @@ static int mana_ib_destroy_rc_qp(struct mana_ib_qp *qp, struct ib_udata *udata)
container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
int i;

mana_table_remove_qp(mdev, qp);

/* Ignore return code as there is not much we can do about it.
* The error message is printed inside.
*/
Expand Down
1 change: 1 addition & 0 deletions drivers/net/ethernet/microsoft/mana/gdma_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,7 @@ static void mana_gd_process_eqe(struct gdma_queue *eq)
case GDMA_EQE_HWC_INIT_EQ_ID_DB:
case GDMA_EQE_HWC_INIT_DATA:
case GDMA_EQE_HWC_INIT_DONE:
case GDMA_EQE_RNIC_QP_FATAL:
if (!eq->eq.callback)
break;

Expand Down
1 change: 1 addition & 0 deletions include/net/mana/gdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ enum gdma_eqe_type {
GDMA_EQE_HWC_INIT_DONE = 131,
GDMA_EQE_HWC_SOC_RECONFIG = 132,
GDMA_EQE_HWC_SOC_RECONFIG_DATA = 133,
GDMA_EQE_RNIC_QP_FATAL = 176,
};

enum {
Expand Down

0 comments on commit 2a1251e

Please sign in to comment.