IB/mlx5: Reset flow support for IB kernel ULPs
The driver exposes interfaces that directly relate to HW state.
Upon fatal error, consumers of these interfaces (ULPs) that rely
on completion of all their posted work requests could hang, thereby
introducing dependencies in shutdown order. To prevent this from
happening, we manage the relevant resources (CQs, QPs) that are used
by the device. Upon a fatal error, we now generate simulated
completions for outstanding WQEs that were not completed at the
time the HW was reset.

This includes invoking the completion event handler for all involved
CQs so that the ULPs will poll those CQs. When polled, we return
simulated CQEs with the IB_WC_WR_FLUSH_ERR return code, enabling ULPs
to clean up their resources and not wait forever for completions
upon receiving remove_one.
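For illustration only, a minimal sketch of how a kernel ULP's completion
handler might observe these simulated completions; the my_ulp_* helpers are
hypothetical and not part of this patch:

/* Hypothetical ULP completion handler: after a fatal device error,
 * ib_poll_cq() now returns software-generated CQEs whose status is
 * IB_WC_WR_FLUSH_ERR, so every outstanding WR can still be reclaimed.
 */
static void my_ulp_cq_handler(struct ib_cq *cq, void *ctx)
{
	struct ib_wc wc[16];
	int i, n;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0) {
		for (i = 0; i < n; i++) {
			if (wc[i].status == IB_WC_WR_FLUSH_ERR) {
				/* Device was reset: release the request
				 * tracked by wr_id instead of retrying.
				 */
				my_ulp_release_request(ctx, wc[i].wr_id);
				continue;
			}
			my_ulp_complete_request(ctx, &wc[i]);
		}
	}
}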

The above change requires an extra check in the data path to make
sure that when the device is in an error state, the simulated CQEs
are returned and no further WQEs are posted.
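As a rough sketch of what such a data-path guard looks like (the qp.c hunk
itself is not shown in the diff below; this assumes the same
MLX5_DEVICE_STATE_INTERNAL_ERROR check used in the cq.c and mr.c hunks, and
example_post_send_guard is a hypothetical helper, not the patch's code):

/* Sketch of a data-path guard, assuming the same device-state check
 * that the cq.c and mr.c hunks below use; abbreviated, not a verbatim
 * copy of the patch.
 */
static int example_post_send_guard(struct mlx5_ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);

	if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		/* HW is gone: refuse new WQEs; completions for WQEs that
		 * were already posted will be simulated at poll time.
		 */
		return -EIO;
	}
	return 0;
}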

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Maor Gottlieb authored and Doug Ledford committed Jun 23, 2016
1 parent 7c2344c commit 89ea94a
Showing 7 changed files with 250 additions and 21 deletions.
87 changes: 86 additions & 1 deletion drivers/infiniband/hw/mlx5/cq.c
@@ -424,6 +424,83 @@ static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
	item->key = be32_to_cpu(cqe->mkey);
}

static void sw_send_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	unsigned int idx;
	int np;
	int i;

	wq = &qp->sq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		idx = wq->last_poll & (wq->wqe_cnt - 1);
		wc->wr_id = wq->wrid[idx];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
		wq->last_poll = wq->w_list[idx].next;
	}
	*npolled = np;
}

static void sw_recv_comp(struct mlx5_ib_qp *qp, int num_entries,
			 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_wq *wq;
	unsigned int cur;
	int np;
	int i;

	wq = &qp->rq;
	cur = wq->head - wq->tail;
	np = *npolled;

	if (cur == 0)
		return;

	for (i = 0; i < cur && np < num_entries; i++) {
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		wc->status = IB_WC_WR_FLUSH_ERR;
		wc->vendor_err = MLX5_CQE_SYNDROME_WR_FLUSH_ERR;
		wq->tail++;
		np++;
		wc->qp = &qp->ibqp;
		wc++;
	}
	*npolled = np;
}

static void mlx5_ib_poll_sw_comp(struct mlx5_ib_cq *cq, int num_entries,
				 struct ib_wc *wc, int *npolled)
{
	struct mlx5_ib_qp *qp;

	*npolled = 0;
	/* Find uncompleted WQEs belonging to this CQ and return simulated
	 * completions for them.
	 */
	list_for_each_entry(qp, &cq->list_send_qp, cq_send_list) {
		sw_send_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}

	list_for_each_entry(qp, &cq->list_recv_qp, cq_recv_list) {
		sw_recv_comp(qp, num_entries, wc + *npolled, npolled);
		if (*npolled >= num_entries)
			return;
	}
}

static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
@@ -594,12 +671,18 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_core_dev *mdev = dev->mdev;
	unsigned long flags;
	int soft_polled = 0;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);
	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_ib_poll_sw_comp(cq, num_entries, wc, &npolled);
		goto out;
	}

	if (unlikely(!list_empty(&cq->wc_list)))
		soft_polled = poll_soft_wc(cq, num_entries, wc);
@@ -612,7 +695,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);

out:
	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
@@ -843,6 +926,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;
	INIT_LIST_HEAD(&cq->list_send_qp);
	INIT_LIST_HEAD(&cq->list_recv_qp);

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
62 changes: 62 additions & 0 deletions drivers/infiniband/hw/mlx5/main.c
@@ -1980,6 +1980,65 @@ static void pkey_change_handler(struct work_struct *work)
	mutex_unlock(&ports->devr->mutex);
}

static void mlx5_ib_handle_internal_error(struct mlx5_ib_dev *ibdev)
{
	struct mlx5_ib_qp *mqp;
	struct mlx5_ib_cq *send_mcq, *recv_mcq;
	struct mlx5_core_cq *mcq;
	struct list_head cq_armed_list;
	unsigned long flags_qp;
	unsigned long flags_cq;
	unsigned long flags;

	INIT_LIST_HEAD(&cq_armed_list);

	/* Go over the QP list residing on this ibdev; sync with create/destroy QP. */
	spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags);
	list_for_each_entry(mqp, &ibdev->qp_list, qps_list) {
		spin_lock_irqsave(&mqp->sq.lock, flags_qp);
		if (mqp->sq.tail != mqp->sq.head) {
			send_mcq = to_mcq(mqp->ibqp.send_cq);
			spin_lock_irqsave(&send_mcq->lock, flags_cq);
			if (send_mcq->mcq.comp &&
			    mqp->ibqp.send_cq->comp_handler) {
				if (!send_mcq->mcq.reset_notify_added) {
					send_mcq->mcq.reset_notify_added = 1;
					list_add_tail(&send_mcq->mcq.reset_notify,
						      &cq_armed_list);
				}
			}
			spin_unlock_irqrestore(&send_mcq->lock, flags_cq);
		}
		spin_unlock_irqrestore(&mqp->sq.lock, flags_qp);
		spin_lock_irqsave(&mqp->rq.lock, flags_qp);
		/* no handling is needed for SRQ */
		if (!mqp->ibqp.srq) {
			if (mqp->rq.tail != mqp->rq.head) {
				recv_mcq = to_mcq(mqp->ibqp.recv_cq);
				spin_lock_irqsave(&recv_mcq->lock, flags_cq);
				if (recv_mcq->mcq.comp &&
				    mqp->ibqp.recv_cq->comp_handler) {
					if (!recv_mcq->mcq.reset_notify_added) {
						recv_mcq->mcq.reset_notify_added = 1;
						list_add_tail(&recv_mcq->mcq.reset_notify,
							      &cq_armed_list);
					}
				}
				spin_unlock_irqrestore(&recv_mcq->lock,
						       flags_cq);
			}
		}
		spin_unlock_irqrestore(&mqp->rq.lock, flags_qp);
	}
	/* At this point all inflight post-send operations have been flushed
	 * under the locks taken above; now arm all involved CQs so that the
	 * ULPs will poll them.
	 */
	list_for_each_entry(mcq, &cq_armed_list, reset_notify) {
		mcq->comp(mcq);
	}
	spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags);
}

static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
			  enum mlx5_dev_event event, unsigned long param)
{
@@ -1992,6 +2051,7 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
	case MLX5_DEV_EVENT_SYS_ERROR:
		ibdev->ib_active = false;
		ibev.event = IB_EVENT_DEVICE_FATAL;
		mlx5_ib_handle_internal_error(ibdev);
		break;

	case MLX5_DEV_EVENT_PORT_UP:
@@ -2595,6 +2655,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)

	mutex_init(&dev->flow_db.lock);
	mutex_init(&dev->cap_mask_mutex);
	INIT_LIST_HEAD(&dev->qp_list);
	spin_lock_init(&dev->reset_flow_resource_lock);

	if (ll == IB_LINK_LAYER_ETHERNET) {
		err = mlx5_enable_roce(dev);
8 changes: 8 additions & 0 deletions drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -380,6 +380,9 @@ struct mlx5_ib_qp {
	spinlock_t disable_page_faults_lock;
	struct mlx5_ib_pfault pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
#endif
	struct list_head qps_list;
	struct list_head cq_recv_list;
	struct list_head cq_send_list;
};

struct mlx5_ib_cq_buf {
@@ -441,6 +444,8 @@ struct mlx5_ib_cq {
	struct mlx5_ib_cq_buf *resize_buf;
	struct ib_umem *resize_umem;
	int cqe_size;
	struct list_head list_send_qp;
	struct list_head list_recv_qp;
	u32 create_flags;
	struct list_head wc_list;
	enum ib_cq_notify_flags notify_flags;
@@ -621,6 +626,9 @@ struct mlx5_ib_dev {
	struct srcu_struct mr_srcu;
#endif
	struct mlx5_ib_flow_db flow_db;
	/* protect resources needed as part of reset flow */
	spinlock_t reset_flow_resource_lock;
	struct list_head qp_list;
};

static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
4 changes: 4 additions & 0 deletions drivers/infiniband/hw/mlx5/mr.c
@@ -1193,12 +1193,16 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr = {};
	struct ib_send_wr *bad;
	int err;

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	mlx5_ib_init_umr_context(&umr_context);

	umrwr.wr.wr_cqe = &umr_context.cqe;
