Skip to content

Commit

Permalink
net/mlx4_core: Use tasklet for user-space CQ completion events
Browse files Browse the repository at this point in the history
Previously, we've fired all our completion callbacks straight from our ISR.

Some of those callbacks were lightweight (for example, mlx4_en's and
IPoIB napi callbacks), but some of them did more work (for example,
the user-space RDMA stack uverbs' completion handler). Besides that,
doing more than the minimal work in an ISR is generally considered wrong,
and it could even lead to a hard lockup of the system: when a lot
of completion events are generated by the hardware, the loop over those
events could run so long that the system watchdog reports a hard
lockup.

In order to avoid that, add a new way of invoking completion event
callbacks. In the interrupt itself, we add the CQs which received a completion
event to a per-EQ list and schedule a tasklet. In the tasklet context
we loop over all the CQs in the list and invoke the user callback.

Signed-off-by: Matan Barak <matanb@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
  • Loading branch information
Matan Barak authored and David S. Miller committed Dec 11, 2014
1 parent 383677d commit 3dca0f4
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 2 deletions.
5 changes: 4 additions & 1 deletion drivers/infiniband/hw/mlx4/cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,10 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector
if (err)
goto err_dbmap;

cq->mcq.comp = mlx4_ib_cq_comp;
if (context)
cq->mcq.tasklet_ctx.comp = mlx4_ib_cq_comp;
else
cq->mcq.comp = mlx4_ib_cq_comp;
cq->mcq.event = mlx4_ib_cq_event;

if (context)
Expand Down
50 changes: 50 additions & 0 deletions drivers/net/ethernet/mellanox/mlx4/cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,51 @@
#define MLX4_CQ_STATE_ARMED_SOL ( 6 << 8)
#define MLX4_EQ_STATE_FIRED (10 << 8)

/* Time budget, in milliseconds, for one run of mlx4_cq_tasklet_cb(). */
#define TASKLET_MAX_TIME 2
/* The same budget converted to jiffies. */
#define TASKLET_MAX_TIME_JIFFIES msecs_to_jiffies(TASKLET_MAX_TIME)

/*
 * mlx4_cq_tasklet_cb - tasklet handler that runs the deferred CQ
 * completion callbacks queued on one EQ's tasklet context.
 * @data: the struct mlx4_eq_tasklet to service, passed as an unsigned long.
 *
 * Each run is limited to roughly TASKLET_MAX_TIME milliseconds. If CQs are
 * still waiting on process_list when the loop stops, the tasklet schedules
 * itself again.
 */
void mlx4_cq_tasklet_cb(unsigned long data)
{
unsigned long flags;
/* Deadline (in jiffies) for this invocation. */
unsigned long end = jiffies + TASKLET_MAX_TIME_JIFFIES;
struct mlx4_eq_tasklet *ctx = (struct mlx4_eq_tasklet *)data;
struct mlx4_cq *mcq, *temp;

/*
 * Move everything queued so far to the tail of process_list while holding
 * the lock. mlx4_add_cq_to_tasklet() only appends to ctx->list, so the
 * loop below walks a list that function does not append to. Entries left
 * over from a previous run stay at the head of process_list.
 */
spin_lock_irqsave(&ctx->lock, flags);
list_splice_tail_init(&ctx->list, &ctx->process_list);
spin_unlock_irqrestore(&ctx->lock, flags);

list_for_each_entry_safe(mcq, temp, &ctx->process_list, tasklet_ctx.list) {
/*
 * Unlink before invoking the callback: once the node is empty again,
 * mlx4_add_cq_to_tasklet() will queue this CQ on ctx->list if it is
 * called for it again.
 */
list_del_init(&mcq->tasklet_ctx.list);
mcq->tasklet_ctx.comp(mcq);
/*
 * Drop the reference taken when the CQ was queued; if it was the last
 * one, signal mcq->free.
 */
if (atomic_dec_and_test(&mcq->refcount))
complete(&mcq->free);
/* Checked after the callback, so at least one CQ is handled per run. */
if (time_after(jiffies, end))
break;
}

/* Ran out of time with work left: come back for the remainder. */
if (!list_empty(&ctx->process_list))
tasklet_schedule(&ctx->task);
}

/* Queue @cq on its EQ's tasklet list so that its tasklet_ctx.comp callback
 * is invoked later by mlx4_cq_tasklet_cb() instead of right here.
 */
static void mlx4_add_cq_to_tasklet(struct mlx4_cq *cq)
{
	struct mlx4_eq_tasklet *eq_ctx = cq->tasklet_ctx.priv;
	unsigned long irq_flags;

	spin_lock_irqsave(&eq_ctx->lock, irq_flags);
	/* NOTE: once migrating CQs between EQs is implemented, this point
	 * has to be synchronized with it, because completions from the old
	 * EQ may still arrive while a CQ is being migrated.
	 */
	/* An empty list node means the CQ is not queued yet: queue it once
	 * and take a reference, which mlx4_cq_tasklet_cb() drops after it
	 * has run the callback.
	 */
	if (list_empty_careful(&cq->tasklet_ctx.list)) {
		atomic_inc(&cq->refcount);
		list_add_tail(&cq->tasklet_ctx.list, &eq_ctx->list);
	}
	spin_unlock_irqrestore(&eq_ctx->lock, irq_flags);
}

void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn)
{
struct mlx4_cq *cq;
Expand Down Expand Up @@ -292,6 +337,11 @@ int mlx4_cq_alloc(struct mlx4_dev *dev, int nent,
cq->uar = uar;
atomic_set(&cq->refcount, 1);
init_completion(&cq->free);
cq->comp = mlx4_add_cq_to_tasklet;
cq->tasklet_ctx.priv =
&priv->eq_table.eq[cq->vector].tasklet_ctx;
INIT_LIST_HEAD(&cq->tasklet_ctx.list);


cq->irq = priv->eq_table.eq[cq->vector].irq;
return 0;
Expand Down
16 changes: 15 additions & 1 deletion drivers/net/ethernet/mellanox/mlx4/eq.c
Original file line number Diff line number Diff line change
Expand Up @@ -450,7 +450,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
{
struct mlx4_priv *priv = mlx4_priv(dev);
struct mlx4_eqe *eqe;
int cqn;
int cqn = -1;
int eqes_found = 0;
int set_ci = 0;
int port;
Expand Down Expand Up @@ -758,6 +758,13 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)

eq_set_ci(eq, 1);

/* cqn is 24bit wide but is initialized such that its higher bits
* are ones too. Thus, if we got any event, cqn's high bits should be off
* and we need to schedule the tasklet.
*/
if (!(cqn & ~0xffffff))
tasklet_schedule(&eq->tasklet_ctx.task);

return eqes_found;
}

Expand Down Expand Up @@ -971,6 +978,12 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent,

eq->cons_index = 0;

INIT_LIST_HEAD(&eq->tasklet_ctx.list);
INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
spin_lock_init(&eq->tasklet_ctx.lock);
tasklet_init(&eq->tasklet_ctx.task, mlx4_cq_tasklet_cb,
(unsigned long)&eq->tasklet_ctx);

return err;

err_out_free_mtt:
Expand Down Expand Up @@ -1027,6 +1040,7 @@ static void mlx4_free_eq(struct mlx4_dev *dev,
}
}
synchronize_irq(eq->irq);
tasklet_disable(&eq->tasklet_ctx.task);

mlx4_mtt_cleanup(dev, &eq->mtt);
for (i = 0; i < npages; ++i)
Expand Down
12 changes: 12 additions & 0 deletions drivers/net/ethernet/mellanox/mlx4/mlx4.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
#include <linux/timer.h>
#include <linux/semaphore.h>
#include <linux/workqueue.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>

#include <linux/mlx4/device.h>
#include <linux/mlx4/driver.h>
Expand Down Expand Up @@ -373,6 +375,14 @@ struct mlx4_srq_context {
__be64 db_rec_addr;
};

/* Per-EQ state used to defer CQ completion callbacks to a tasklet. */
struct mlx4_eq_tasklet {
/* CQs queued by mlx4_add_cq_to_tasklet(); accessed under @lock */
struct list_head list;
/* CQs being handled by mlx4_cq_tasklet_cb(); filled by splicing @list */
struct list_head process_list;
/* tasklet that runs mlx4_cq_tasklet_cb() with this structure as its argument */
struct tasklet_struct task;
/* lock on completion tasklet list */
spinlock_t lock;
};

struct mlx4_eq {
struct mlx4_dev *dev;
void __iomem *doorbell;
Expand All @@ -383,6 +393,7 @@ struct mlx4_eq {
int nent;
struct mlx4_buf_list *page_list;
struct mlx4_mtt mtt;
struct mlx4_eq_tasklet tasklet_ctx;
};

struct mlx4_slave_eqe {
Expand Down Expand Up @@ -1146,6 +1157,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev);
int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
unsigned long timeout);

void mlx4_cq_tasklet_cb(unsigned long data);
void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);

Expand Down
5 changes: 5 additions & 0 deletions include/linux/mlx4/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,11 @@ struct mlx4_cq {

atomic_t refcount;
struct completion free;
struct {
struct list_head list;
void (*comp)(struct mlx4_cq *);
void *priv;
} tasklet_ctx;
};

struct mlx4_qp {
Expand Down

0 comments on commit 3dca0f4

Please sign in to comment.