Skip to content

Commit

Permalink
RDMA/cxgb4: Add DB Overflow Avoidance
Browse files Browse the repository at this point in the history
Get FULL/EMPTY/DROP events from LLD.  On FULL event, disable normal
user mode DB rings.

Add modify_qp semantics to allow user processes to call into the
kernel to ring doobells without overflowing.

Add DB Full/Empty/Drop stats.

Mark queues when created indicating the doorbell state.

If we're in the middle of db overflow avoidance, then newly created
queues should start out in this mode.

Bump the C4IW_UVERBS_ABI_VERSION to 2 so the user mode library can
know if the driver supports the kernel mode db ringing.

Signed-off-by: Vipul Pandya <vipul@chelsio.com>
Signed-off-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
  • Loading branch information
Vipul Pandya authored and Roland Dreier committed May 18, 2012
1 parent 8d81ef3 commit 2c97478
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 12 deletions.
84 changes: 78 additions & 6 deletions drivers/infiniband/hw/cxgb4/device.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@ MODULE_DESCRIPTION("Chelsio T4 RDMA Driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);

struct uld_ctx {
struct list_head entry;
struct cxgb4_lld_info lldi;
struct c4iw_dev *dev;
};

static LIST_HEAD(uld_ctx_list);
static DEFINE_MUTEX(dev_mutex);

Expand Down Expand Up @@ -263,6 +269,9 @@ static int stats_show(struct seq_file *seq, void *v)
seq_printf(seq, " OCQPMEM: %10llu %10llu %10llu\n",
dev->rdev.stats.ocqp.total, dev->rdev.stats.ocqp.cur,
dev->rdev.stats.ocqp.max);
seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
return 0;
}

Expand All @@ -283,6 +292,9 @@ static ssize_t stats_clear(struct file *file, const char __user *buf,
dev->rdev.stats.pbl.max = 0;
dev->rdev.stats.rqt.max = 0;
dev->rdev.stats.ocqp.max = 0;
dev->rdev.stats.db_full = 0;
dev->rdev.stats.db_empty = 0;
dev->rdev.stats.db_drop = 0;
mutex_unlock(&dev->rdev.stats.lock);
return count;
}
Expand Down Expand Up @@ -443,12 +455,6 @@ static void c4iw_rdev_close(struct c4iw_rdev *rdev)
c4iw_destroy_resource(&rdev->resource);
}

struct uld_ctx {
struct list_head entry;
struct cxgb4_lld_info lldi;
struct c4iw_dev *dev;
};

static void c4iw_dealloc(struct uld_ctx *ctx)
{
c4iw_rdev_close(&ctx->dev->rdev);
Expand Down Expand Up @@ -514,6 +520,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
idr_init(&devp->mmidr);
spin_lock_init(&devp->lock);
mutex_init(&devp->rdev.stats.lock);
mutex_init(&devp->db_mutex);

if (c4iw_debugfs_root) {
devp->debugfs_root = debugfs_create_dir(
Expand Down Expand Up @@ -659,11 +666,76 @@ static int c4iw_uld_state_change(void *handle, enum cxgb4_state new_state)
return 0;
}

static int disable_qp_db(int id, void *p, void *data)
{
struct c4iw_qp *qp = p;

t4_disable_wq_db(&qp->wq);
return 0;
}

static void stop_queues(struct uld_ctx *ctx)
{
spin_lock_irq(&ctx->dev->lock);
ctx->dev->db_state = FLOW_CONTROL;
idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
spin_unlock_irq(&ctx->dev->lock);
}

static int enable_qp_db(int id, void *p, void *data)
{
struct c4iw_qp *qp = p;

t4_enable_wq_db(&qp->wq);
return 0;
}

static void resume_queues(struct uld_ctx *ctx)
{
spin_lock_irq(&ctx->dev->lock);
ctx->dev->db_state = NORMAL;
idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
spin_unlock_irq(&ctx->dev->lock);
}

static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
{
struct uld_ctx *ctx = handle;

switch (control) {
case CXGB4_CONTROL_DB_FULL:
stop_queues(ctx);
mutex_lock(&ctx->dev->rdev.stats.lock);
ctx->dev->rdev.stats.db_full++;
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
case CXGB4_CONTROL_DB_EMPTY:
resume_queues(ctx);
mutex_lock(&ctx->dev->rdev.stats.lock);
ctx->dev->rdev.stats.db_empty++;
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
case CXGB4_CONTROL_DB_DROP:
printk(KERN_WARNING MOD "%s: Fatal DB DROP\n",
pci_name(ctx->lldi.pdev));
mutex_lock(&ctx->dev->rdev.stats.lock);
ctx->dev->rdev.stats.db_drop++;
mutex_unlock(&ctx->dev->rdev.stats.lock);
break;
default:
printk(KERN_WARNING MOD "%s: unknown control cmd %u\n",
pci_name(ctx->lldi.pdev), control);
break;
}
return 0;
}

static struct cxgb4_uld_info c4iw_uld_info = {
.name = DRV_NAME,
.add = c4iw_uld_add,
.rx_handler = c4iw_uld_rx_handler,
.state_change = c4iw_uld_state_change,
.control = c4iw_uld_control,
};

static int __init c4iw_init_module(void)
Expand Down
37 changes: 33 additions & 4 deletions drivers/infiniband/hw/cxgb4/iw_cxgb4.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ struct c4iw_stats {
struct c4iw_stat pbl;
struct c4iw_stat rqt;
struct c4iw_stat ocqp;
u64 db_full;
u64 db_empty;
u64 db_drop;
};

struct c4iw_rdev {
Expand Down Expand Up @@ -192,6 +195,12 @@ static inline int c4iw_wait_for_reply(struct c4iw_rdev *rdev,
return wr_waitp->ret;
}

enum db_state {
NORMAL = 0,
FLOW_CONTROL = 1,
RECOVERY = 2
};

struct c4iw_dev {
struct ib_device ibdev;
struct c4iw_rdev rdev;
Expand All @@ -200,7 +209,9 @@ struct c4iw_dev {
struct idr qpidr;
struct idr mmidr;
spinlock_t lock;
struct mutex db_mutex;
struct dentry *debugfs_root;
enum db_state db_state;
};

static inline struct c4iw_dev *to_c4iw_dev(struct ib_device *ibdev)
Expand Down Expand Up @@ -228,24 +239,38 @@ static inline struct c4iw_mr *get_mhp(struct c4iw_dev *rhp, u32 mmid)
return idr_find(&rhp->mmidr, mmid);
}

static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
void *handle, u32 id)
static inline int _insert_handle(struct c4iw_dev *rhp, struct idr *idr,
void *handle, u32 id, int lock)
{
int ret;
int newid;

do {
if (!idr_pre_get(idr, GFP_KERNEL))
return -ENOMEM;
spin_lock_irq(&rhp->lock);
if (lock)
spin_lock_irq(&rhp->lock);
ret = idr_get_new_above(idr, handle, id, &newid);
BUG_ON(newid != id);
spin_unlock_irq(&rhp->lock);
if (lock)
spin_unlock_irq(&rhp->lock);
} while (ret == -EAGAIN);

return ret;
}

static inline int insert_handle(struct c4iw_dev *rhp, struct idr *idr,
void *handle, u32 id)
{
return _insert_handle(rhp, idr, handle, id, 1);
}

static inline int insert_handle_nolock(struct c4iw_dev *rhp, struct idr *idr,
void *handle, u32 id)
{
return _insert_handle(rhp, idr, handle, id, 0);
}

static inline void remove_handle(struct c4iw_dev *rhp, struct idr *idr, u32 id)
{
spin_lock_irq(&rhp->lock);
Expand Down Expand Up @@ -370,6 +395,8 @@ struct c4iw_qp_attributes {
struct c4iw_ep *llp_stream_handle;
u8 layer_etype;
u8 ecode;
u16 sq_db_inc;
u16 rq_db_inc;
};

struct c4iw_qp {
Expand Down Expand Up @@ -444,6 +471,8 @@ static inline void insert_mmap(struct c4iw_ucontext *ucontext,

enum c4iw_qp_attr_mask {
C4IW_QP_ATTR_NEXT_STATE = 1 << 0,
C4IW_QP_ATTR_SQ_DB = 1<<1,
C4IW_QP_ATTR_RQ_DB = 1<<2,
C4IW_QP_ATTR_ENABLE_RDMA_READ = 1 << 7,
C4IW_QP_ATTR_ENABLE_RDMA_WRITE = 1 << 8,
C4IW_QP_ATTR_ENABLE_RDMA_BIND = 1 << 9,
Expand Down
51 changes: 50 additions & 1 deletion drivers/infiniband/hw/cxgb4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@

#include "iw_cxgb4.h"

static int db_delay_usecs = 1;
module_param(db_delay_usecs, int, 0644);
MODULE_PARM_DESC(db_delay_usecs, "Usecs to delay awaiting db fifo to drain");

static int ocqp_support = 1;
module_param(ocqp_support, int, 0644);
MODULE_PARM_DESC(ocqp_support, "Support on-chip SQs (default=1)");
Expand Down Expand Up @@ -1128,6 +1132,29 @@ static int rdma_init(struct c4iw_dev *rhp, struct c4iw_qp *qhp)
return ret;
}

/*
* Called by the library when the qp has user dbs disabled due to
* a DB_FULL condition. This function will single-thread all user
* DB rings to avoid overflowing the hw db-fifo.
*/
static int ring_kernel_db(struct c4iw_qp *qhp, u32 qid, u16 inc)
{
int delay = db_delay_usecs;

mutex_lock(&qhp->rhp->db_mutex);
do {
if (cxgb4_dbfifo_count(qhp->rhp->rdev.lldi.ports[0], 1) < 768) {
writel(V_QID(qid) | V_PIDX(inc), qhp->wq.db);
break;
}
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(usecs_to_jiffies(delay));
delay = min(delay << 1, 200000);
} while (1);
mutex_unlock(&qhp->rhp->db_mutex);
return 0;
}

int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
enum c4iw_qp_attr_mask mask,
struct c4iw_qp_attributes *attrs,
Expand Down Expand Up @@ -1176,6 +1203,15 @@ int c4iw_modify_qp(struct c4iw_dev *rhp, struct c4iw_qp *qhp,
qhp->attr = newattr;
}

if (mask & C4IW_QP_ATTR_SQ_DB) {
ret = ring_kernel_db(qhp, qhp->wq.sq.qid, attrs->sq_db_inc);
goto out;
}
if (mask & C4IW_QP_ATTR_RQ_DB) {
ret = ring_kernel_db(qhp, qhp->wq.rq.qid, attrs->rq_db_inc);
goto out;
}

if (!(mask & C4IW_QP_ATTR_NEXT_STATE))
goto out;
if (qhp->attr.state == attrs->next_state)
Expand Down Expand Up @@ -1469,7 +1505,11 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
init_waitqueue_head(&qhp->wait);
atomic_set(&qhp->refcnt, 1);

ret = insert_handle(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
spin_lock_irq(&rhp->lock);
if (rhp->db_state != NORMAL)
t4_disable_wq_db(&qhp->wq);
ret = insert_handle_nolock(rhp, &rhp->qpidr, qhp, qhp->wq.sq.qid);
spin_unlock_irq(&rhp->lock);
if (ret)
goto err2;

Expand Down Expand Up @@ -1613,6 +1653,15 @@ int c4iw_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
C4IW_QP_ATTR_ENABLE_RDMA_WRITE |
C4IW_QP_ATTR_ENABLE_RDMA_BIND) : 0;

/*
* Use SQ_PSN and RQ_PSN to pass in IDX_INC values for
* ringing the queue db when we're in DB_FULL mode.
*/
attrs.sq_db_inc = attr->sq_psn;
attrs.rq_db_inc = attr->rq_psn;
mask |= (attr_mask & IB_QP_SQ_PSN) ? C4IW_QP_ATTR_SQ_DB : 0;
mask |= (attr_mask & IB_QP_RQ_PSN) ? C4IW_QP_ATTR_RQ_DB : 0;

return c4iw_modify_qp(rhp, qhp, mask, &attrs, 0);
}

Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/cxgb4/user.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
#ifndef __C4IW_USER_H__
#define __C4IW_USER_H__

#define C4IW_UVERBS_ABI_VERSION 1
#define C4IW_UVERBS_ABI_VERSION 2

/*
* Make sure that all structs defined in this file remain laid out so
Expand Down

0 comments on commit 2c97478

Please sign in to comment.