Merge branch 'cxgb4-next'

Hariprasad Shenai says:

====================
Doorbell Drop Avoidance Bug Fixes for iw_cxgb4

This patch series provides fixes for Chelsio T4/T5 adapters
related to DB drop avoidance, plus a small fix related to keepalive on
iw_cxgb4.

The patch series is created against David Miller's 'net-next' tree
and includes patches on the cxgb4 and iw_cxgb4 drivers.

We would like to request that this patch series get merged via David
Miller's 'net-next' tree.

We have included all the maintainers of the respective drivers. Kindly
review the changes and let us know of any review comments.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller committed Mar 15, 2014
2 parents 57a7744 + 05eb238 commit 1c79a5a
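
For orientation before the diff: the iw_cxgb4 changes below rework the driver's doorbell (DB) flow-control state machine. What follows is an editor's sketch, not code from the series; it compresses the stop_queues()/resume_queues()/recover_queues() behavior visible in the device.c hunks into a toy transition function.

/* Editor's sketch (not from the patch): the DB state machine that the
 * device.c changes below implement. Locking, QP bookkeeping, and
 * hardware access are omitted.
 */
#include <stdio.h>

enum db_state { NORMAL, FLOW_CONTROL, RECOVERY, STOPPED };
enum db_event { DB_FULL, DB_EMPTY, DB_DROP };

static const char *db_state_str[] = {
    "NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"
};

static enum db_state db_step(enum db_state s, enum db_event e)
{
    switch (e) {
    case DB_FULL:   /* stop_queues(): hold all doorbells */
        return STOPPED;
    case DB_DROP:   /* recover_queues(): RECOVERY while resyncing,
                     * then back to STOPPED awaiting DB_EMPTY */
        return STOPPED;
    case DB_EMPTY:  /* resume_queues(): drain held doorbells in
                     * chunks (FLOW_CONTROL), then go NORMAL */
        return s == STOPPED ? NORMAL : s;
    }
    return s;
}

int main(void)
{
    enum db_state s = NORMAL;
    enum db_event trace[] = { DB_FULL, DB_DROP, DB_EMPTY };
    int i;

    for (i = 0; i < 3; i++) {
        s = db_step(s, trace[i]);
        printf("-> %s\n", db_state_str[s]);
    }
    return 0;
}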
Showing 11 changed files with 299 additions and 202 deletions.
24 changes: 12 additions & 12 deletions drivers/infiniband/hw/cxgb4/cm.c
@@ -1647,6 +1647,15 @@ static inline int act_open_has_tid(int status)
            status != CPL_ERR_ARP_MISS;
 }
 
+/* Returns whether a CPL status conveys negative advice.
+ */
+static int is_neg_adv(unsigned int status)
+{
+    return status == CPL_ERR_RTX_NEG_ADVICE ||
+           status == CPL_ERR_PERSIST_NEG_ADVICE ||
+           status == CPL_ERR_KEEPALV_NEG_ADVICE;
+}
+
 #define ACT_OPEN_RETRY_COUNT 2
 
 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
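
The new helper also treats keepalive negative advice as advisory, which is the "small fix related to keepalive" mentioned in the cover letter. Below is a minimal standalone check of the predicate; the CPL_ERR_* values are placeholders for illustration only (the real constants live in the cxgb4 t4_msg.h header).

/* Editor's sketch: standalone check of the consolidated predicate.
 * All CPL_ERR_* values below are made-up placeholders.
 */
#include <assert.h>

#define CPL_ERR_RTX_NEG_ADVICE     35  /* placeholder value */
#define CPL_ERR_PERSIST_NEG_ADVICE 36  /* placeholder value */
#define CPL_ERR_KEEPALV_NEG_ADVICE 37  /* placeholder value */
#define CPL_ERR_ABORT_FAILED       42  /* placeholder value */

static int is_neg_adv(unsigned int status)
{
    return status == CPL_ERR_RTX_NEG_ADVICE ||
           status == CPL_ERR_PERSIST_NEG_ADVICE ||
           status == CPL_ERR_KEEPALV_NEG_ADVICE;
}

int main(void)
{
    /* keepalive negative advice is now treated as advisory too */
    assert(is_neg_adv(CPL_ERR_KEEPALV_NEG_ADVICE));
    assert(!is_neg_adv(CPL_ERR_ABORT_FAILED));
    return 0;
}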
@@ -1835,7 +1844,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
     PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
          status, status2errno(status));
 
-    if (status == CPL_ERR_RTX_NEG_ADVICE) {
+    if (is_neg_adv(status)) {
         printk(KERN_WARNING MOD "Connection problems for atid %u\n",
                atid);
         return 0;
@@ -2265,15 +2274,6 @@ static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
     return 0;
 }
 
-/*
- * Returns whether an ABORT_REQ_RSS message is a negative advice.
- */
-static int is_neg_adv_abort(unsigned int status)
-{
-    return status == CPL_ERR_RTX_NEG_ADVICE ||
-           status == CPL_ERR_PERSIST_NEG_ADVICE;
-}
-
 static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
 {
     struct cpl_abort_req_rss *req = cplhdr(skb);
@@ -2287,7 +2287,7 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
     unsigned int tid = GET_TID(req);
 
     ep = lookup_tid(t, tid);
-    if (is_neg_adv_abort(req->status)) {
+    if (is_neg_adv(req->status)) {
         PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
              ep->hwtid);
         return 0;
@@ -3570,7 +3570,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
         kfree_skb(skb);
         return 0;
     }
-    if (is_neg_adv_abort(req->status)) {
+    if (is_neg_adv(req->status)) {
         PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
              ep->hwtid);
         kfree_skb(skb);
177 changes: 107 additions & 70 deletions drivers/infiniband/hw/cxgb4/device.c
@@ -64,6 +64,10 @@ struct uld_ctx {
 static LIST_HEAD(uld_ctx_list);
 static DEFINE_MUTEX(dev_mutex);
 
+#define DB_FC_RESUME_SIZE 64
+#define DB_FC_RESUME_DELAY 1
+#define DB_FC_DRAIN_THRESH 0
+
 static struct dentry *c4iw_debugfs_root;
 
 struct c4iw_debugfs_data {
@@ -282,7 +286,7 @@ static const struct file_operations stag_debugfs_fops = {
     .llseek = default_llseek,
 };
 
-static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY"};
+static char *db_state_str[] = {"NORMAL", "FLOW_CONTROL", "RECOVERY", "STOPPED"};
 
 static int stats_show(struct seq_file *seq, void *v)
 {
@@ -311,9 +315,10 @@ static int stats_show(struct seq_file *seq, void *v)
     seq_printf(seq, " DB FULL: %10llu\n", dev->rdev.stats.db_full);
     seq_printf(seq, " DB EMPTY: %10llu\n", dev->rdev.stats.db_empty);
     seq_printf(seq, " DB DROP: %10llu\n", dev->rdev.stats.db_drop);
-    seq_printf(seq, " DB State: %s Transitions %llu\n",
+    seq_printf(seq, " DB State: %s Transitions %llu FC Interruptions %llu\n",
                db_state_str[dev->db_state],
-               dev->rdev.stats.db_state_transitions);
+               dev->rdev.stats.db_state_transitions,
+               dev->rdev.stats.db_fc_interruptions);
     seq_printf(seq, "TCAM_FULL: %10llu\n", dev->rdev.stats.tcam_full);
     seq_printf(seq, "ACT_OFLD_CONN_FAILS: %10llu\n",
                dev->rdev.stats.act_ofld_conn_fails);
@@ -643,6 +648,12 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
         printk(KERN_ERR MOD "error %d initializing ocqp pool\n", err);
         goto err4;
     }
+    rdev->status_page = (struct t4_dev_status_page *)
+                        __get_free_page(GFP_KERNEL);
+    if (!rdev->status_page) {
+        pr_err(MOD "error allocating status page\n");
+        goto err4;
+    }
     return 0;
 err4:
     c4iw_rqtpool_destroy(rdev);
@@ -656,6 +667,7 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 
 static void c4iw_rdev_close(struct c4iw_rdev *rdev)
 {
+    free_page((unsigned long)rdev->status_page);
     c4iw_pblpool_destroy(rdev);
     c4iw_rqtpool_destroy(rdev);
     c4iw_destroy_resource(&rdev->resource);
@@ -703,18 +715,6 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
         pr_info("%s: On-Chip Queues not supported on this device.\n",
                 pci_name(infop->pdev));
 
-    if (!is_t4(infop->adapter_type)) {
-        if (!allow_db_fc_on_t5) {
-            db_fc_threshold = 100000;
-            pr_info("DB Flow Control Disabled.\n");
-        }
-
-        if (!allow_db_coalescing_on_t5) {
-            db_coalescing_threshold = -1;
-            pr_info("DB Coalescing Disabled.\n");
-        }
-    }
-
     devp = (struct c4iw_dev *)ib_alloc_device(sizeof(*devp));
     if (!devp) {
         printk(KERN_ERR MOD "Cannot allocate ib device\n");
@@ -749,6 +749,7 @@ static struct c4iw_dev *c4iw_alloc(const struct cxgb4_lld_info *infop)
     spin_lock_init(&devp->lock);
     mutex_init(&devp->rdev.stats.lock);
     mutex_init(&devp->db_mutex);
+    INIT_LIST_HEAD(&devp->db_fc_list);
 
     if (c4iw_debugfs_root) {
         devp->debugfs_root = debugfs_create_dir(
@@ -977,13 +978,16 @@ static int disable_qp_db(int id, void *p, void *data)
 
 static void stop_queues(struct uld_ctx *ctx)
 {
-    spin_lock_irq(&ctx->dev->lock);
-    if (ctx->dev->db_state == NORMAL) {
-        ctx->dev->rdev.stats.db_state_transitions++;
-        ctx->dev->db_state = FLOW_CONTROL;
+    unsigned long flags;
+
+    spin_lock_irqsave(&ctx->dev->lock, flags);
+    ctx->dev->rdev.stats.db_state_transitions++;
+    ctx->dev->db_state = STOPPED;
+    if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED)
         idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
-    }
-    spin_unlock_irq(&ctx->dev->lock);
+    else
+        ctx->dev->rdev.status_page->db_off = 1;
+    spin_unlock_irqrestore(&ctx->dev->lock, flags);
 }
 
 static int enable_qp_db(int id, void *p, void *data)
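
Note that stop_queues() now prefers flagging a shared status page over walking every QP: where the status page is supported, the kernel simply sets db_off and user-space doorbell ringers are expected to check it. Below is an editor's sketch of that handshake, assuming a minimal t4_dev_status_page layout; the real definition lives in the driver's t4.h, and the real user-space side is the libcxgb4 provider.

/* Editor's sketch, not driver code: a one-field status page and a
 * hypothetical user-library helper that honors db_off before ringing.
 */
#include <stdint.h>
#include <stdio.h>

struct t4_dev_status_page {
    uint8_t db_off;             /* 1 = kernel asks ringers to hold off */
};

static int ring_doorbell_if_allowed(volatile struct t4_dev_status_page *sp,
                                    void (*ring)(void *udb), void *udb)
{
    if (sp->db_off)
        return 0;               /* held: caller queues the work */
    ring(udb);                  /* would write the doorbell register */
    return 1;
}

static void fake_ring(void *udb) { (void)udb; puts("ring!"); }

int main(void)
{
    struct t4_dev_status_page sp = { .db_off = 1 };

    ring_doorbell_if_allowed(&sp, fake_ring, NULL);   /* held */
    sp.db_off = 0;                                    /* kernel: resume */
    ring_doorbell_if_allowed(&sp, fake_ring, NULL);   /* rings */
    return 0;
}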
@@ -994,15 +998,70 @@
     return 0;
 }
 
+static void resume_rc_qp(struct c4iw_qp *qp)
+{
+    spin_lock(&qp->lock);
+    t4_ring_sq_db(&qp->wq, qp->wq.sq.wq_pidx_inc);
+    qp->wq.sq.wq_pidx_inc = 0;
+    t4_ring_rq_db(&qp->wq, qp->wq.rq.wq_pidx_inc);
+    qp->wq.rq.wq_pidx_inc = 0;
+    spin_unlock(&qp->lock);
+}
+
+static void resume_a_chunk(struct uld_ctx *ctx)
+{
+    int i;
+    struct c4iw_qp *qp;
+
+    for (i = 0; i < DB_FC_RESUME_SIZE; i++) {
+        qp = list_first_entry(&ctx->dev->db_fc_list, struct c4iw_qp,
+                              db_fc_entry);
+        list_del_init(&qp->db_fc_entry);
+        resume_rc_qp(qp);
+        if (list_empty(&ctx->dev->db_fc_list))
+            break;
+    }
+}
+
 static void resume_queues(struct uld_ctx *ctx)
 {
     spin_lock_irq(&ctx->dev->lock);
-    if (ctx->dev->qpcnt <= db_fc_threshold &&
-        ctx->dev->db_state == FLOW_CONTROL) {
-        ctx->dev->db_state = NORMAL;
-        ctx->dev->rdev.stats.db_state_transitions++;
-        idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
-    }
+    if (ctx->dev->db_state != STOPPED)
+        goto out;
+    ctx->dev->db_state = FLOW_CONTROL;
+    while (1) {
+        if (list_empty(&ctx->dev->db_fc_list)) {
+            WARN_ON(ctx->dev->db_state != FLOW_CONTROL);
+            ctx->dev->db_state = NORMAL;
+            ctx->dev->rdev.stats.db_state_transitions++;
+            if (ctx->dev->rdev.flags & T4_STATUS_PAGE_DISABLED) {
+                idr_for_each(&ctx->dev->qpidr, enable_qp_db,
+                             NULL);
+            } else {
+                ctx->dev->rdev.status_page->db_off = 0;
+            }
+            break;
+        } else {
+            if (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1)
+                < (ctx->dev->rdev.lldi.dbfifo_int_thresh <<
+                   DB_FC_DRAIN_THRESH)) {
+                resume_a_chunk(ctx);
+            }
+            if (!list_empty(&ctx->dev->db_fc_list)) {
+                spin_unlock_irq(&ctx->dev->lock);
+                if (DB_FC_RESUME_DELAY) {
+                    set_current_state(TASK_UNINTERRUPTIBLE);
+                    schedule_timeout(DB_FC_RESUME_DELAY);
+                }
+                spin_lock_irq(&ctx->dev->lock);
+                if (ctx->dev->db_state != FLOW_CONTROL)
+                    break;
+            }
+        }
+    }
+out:
+    if (ctx->dev->db_state != NORMAL)
+        ctx->dev->rdev.stats.db_fc_interruptions++;
     spin_unlock_irq(&ctx->dev->lock);
 }
 
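The resume_queues() above no longer re-enables everything at once: it rings at most DB_FC_RESUME_SIZE queues per pass, only while the hardware doorbell fifo sits below its threshold, and sleeps DB_FC_RESUME_DELAY jiffies between passes. Here is an editor's standalone model of that pacing, with all cxgb4/lldi calls replaced by stand-ins.

/* Editor's sketch: chunked resume pacing. Stand-in functions replace
 * the driver's list handling, fifo query, and schedule_timeout().
 */
#include <stdio.h>

#define DB_FC_RESUME_SIZE 64

static int pending_qps = 200;   /* stand-in for db_fc_list length */

static int dbfifo_below_threshold(void) { return 1; }  /* stand-in */
static void resume_one_qp(void) { pending_qps--; }     /* stand-in */
static void small_sleep(void) { /* schedule_timeout() stand-in */ }

static void resume_queues(void)
{
    while (pending_qps > 0) {
        if (dbfifo_below_threshold()) {
            int n = pending_qps < DB_FC_RESUME_SIZE ?
                    pending_qps : DB_FC_RESUME_SIZE;
            int i;

            for (i = 0; i < n; i++)
                resume_one_qp();        /* ring one chunk */
        }
        if (pending_qps > 0)
            small_sleep();              /* let the fifo drain */
    }
    printf("all queues resumed\n");
}

int main(void) { resume_queues(); return 0; }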
@@ -1028,12 +1087,12 @@ static int count_qps(int id, void *p, void *data)
     return 0;
 }
 
-static void deref_qps(struct qp_list qp_list)
+static void deref_qps(struct qp_list *qp_list)
 {
     int idx;
 
-    for (idx = 0; idx < qp_list.idx; idx++)
-        c4iw_qp_rem_ref(&qp_list.qps[idx]->ibqp);
+    for (idx = 0; idx < qp_list->idx; idx++)
+        c4iw_qp_rem_ref(&qp_list->qps[idx]->ibqp);
 }
 
 static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
@@ -1044,30 +1103,40 @@ static void recover_lost_dbs(struct uld_ctx *ctx, struct qp_list *qp_list)
     for (idx = 0; idx < qp_list->idx; idx++) {
         struct c4iw_qp *qp = qp_list->qps[idx];
 
+        spin_lock_irq(&qp->rhp->lock);
+        spin_lock(&qp->lock);
         ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
                                   qp->wq.sq.qid,
                                   t4_sq_host_wq_pidx(&qp->wq),
                                   t4_sq_wq_size(&qp->wq));
         if (ret) {
-            printk(KERN_ERR MOD "%s: Fatal error - "
+            pr_err(KERN_ERR MOD "%s: Fatal error - "
                    "DB overflow recovery failed - "
                    "error syncing SQ qid %u\n",
                    pci_name(ctx->lldi.pdev), qp->wq.sq.qid);
+            spin_unlock(&qp->lock);
+            spin_unlock_irq(&qp->rhp->lock);
             return;
         }
+        qp->wq.sq.wq_pidx_inc = 0;
 
         ret = cxgb4_sync_txq_pidx(qp->rhp->rdev.lldi.ports[0],
                                   qp->wq.rq.qid,
                                   t4_rq_host_wq_pidx(&qp->wq),
                                   t4_rq_wq_size(&qp->wq));
 
         if (ret) {
-            printk(KERN_ERR MOD "%s: Fatal error - "
+            pr_err(KERN_ERR MOD "%s: Fatal error - "
                    "DB overflow recovery failed - "
                    "error syncing RQ qid %u\n",
                    pci_name(ctx->lldi.pdev), qp->wq.rq.qid);
+            spin_unlock(&qp->lock);
+            spin_unlock_irq(&qp->rhp->lock);
             return;
         }
+        qp->wq.rq.wq_pidx_inc = 0;
+        spin_unlock(&qp->lock);
+        spin_unlock_irq(&qp->rhp->lock);
 
         /* Wait for the dbfifo to drain */
         while (cxgb4_dbfifo_count(qp->rhp->rdev.lldi.ports[0], 1) > 0) {
@@ -1083,44 +1152,30 @@ static void recover_queues(struct uld_ctx *ctx)
     struct qp_list qp_list;
     int ret;
 
-    /* lock out kernel db ringers */
-    mutex_lock(&ctx->dev->db_mutex);
-
-    /* put all queues in to recovery mode */
-    spin_lock_irq(&ctx->dev->lock);
-    ctx->dev->db_state = RECOVERY;
-    ctx->dev->rdev.stats.db_state_transitions++;
-    idr_for_each(&ctx->dev->qpidr, disable_qp_db, NULL);
-    spin_unlock_irq(&ctx->dev->lock);
-
     /* slow everybody down */
     set_current_state(TASK_UNINTERRUPTIBLE);
     schedule_timeout(usecs_to_jiffies(1000));
 
-    /* Wait for the dbfifo to completely drain. */
-    while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        schedule_timeout(usecs_to_jiffies(10));
-    }
-
     /* flush the SGE contexts */
     ret = cxgb4_flush_eq_cache(ctx->dev->rdev.lldi.ports[0]);
     if (ret) {
         printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
                pci_name(ctx->lldi.pdev));
-        goto out;
+        return;
     }
 
     /* Count active queues so we can build a list of queues to recover */
     spin_lock_irq(&ctx->dev->lock);
+    WARN_ON(ctx->dev->db_state != STOPPED);
+    ctx->dev->db_state = RECOVERY;
     idr_for_each(&ctx->dev->qpidr, count_qps, &count);
 
     qp_list.qps = kzalloc(count * sizeof *qp_list.qps, GFP_ATOMIC);
     if (!qp_list.qps) {
         printk(KERN_ERR MOD "%s: Fatal error - DB overflow recovery failed\n",
                pci_name(ctx->lldi.pdev));
         spin_unlock_irq(&ctx->dev->lock);
-        goto out;
+        return;
     }
     qp_list.idx = 0;
 
@@ -1133,29 +1188,13 @@ static void recover_queues(struct uld_ctx *ctx)
     recover_lost_dbs(ctx, &qp_list);
 
     /* we're almost done! deref the qps and clean up */
-    deref_qps(qp_list);
+    deref_qps(&qp_list);
     kfree(qp_list.qps);
 
-    /* Wait for the dbfifo to completely drain again */
-    while (cxgb4_dbfifo_count(ctx->dev->rdev.lldi.ports[0], 1) > 0) {
-        set_current_state(TASK_UNINTERRUPTIBLE);
-        schedule_timeout(usecs_to_jiffies(10));
-    }
-
-    /* resume the queues */
     spin_lock_irq(&ctx->dev->lock);
-    if (ctx->dev->qpcnt > db_fc_threshold)
-        ctx->dev->db_state = FLOW_CONTROL;
-    else {
-        ctx->dev->db_state = NORMAL;
-        idr_for_each(&ctx->dev->qpidr, enable_qp_db, NULL);
-    }
-    ctx->dev->rdev.stats.db_state_transitions++;
+    WARN_ON(ctx->dev->db_state != RECOVERY);
+    ctx->dev->db_state = STOPPED;
     spin_unlock_irq(&ctx->dev->lock);
-
-out:
-    /* start up kernel db ringers again */
-    mutex_unlock(&ctx->dev->db_mutex);
 }
 
 static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
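
The core idea of the recovery path is to resynchronize the hardware's producer index (pidx) with the host's after doorbells were dropped, then clear the deferred-increment counts. An editor's sketch of that idea follows; the wq layout and the sync call are stand-ins for the driver's t4_wq and cxgb4_sync_txq_pidx().

/* Editor's sketch: after a DB drop, hardware's notion of the producer
 * index may lag the host's. Recovery replays the host pidx to hardware
 * and clears the increments that were queued while stopped.
 */
#include <stdio.h>

struct wq {
    unsigned host_pidx;     /* where software has written to */
    unsigned hw_pidx;       /* what hardware last heard */
    unsigned wq_pidx_inc;   /* increments queued while stopped */
};

static int sync_txq_pidx(struct wq *q)  /* cxgb4_sync_txq_pidx stand-in */
{
    q->hw_pidx = q->host_pidx;          /* replay the true pidx */
    return 0;
}

int main(void)
{
    struct wq sq = { .host_pidx = 42, .hw_pidx = 17, .wq_pidx_inc = 25 };

    if (sync_txq_pidx(&sq) == 0)
        sq.wq_pidx_inc = 0;             /* nothing left to defer */
    printf("hw pidx now %u\n", sq.hw_pidx);
    return 0;
}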
@@ -1165,9 +1204,7 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...)
     switch (control) {
     case CXGB4_CONTROL_DB_FULL:
         stop_queues(ctx);
-        mutex_lock(&ctx->dev->rdev.stats.lock);
         ctx->dev->rdev.stats.db_full++;
-        mutex_unlock(&ctx->dev->rdev.stats.lock);
         break;
     case CXGB4_CONTROL_DB_EMPTY:
         resume_queues(ctx);