Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "This is the second pull request for the rdma subsystem.  Most of the
  patches are small and obvious.  I took two patches in that are larger
  than I wanted this late in the cycle.

  The first is the hfi1 patch that implements a work queue to test the
  QSFP read state.  I originally rejected the first patch for this
  (which would have placed up to 20 seconds' worth of udelays in their
  probe routine).  They then rewrote it the way I wanted (use delayed
  work tasks to wait asynchronously up to 20 seconds for the QSFP to
  come alive), so I can't really complain about the size of getting what
  I asked for :-/.

  The second is large because it switches the rcu locking in the debugfs
  code.  Since a locking change like this is done all at once, the size
  is what it is.  It resolves a litany of debug messages from the
  kernel, so I pulled it in for -rc.

  The rest are all typical -rc worthy patches I think.

  There will still be a third -rc pull request from the rdma subsystem
  this release.  I hope to have that one ready to go by the end of this
  week or early next.

  Summary:

   - a smattering of small fixes across the core, ipoib, i40iw, isert,
     cxgb4, and mlx4

   - a slightly larger group of fixes to each of mlx5 and hfi1"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/hfi1: Rework debugfs to use SRCU
  IB/hfi1: Make n_krcvqs be an unsigned long integer
  IB/hfi1: Add QSFP sanity pre-check
  IB/hfi1: Fix AHG KDETH Intr shift
  IB/hfi1: Fix SGE length for misaligned PIO copy
  IB/mlx5: Don't return errors from poll_cq
  IB/mlx5: Use TIR number based on selector
  IB/mlx5: Simplify code by removing return variable
  IB/mlx5: Return EINVAL when caller specifies too many SGEs
  IB/mlx4: Don't return errors from poll_cq
  Revert "IB/mlx4: Return EAGAIN for any error in mlx4_ib_poll_one"
  IB/ipoib: Fix memory corruption in ipoib cm mode connect flow
  IB/core: Fix use after free in send_leave function
  IB/cxgb4: Make _free_qp static to silence build warning
  IB/isert: Properly release resources on DEVICE_REMOVAL
  IB/hfi1: Fix the size parameter to find_first_bit
  IB/mlx5: Fix the size parameter to find_first_bit
  IB/hfi1: Clean up type used and casting
  i40iw: Receive notification events correctly
  i40iw: Update hw_iwarp_state
  • Loading branch information
Linus Torvalds committed Sep 6, 2016
2 parents 46738ab + 16170d9 commit d060e0f
Show file tree
Hide file tree
Showing 23 changed files with 226 additions and 177 deletions.
13 changes: 2 additions & 11 deletions drivers/infiniband/core/multicast.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ struct mcast_group {
atomic_t refcount;
enum mcast_group_state state;
struct ib_sa_query *query;
int query_id;
u16 pkey_index;
u8 leave_state;
int retries;
Expand Down Expand Up @@ -340,11 +339,7 @@ static int send_join(struct mcast_group *group, struct mcast_member *member)
member->multicast.comp_mask,
3000, GFP_KERNEL, join_handler, group,
&group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
return (ret > 0) ? 0 : ret;
}

static int send_leave(struct mcast_group *group, u8 leave_state)
Expand All @@ -364,11 +359,7 @@ static int send_leave(struct mcast_group *group, u8 leave_state)
IB_SA_MCMEMBER_REC_JOIN_STATE,
3000, GFP_KERNEL, leave_handler,
group, &group->query);
if (ret >= 0) {
group->query_id = ret;
ret = 0;
}
return ret;
return (ret > 0) ? 0 : ret;
}

static void join_group(struct mcast_group *group, struct mcast_member *member,
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/cxgb4/qp.c
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ static int build_inv_stag(union t4_wr *wqe, struct ib_send_wr *wr,
return 0;
}

void _free_qp(struct kref *kref)
static void _free_qp(struct kref *kref)
{
struct c4iw_qp *qhp;

Expand Down
92 changes: 81 additions & 11 deletions drivers/infiniband/hw/hfi1/chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -9490,6 +9490,78 @@ static void init_lcb(struct hfi1_devdata *dd)
write_csr(dd, DC_LCB_CFG_TX_FIFOS_RESET, 0x00);
}

/*
 * Probe the QSFP with a single one-byte test read.
 *
 * Ports whose type is not PORT_TYPE_QSFP are reported as success without
 * touching the hardware.  Otherwise byte 2 (the status byte) is read via
 * one_qsfp_read(); the value read is discarded, only the outcome matters.
 *
 * Return: 0 on success, the negative value from one_qsfp_read() if it
 * reported an error, or -EIO if it returned anything other than 1 for
 * the one byte requested.
 */
static int test_qsfp_read(struct hfi1_pportdata *ppd)
{
	u8 status;
	int nread;

	/* nothing to probe unless the port is a QSFP */
	if (ppd->port_type != PORT_TYPE_QSFP)
		return 0;

	/* read byte 2, the status byte */
	nread = one_qsfp_read(ppd, ppd->dd->hfi1_id, 2, &status, 1);
	if (nread == 1)
		return 0; /* success */

	/* pass a negative result through; any other result becomes -EIO */
	return (nread < 0) ? nread : -EIO;
}

/*
 * Values for QSFP retry.
 *
 * Give up after 10s (20 x 500ms). The overall timeout was empirically
 * arrived at from experience on a large cluster.
 *
 * MAX_QSFP_RETRIES is the limit that try_start_link() compares
 * ppd->qsfp_retry_count against before scheduling another attempt.
 * QSFP_RETRY_WAIT is the delay, in milliseconds, passed through
 * msecs_to_jiffies() when the retry work is queued.
 */
#define MAX_QSFP_RETRIES 20
#define QSFP_RETRY_WAIT 500 /* msec */

/*
 * Attempt to start the link, gated on a successful QSFP test read.
 * Called on first link activation after driver load, and again from the
 * delayed work handler on each retry.
 *
 * If the test read succeeds, the retry counter is cleared, the SerDes is
 * tuned and the link is started.  If it fails, another attempt is queued
 * on ppd->hfi1_wq after QSFP_RETRY_WAIT milliseconds, unless
 * MAX_QSFP_RETRIES attempts have already been used, in which case an
 * error is logged and nothing further is scheduled.
 */
static void try_start_link(struct hfi1_pportdata *ppd)
{
	if (!test_qsfp_read(ppd)) {
		/* QSFP answered (or the port is not a QSFP) */
		ppd->qsfp_retry_count = 0;

		/*
		 * Tune the SerDes to a ballpark setting for optimal signal
		 * and bit error rate. Needs to be done before starting the
		 * link.
		 */
		tune_serdes(ppd);
		start_link(ppd);
		return;
	}

	/* read failed: stop once the retry budget is used up */
	if (ppd->qsfp_retry_count >= MAX_QSFP_RETRIES) {
		dd_dev_err(ppd->dd, "QSFP not responding, giving up\n");
		return;
	}

	/* the count is logged before it is incremented for this attempt */
	dd_dev_info(ppd->dd,
		    "QSFP not responding, waiting and retrying %d\n",
		    (int)ppd->qsfp_retry_count);
	ppd->qsfp_retry_count++;
	queue_delayed_work(ppd->hfi1_wq, &ppd->start_link_work,
			   msecs_to_jiffies(QSFP_RETRY_WAIT));
}

/*
 * Delayed-work callback for start_link_work: recover the owning port
 * from the embedded work item and make another attempt to start the link.
 */
void handle_start_link(struct work_struct *work)
{
	struct hfi1_pportdata *port;

	/* @work is the work_struct inside the port's start_link_work */
	port = container_of(work, struct hfi1_pportdata,
			    start_link_work.work);

	try_start_link(port);
}

int bringup_serdes(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
Expand Down Expand Up @@ -9525,14 +9597,8 @@ int bringup_serdes(struct hfi1_pportdata *ppd)
set_qsfp_int_n(ppd, 1);
}

/*
* Tune the SerDes to a ballpark setting for
* optimal signal and bit error rate
* Needs to be done before starting the link
*/
tune_serdes(ppd);

return start_link(ppd);
try_start_link(ppd);
return 0;
}

void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
Expand All @@ -9549,6 +9615,10 @@ void hfi1_quiet_serdes(struct hfi1_pportdata *ppd)
ppd->driver_link_ready = 0;
ppd->link_enabled = 0;

ppd->qsfp_retry_count = MAX_QSFP_RETRIES; /* prevent more retries */
flush_delayed_work(&ppd->start_link_work);
cancel_delayed_work_sync(&ppd->start_link_work);

ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_SMA_DISABLED);
set_link_down_reason(ppd, OPA_LINKDOWN_REASON_SMA_DISABLED, 0,
Expand Down Expand Up @@ -12865,7 +12935,7 @@ static int set_up_interrupts(struct hfi1_devdata *dd)
*/
static int set_up_context_variables(struct hfi1_devdata *dd)
{
int num_kernel_contexts;
unsigned long num_kernel_contexts;
int total_contexts;
int ret;
unsigned ngroups;
Expand Down Expand Up @@ -12894,9 +12964,9 @@ static int set_up_context_variables(struct hfi1_devdata *dd)
*/
if (num_kernel_contexts > (dd->chip_send_contexts - num_vls - 1)) {
dd_dev_err(dd,
"Reducing # kernel rcv contexts to: %d, from %d\n",
"Reducing # kernel rcv contexts to: %d, from %lu\n",
(int)(dd->chip_send_contexts - num_vls - 1),
(int)num_kernel_contexts);
num_kernel_contexts);
num_kernel_contexts = dd->chip_send_contexts - num_vls - 1;
}
/*
Expand Down
1 change: 1 addition & 0 deletions drivers/infiniband/hw/hfi1/chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,7 @@ void handle_link_up(struct work_struct *work);
void handle_link_down(struct work_struct *work);
void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
Expand Down
Loading

0 comments on commit d060e0f

Please sign in to comment.