Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/dledford/rdma

Pull rdma fixes from Doug Ledford:
 "Second -rc update for 4.14.

  Both Mellanox and Intel had a series of -rc fixes that landed this
  week. The Mellanox bunch is spread throughout the stack and not just
  in their driver, whereas the Intel bunch was mostly in the hfi1
  driver. And, several of the fixes in the hfi1 driver were more than
  just simple 5 line fixes. As a result, the hfi1 driver fixes have a
  sizable LOC count.

  Everything else is as one would expect in an RC cycle in terms of LOC
  count. One item that might jump out and make you think "That's not an
  rc item" is the fix that corrects a typo. But, that change fixes a
  typo in a user visible API that was just added in this merge window,
  so if we fix it now, we can still fix it. If we don't, the typo is in the
  API forever. Another that might not appear to be a fix at first glance
  is the Simplify mlx5_ib_cont_pages patch, but the simplification
  allows them to fix a bug in the existing function whenever the length
  of an SGE exceeded page size. We also had to revert one patch from the
  merge window that was wrong.

  Summary:

   - a few core fixes
   - a few ipoib fixes
   - a few mlx5 fixes
   - a 7-patch hfi1 related series"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma:
  IB/hfi1: Unsuccessful PCIe caps tuning should not fail driver load
  IB/hfi1: On error, fix use after free during user context setup
  Revert "IB/ipoib: Update broadcast object if PKey value was changed in index 0"
  IB/hfi1: Return correct value in general interrupt handler
  IB/hfi1: Check eeprom config partition validity
  IB/hfi1: Only reset QSFP after link up and turn off AOC TX
  IB/hfi1: Turn off AOC TX after offline substates
  IB/mlx5: Fix NULL deference on mlx5_ib_update_xlt failure
  IB/mlx5: Simplify mlx5_ib_cont_pages
  IB/ipoib: Fix inconsistency with free_netdev and free_rdma_netdev
  IB/ipoib: Fix sysfs Pkey create<->remove possible deadlock
  IB: Correct MR length field to be 64-bit
  IB/core: Fix qp_sec use after free access
  IB/core: Fix typo in the name of the tag-matching cap struct
  • Loading branch information
Linus Torvalds committed Sep 28, 2017
2 parents 26e811c + 828bcbd commit 9173583
Show file tree
Hide file tree
Showing 19 changed files with 231 additions and 164 deletions.
4 changes: 3 additions & 1 deletion drivers/infiniband/core/security.c
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,10 @@ int ib_create_qp_security(struct ib_qp *qp, struct ib_device *dev)
atomic_set(&qp->qp_sec->error_list_count, 0);
init_completion(&qp->qp_sec->error_complete);
ret = security_ib_alloc_security(&qp->qp_sec->security);
if (ret)
if (ret) {
kfree(qp->qp_sec);
qp->qp_sec = NULL;
}

return ret;
}
Expand Down
14 changes: 7 additions & 7 deletions drivers/infiniband/core/uverbs_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -3869,15 +3869,15 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
resp.raw_packet_caps = attr.raw_packet_caps;
resp.response_length += sizeof(resp.raw_packet_caps);

if (ucore->outlen < resp.response_length + sizeof(resp.xrq_caps))
if (ucore->outlen < resp.response_length + sizeof(resp.tm_caps))
goto end;

resp.xrq_caps.max_rndv_hdr_size = attr.xrq_caps.max_rndv_hdr_size;
resp.xrq_caps.max_num_tags = attr.xrq_caps.max_num_tags;
resp.xrq_caps.max_ops = attr.xrq_caps.max_ops;
resp.xrq_caps.max_sge = attr.xrq_caps.max_sge;
resp.xrq_caps.flags = attr.xrq_caps.flags;
resp.response_length += sizeof(resp.xrq_caps);
resp.tm_caps.max_rndv_hdr_size = attr.tm_caps.max_rndv_hdr_size;
resp.tm_caps.max_num_tags = attr.tm_caps.max_num_tags;
resp.tm_caps.max_ops = attr.tm_caps.max_ops;
resp.tm_caps.max_sge = attr.tm_caps.max_sge;
resp.tm_caps.flags = attr.tm_caps.flags;
resp.response_length += sizeof(resp.tm_caps);
end:
err = ib_copy_to_udata(ucore, &resp, resp.response_length);
return err;
Expand Down
101 changes: 78 additions & 23 deletions drivers/infiniband/hw/hfi1/chip.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,8 @@ static int read_idle_sma(struct hfi1_devdata *dd, u64 *data);
static int thermal_init(struct hfi1_devdata *dd);

static void update_statusp(struct hfi1_pportdata *ppd, u32 state);
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs);
static int wait_logical_linkstate(struct hfi1_pportdata *ppd, u32 state,
int msecs);
static void log_state_transition(struct hfi1_pportdata *ppd, u32 state);
Expand Down Expand Up @@ -8238,6 +8240,7 @@ static irqreturn_t general_interrupt(int irq, void *data)
u64 regs[CCE_NUM_INT_CSRS];
u32 bit;
int i;
irqreturn_t handled = IRQ_NONE;

this_cpu_inc(*dd->int_counter);

Expand All @@ -8258,9 +8261,10 @@ static irqreturn_t general_interrupt(int irq, void *data)
for_each_set_bit(bit, (unsigned long *)&regs[0],
CCE_NUM_INT_CSRS * 64) {
is_interrupt(dd, bit);
handled = IRQ_HANDLED;
}

return IRQ_HANDLED;
return handled;
}

static irqreturn_t sdma_interrupt(int irq, void *data)
Expand Down Expand Up @@ -9413,7 +9417,7 @@ static void set_qsfp_int_n(struct hfi1_pportdata *ppd, u8 enable)
write_csr(dd, dd->hfi1_id ? ASIC_QSFP2_MASK : ASIC_QSFP1_MASK, mask);
}

void reset_qsfp(struct hfi1_pportdata *ppd)
int reset_qsfp(struct hfi1_pportdata *ppd)
{
struct hfi1_devdata *dd = ppd->dd;
u64 mask, qsfp_mask;
Expand Down Expand Up @@ -9443,6 +9447,13 @@ void reset_qsfp(struct hfi1_pportdata *ppd)
* for alarms and warnings
*/
set_qsfp_int_n(ppd, 1);

/*
* After the reset, AOC transmitters are enabled by default. They need
* to be turned off to complete the QSFP setup before they can be
* enabled again.
*/
return set_qsfp_tx(ppd, 0);
}

static int handle_qsfp_error_conditions(struct hfi1_pportdata *ppd,
Expand Down Expand Up @@ -10305,6 +10316,7 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
{
struct hfi1_devdata *dd = ppd->dd;
u32 previous_state;
int offline_state_ret;
int ret;

update_lcb_cache(dd);
Expand All @@ -10326,28 +10338,11 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
ppd->offline_disabled_reason =
HFI1_ODR_MASK(OPA_LINKDOWN_REASON_TRANSIENT);

/*
* Wait for offline transition. It can take a while for
* the link to go down.
*/
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 10000);
if (ret < 0)
return ret;

/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */

/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);

ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */
offline_state_ret = wait_phys_link_offline_substates(ppd, 10000);
if (offline_state_ret < 0)
return offline_state_ret;

/* Disabling AOC transmitters */
if (ppd->port_type == PORT_TYPE_QSFP &&
ppd->qsfp_info.limiting_active &&
qsfp_mod_present(ppd)) {
Expand All @@ -10364,6 +10359,30 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
}
}

/*
* Wait for the offline.Quiet transition if it hasn't happened yet. It
* can take a while for the link to go down.
*/
if (offline_state_ret != PLS_OFFLINE_QUIET) {
ret = wait_physical_linkstate(ppd, PLS_OFFLINE, 30000);
if (ret < 0)
return ret;
}

/*
* Now in charge of LCB - must be after the physical state is
* offline.quiet and before host_link_state is changed.
*/
set_host_lcb_access(dd);
write_csr(dd, DC_LCB_ERR_EN, ~0ull); /* watch LCB errors */

/* make sure the logical state is also down */
ret = wait_logical_linkstate(ppd, IB_PORT_DOWN, 1000);
if (ret)
force_logical_link_state_down(ppd);

ppd->host_link_state = HLS_LINK_COOLDOWN; /* LCB access allowed */

/*
* The LNI has a mandatory wait time after the physical state
* moves to Offline.Quiet. The wait time may be different
Expand Down Expand Up @@ -10396,6 +10415,9 @@ static int goto_offline(struct hfi1_pportdata *ppd, u8 rem_reason)
& (HLS_DN_POLL | HLS_VERIFY_CAP | HLS_GOING_UP)) {
/* went down while attempting link up */
check_lni_states(ppd);

/* The QSFP doesn't need to be reset on LNI failure */
ppd->qsfp_info.reset_needed = 0;
}

/* the active link width (downgrade) is 0 on link down */
Expand Down Expand Up @@ -12804,6 +12826,39 @@ static int wait_physical_linkstate(struct hfi1_pportdata *ppd, u32 state,
return 0;
}

/*
* wait_phys_link_offline_substates - wait for any offline substate
* @ppd: port device
* @msecs: the number of milliseconds to wait
*
* Poll the physical link state, for up to msecs milliseconds, until the
* upper nibble of the state read back matches PLS_OFFLINE.
*
* On success the state is passed to log_state_transition() and the raw
* physical state that was read is returned (not 0), so the caller can
* tell which offline substate was reached (goto_offline() compares the
* result against PLS_OFFLINE_QUIET).
* On timeout an error is logged and -ETIMEDOUT is returned.
*/
static int wait_phys_link_offline_substates(struct hfi1_pportdata *ppd,
int msecs)
{
u32 read_state;
unsigned long timeout;

/* absolute deadline, in jiffies */
timeout = jiffies + msecs_to_jiffies(msecs);
while (1) {
read_state = read_physical_state(ppd->dd);
/*
* Only the upper nibble is compared, so every state sharing
* PLS_OFFLINE's upper nibble is accepted.
* NOTE(review): presumably this covers all the PLS_OFFLINE_*
* substates (0x9x) defined in chip.h - confirm.
*/
if ((read_state & 0xF0) == PLS_OFFLINE)
break;
/*
* The state is checked before the deadline, so a match on the
* final poll is still treated as success.
*/
if (time_after(jiffies, timeout)) {
dd_dev_err(ppd->dd,
"timeout waiting for phy link offline.quiet substates. Read state 0x%x, %dms\n",
read_state, msecs);
return -ETIMEDOUT;
}
usleep_range(1950, 2050); /* sleep 2ms-ish */
}

log_state_transition(ppd, read_state);
/* hand back the state itself rather than 0 (see header comment) */
return read_state;
}

#define CLEAR_STATIC_RATE_CONTROL_SMASK(r) \
(r &= ~SEND_CTXT_CHECK_ENABLE_DISALLOW_PBC_STATIC_RATE_CONTROL_SMASK)

Expand Down
3 changes: 2 additions & 1 deletion drivers/infiniband/hw/hfi1/chip.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@
#define PLS_OFFLINE_READY_TO_QUIET_LT 0x92
#define PLS_OFFLINE_REPORT_FAILURE 0x93
#define PLS_OFFLINE_READY_TO_QUIET_BCC 0x94
#define PLS_OFFLINE_QUIET_DURATION 0x95
#define PLS_POLLING 0x20
#define PLS_POLLING_QUIET 0x20
#define PLS_POLLING_ACTIVE 0x21
Expand Down Expand Up @@ -722,7 +723,7 @@ void handle_link_downgrade(struct work_struct *work);
void handle_link_bounce(struct work_struct *work);
void handle_start_link(struct work_struct *work);
void handle_sma_message(struct work_struct *work);
void reset_qsfp(struct hfi1_pportdata *ppd);
int reset_qsfp(struct hfi1_pportdata *ppd);
void qsfp_event(struct work_struct *work);
void start_freeze_handling(struct hfi1_pportdata *ppd, int flags);
int send_idle_sma(struct hfi1_devdata *dd, u64 message);
Expand Down
20 changes: 15 additions & 5 deletions drivers/infiniband/hw/hfi1/eprom.c
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,10 @@ int eprom_init(struct hfi1_devdata *dd)
return ret;
}

/* magic character sequence that trails an image */
/* magic character sequence that begins an image */
#define IMAGE_START_MAGIC "APO="

/* magic character sequence that might trail an image */
#define IMAGE_TRAIL_MAGIC "egamiAPO"

/* EPROM file types */
Expand Down Expand Up @@ -250,6 +253,7 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
{
void *buffer;
void *p;
u32 length;
int ret;

buffer = kmalloc(P1_SIZE, GFP_KERNEL);
Expand All @@ -262,15 +266,21 @@ static int read_partition_platform_config(struct hfi1_devdata *dd, void **data,
return ret;
}

/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (!p) {
/* config partition is valid only if it starts with IMAGE_START_MAGIC */
if (memcmp(buffer, IMAGE_START_MAGIC, strlen(IMAGE_START_MAGIC))) {
kfree(buffer);
return -ENOENT;
}

/* scan for image magic that may trail the actual data */
p = strnstr(buffer, IMAGE_TRAIL_MAGIC, P1_SIZE);
if (p)
length = p - buffer;
else
length = P1_SIZE;

*data = buffer;
*size = p - buffer;
*size = length;
return 0;
}

Expand Down
41 changes: 22 additions & 19 deletions drivers/infiniband/hw/hfi1/file_ops.c
Original file line number Diff line number Diff line change
Expand Up @@ -930,15 +930,8 @@ static int assign_ctxt(struct hfi1_filedata *fd, struct hfi1_user_info *uinfo)
switch (ret) {
case 0:
ret = setup_base_ctxt(fd, uctxt);
if (uctxt->subctxt_cnt) {
/*
* Base context is done (successfully or not), notify
* anybody using a sub-context that is waiting for
* this completion.
*/
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}
if (ret)
deallocate_ctxt(uctxt);
break;
case 1:
ret = complete_subctxt(fd);
Expand Down Expand Up @@ -1305,38 +1298,48 @@ static int setup_base_ctxt(struct hfi1_filedata *fd,
/* Now allocate the RcvHdr queue and eager buffers. */
ret = hfi1_create_rcvhdrq(dd, uctxt);
if (ret)
return ret;
goto done;

ret = hfi1_setup_eagerbufs(uctxt);
if (ret)
goto setup_failed;
goto done;

/* If sub-contexts are enabled, do the appropriate setup */
if (uctxt->subctxt_cnt)
ret = setup_subctxt(uctxt);
if (ret)
goto setup_failed;
goto done;

ret = hfi1_alloc_ctxt_rcv_groups(uctxt);
if (ret)
goto setup_failed;
goto done;

ret = init_user_ctxt(fd, uctxt);
if (ret)
goto setup_failed;
goto done;

user_init(uctxt);

/* Now that the context is set up, the fd can get a reference. */
fd->uctxt = uctxt;
hfi1_rcd_get(uctxt);

return 0;
done:
if (uctxt->subctxt_cnt) {
/*
* On error, set the failed bit so sub-contexts will clean up
* correctly.
*/
if (ret)
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);

setup_failed:
/* Set the failed bit so sub-context init can do the right thing */
set_bit(HFI1_CTXT_BASE_FAILED, &uctxt->event_flags);
deallocate_ctxt(uctxt);
/*
* Base context is done (successfully or not), notify anybody
* using a sub-context that is waiting for this completion.
*/
clear_bit(HFI1_CTXT_BASE_UNINIT, &uctxt->event_flags);
wake_up(&uctxt->wait);
}

return ret;
}
Expand Down
Loading

0 comments on commit 9173583

Please sign in to comment.