Skip to content

Commit

Permalink
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/gi…
Browse files Browse the repository at this point in the history
…t/rdma/rdma

Pull RDMA fixes from Jason Gunthorpe:
 "Bug fixes for old bugs in the hns and hfi1 drivers:

   - Calculate various values in hns properly to avoid over/underflows
     in some cases

   - Fix an oops, PCI negotiation on Gen4 systems, and bugs related to
     retries"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Correct the value of srq_desc_size
  RDMA/hns: Correct the value of HNS_ROCE_HEM_CHUNK_LEN
  IB/hfi1: TID RDMA WRITE should not return IB_WC_RNR_RETRY_EXC_ERR
  IB/hfi1: Calculate flow weight based on QP MTU for TID RDMA
  IB/hfi1: Ensure r_tid_ack is valid before building TID RDMA ACK packet
  IB/hfi1: Ensure full Gen3 speed in a Gen4 system
  • Loading branch information
Linus Torvalds committed Nov 14, 2019
2 parents bf92947 + 411c1e6 commit 4e84608
Show file tree
Hide file tree
Showing 7 changed files with 46 additions and 39 deletions.
1 change: 0 additions & 1 deletion drivers/infiniband/hw/hfi1/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -1489,7 +1489,6 @@ static int __init hfi1_mod_init(void)
goto bail_dev;
}

hfi1_compute_tid_rdma_flow_wt();
/*
* These must be called before the driver is registered with
* the PCI subsystem.
Expand Down
4 changes: 3 additions & 1 deletion drivers/infiniband/hw/hfi1/pcie.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,9 @@ int pcie_speeds(struct hfi1_devdata *dd)
/*
* bus->max_bus_speed is set from the bridge's linkcap Max Link Speed
*/
if (parent && dd->pcidev->bus->max_bus_speed != PCIE_SPEED_8_0GT) {
if (parent &&
(dd->pcidev->bus->max_bus_speed == PCIE_SPEED_2_5GT ||
dd->pcidev->bus->max_bus_speed == PCIE_SPEED_5_0GT)) {
dd_dev_info(dd, "Parent PCIe bridge does not support Gen3\n");
dd->link_gen3_capable = 0;
}
Expand Down
16 changes: 8 additions & 8 deletions drivers/infiniband/hw/hfi1/rc.c
Original file line number Diff line number Diff line change
Expand Up @@ -2209,15 +2209,15 @@ int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
if (qp->s_flags & RVT_S_WAIT_RNR)
goto bail_stop;
rdi = ib_to_rvt(qp->ibqp.device);
if (qp->s_rnr_retry == 0 &&
!((rdi->post_parms[wqe->wr.opcode].flags &
RVT_OPERATION_IGN_RNR_CNT) &&
qp->s_rnr_retry_cnt == 0)) {
status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
if (!(rdi->post_parms[wqe->wr.opcode].flags &
RVT_OPERATION_IGN_RNR_CNT)) {
if (qp->s_rnr_retry == 0) {
status = IB_WC_RNR_RETRY_EXC_ERR;
goto class_b;
}
if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
qp->s_rnr_retry--;
}
if (qp->s_rnr_retry_cnt < 7 && qp->s_rnr_retry_cnt > 0)
qp->s_rnr_retry--;

/*
* The last valid PSN is the previous PSN. For TID RDMA WRITE
Expand Down
57 changes: 32 additions & 25 deletions drivers/infiniband/hw/hfi1/tid_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ static u32 mask_generation(u32 a)
* C - Capcode
*/

static u32 tid_rdma_flow_wt;

static void tid_rdma_trigger_resume(struct work_struct *work);
static void hfi1_kern_exp_rcv_free_flows(struct tid_rdma_request *req);
static int hfi1_kern_exp_rcv_alloc_flows(struct tid_rdma_request *req,
Expand Down Expand Up @@ -136,6 +134,26 @@ static void update_r_next_psn_fecn(struct hfi1_packet *packet,
struct tid_rdma_flow *flow,
bool fecn);

static void validate_r_tid_ack(struct hfi1_qp_priv *priv)
{
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
priv->r_tid_ack = priv->r_tid_tail;
}

static void tid_rdma_schedule_ack(struct rvt_qp *qp)
{
struct hfi1_qp_priv *priv = qp->priv;

priv->s_flags |= RVT_S_ACK_PENDING;
hfi1_schedule_tid_send(qp);
}

static void tid_rdma_trigger_ack(struct rvt_qp *qp)
{
validate_r_tid_ack(qp->priv);
tid_rdma_schedule_ack(qp);
}

static u64 tid_rdma_opfn_encode(struct tid_rdma_params *p)
{
return
Expand Down Expand Up @@ -3005,10 +3023,7 @@ bool hfi1_handle_kdeth_eflags(struct hfi1_ctxtdata *rcd,
qpriv->s_nak_state = IB_NAK_PSN_ERROR;
/* We are NAK'ing the next expected PSN */
qpriv->s_nak_psn = mask_psn(flow->flow_state.r_next_psn);
qpriv->s_flags |= RVT_S_ACK_PENDING;
if (qpriv->r_tid_ack == HFI1_QP_WQE_INVALID)
qpriv->r_tid_ack = qpriv->r_tid_tail;
hfi1_schedule_tid_send(qp);
tid_rdma_trigger_ack(qp);
}
goto unlock;
}
Expand Down Expand Up @@ -3371,18 +3386,17 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
return sizeof(ohdr->u.tid_rdma.w_req) / sizeof(u32);
}

void hfi1_compute_tid_rdma_flow_wt(void)
static u32 hfi1_compute_tid_rdma_flow_wt(struct rvt_qp *qp)
{
/*
* Heuristic for computing the RNR timeout when waiting on the flow
* queue. Rather than a computationaly expensive exact estimate of when
* a flow will be available, we assume that if a QP is at position N in
* the flow queue it has to wait approximately (N + 1) * (number of
* segments between two sync points), assuming PMTU of 4K. The rationale
* for this is that flows are released and recycled at each sync point.
* segments between two sync points). The rationale for this is that
* flows are released and recycled at each sync point.
*/
tid_rdma_flow_wt = MAX_TID_FLOW_PSN * enum_to_mtu(OPA_MTU_4096) /
TID_RDMA_MAX_SEGMENT_SIZE;
return (MAX_TID_FLOW_PSN * qp->pmtu) >> TID_RDMA_SEGMENT_SHIFT;
}

static u32 position_in_queue(struct hfi1_qp_priv *qpriv,
Expand Down Expand Up @@ -3505,7 +3519,7 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
if (qpriv->flow_state.index >= RXE_NUM_TID_FLOWS) {
ret = hfi1_kern_setup_hw_flow(qpriv->rcd, qp);
if (ret) {
to_seg = tid_rdma_flow_wt *
to_seg = hfi1_compute_tid_rdma_flow_wt(qp) *
position_in_queue(qpriv,
&rcd->flow_queue);
break;
Expand All @@ -3526,16 +3540,15 @@ static void hfi1_tid_write_alloc_resources(struct rvt_qp *qp, bool intr_ctx)
/*
* If overtaking req->acked_tail, send an RNR NAK. Because the
* QP is not queued in this case, and the issue can only be
* caused due a delay in scheduling the second leg which we
* caused by a delay in scheduling the second leg which we
* cannot estimate, we use a rather arbitrary RNR timeout of
* (MAX_FLOWS / 2) segments
*/
if (!CIRC_SPACE(req->setup_head, req->acked_tail,
MAX_FLOWS)) {
ret = -EAGAIN;
to_seg = MAX_FLOWS >> 1;
qpriv->s_flags |= RVT_S_ACK_PENDING;
hfi1_schedule_tid_send(qp);
tid_rdma_trigger_ack(qp);
break;
}

Expand Down Expand Up @@ -4335,8 +4348,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
trace_hfi1_tid_req_rcv_write_data(qp, 0, e->opcode, e->psn, e->lpsn,
req);
trace_hfi1_tid_write_rsp_rcv_data(qp);
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
priv->r_tid_ack = priv->r_tid_tail;
validate_r_tid_ack(priv);

if (opcode == TID_OP(WRITE_DATA_LAST)) {
release_rdma_sge_mr(e);
Expand Down Expand Up @@ -4375,8 +4387,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
}

done:
priv->s_flags |= RVT_S_ACK_PENDING;
hfi1_schedule_tid_send(qp);
tid_rdma_schedule_ack(qp);
exit:
priv->r_next_psn_kdeth = flow->flow_state.r_next_psn;
if (fecn)
Expand All @@ -4388,10 +4399,7 @@ void hfi1_rc_rcv_tid_rdma_write_data(struct hfi1_packet *packet)
if (!priv->s_nak_state) {
priv->s_nak_state = IB_NAK_PSN_ERROR;
priv->s_nak_psn = flow->flow_state.r_next_psn;
priv->s_flags |= RVT_S_ACK_PENDING;
if (priv->r_tid_ack == HFI1_QP_WQE_INVALID)
priv->r_tid_ack = priv->r_tid_tail;
hfi1_schedule_tid_send(qp);
tid_rdma_trigger_ack(qp);
}
goto done;
}
Expand Down Expand Up @@ -4939,8 +4947,7 @@ void hfi1_rc_rcv_tid_rdma_resync(struct hfi1_packet *packet)
qpriv->resync = true;
/* RESYNC request always gets a TID RDMA ACK. */
qpriv->s_nak_state = 0;
qpriv->s_flags |= RVT_S_ACK_PENDING;
hfi1_schedule_tid_send(qp);
tid_rdma_trigger_ack(qp);
bail:
if (fecn)
qp->s_flags |= RVT_S_ECN;
Expand Down
3 changes: 1 addition & 2 deletions drivers/infiniband/hw/hfi1/tid_rdma.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#define TID_RDMA_MIN_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_SEGMENT_SIZE BIT(18) /* 256 KiB (for now) */
#define TID_RDMA_MAX_PAGES (BIT(18) >> PAGE_SHIFT)
#define TID_RDMA_SEGMENT_SHIFT 18

/*
* Bit definitions for priv->s_flags.
Expand Down Expand Up @@ -274,8 +275,6 @@ u32 hfi1_build_tid_rdma_write_req(struct rvt_qp *qp, struct rvt_swqe *wqe,
struct ib_other_headers *ohdr,
u32 *bth1, u32 *bth2, u32 *len);

void hfi1_compute_tid_rdma_flow_wt(void);

void hfi1_rc_rcv_tid_rdma_write_req(struct hfi1_packet *packet);

u32 hfi1_build_tid_rdma_write_resp(struct rvt_qp *qp, struct rvt_ack_entry *e,
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/hns/hns_roce_hem.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ enum {

#define HNS_ROCE_HEM_CHUNK_LEN \
((256 - sizeof(struct list_head) - 2 * sizeof(int)) / \
(sizeof(struct scatterlist)))
(sizeof(struct scatterlist) + sizeof(void *)))

#define check_whether_bt_num_3(type, hop_num) \
(type < HEM_TYPE_MTT && hop_num == 2)
Expand Down
2 changes: 1 addition & 1 deletion drivers/infiniband/hw/hns/hns_roce_srq.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq,
srq->max = roundup_pow_of_two(srq_init_attr->attr.max_wr + 1);
srq->max_gs = srq_init_attr->attr.max_sge;

srq_desc_size = max(16, 16 * srq->max_gs);
srq_desc_size = roundup_pow_of_two(max(16, 16 * srq->max_gs));

srq->wqe_shift = ilog2(srq_desc_size);

Expand Down

0 comments on commit 4e84608

Please sign in to comment.