From e4221314a593b00e035f70efbef52021f9a3a5fc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 3 Nov 2011 17:48:25 -0700 Subject: [PATCH 1/4] IB/mthca: Fix buddy->num_free allocation size The num_free field of mthca_buddy has a type of array of unsigned int while it was allocated as an array of pointers. On 64-bit platforms this allocates twice as much memory as required. Fix this by allocating the correct size for the type. This is the same bug just fixed in mlx4 by Eli Cohen. Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mthca/mthca_mr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mthca/mthca_mr.c b/drivers/infiniband/hw/mthca/mthca_mr.c index ab876f928a1bc..ed9a989e501b4 100644 --- a/drivers/infiniband/hw/mthca/mthca_mr.c +++ b/drivers/infiniband/hw/mthca/mthca_mr.c @@ -146,7 +146,7 @@ static int mthca_buddy_init(struct mthca_buddy *buddy, int max_order) buddy->bits = kzalloc((buddy->max_order + 1) * sizeof (long *), GFP_KERNEL); - buddy->num_free = kzalloc((buddy->max_order + 1) * sizeof (int *), + buddy->num_free = kcalloc((buddy->max_order + 1), sizeof *buddy->num_free, GFP_KERNEL); if (!buddy->bits || !buddy->num_free) goto err_out; From 2c4ce609347f2a45792c8d9ebb5af11217766cb6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2011 00:19:46 +0200 Subject: [PATCH 2/4] IB/iser: Use separate buffers for the login request/response The driver counted on the transactional nature of iSCSI login/text flows and used the same buffer for both the request and the response. We also went further and did DMA mapping only once, with DMA_FROM_DEVICE, which violates the DMA mapping API. Fix that by using different buffers, one for requests and one for responses, and using the correct DMA mapping direction for each. 
Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.h | 3 +- drivers/infiniband/ulp/iser/iser_initiator.c | 31 +++++++++++++----- drivers/infiniband/ulp/iser/iser_verbs.c | 33 +++++++++++++++----- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index db6f3ce9f3bf5..2982a14a0557c 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -257,7 +257,8 @@ struct iser_conn { struct list_head conn_list; /* entry in ig conn list */ char *login_buf; - u64 login_dma; + char *login_req_buf, *login_resp_buf; + u64 login_req_dma, login_resp_dma; unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX]; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index f299de6b419bb..a607542fc7961 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -221,8 +221,14 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn) struct iser_device *device = ib_conn->device; if (ib_conn->login_buf) { - ib_dma_unmap_single(device->ib_device, ib_conn->login_dma, - ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + if (ib_conn->login_req_dma) + ib_dma_unmap_single(device->ib_device, + ib_conn->login_req_dma, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + if (ib_conn->login_resp_dma) + ib_dma_unmap_single(device->ib_device, + ib_conn->login_resp_dma, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); kfree(ib_conn->login_buf); } @@ -394,6 +400,7 @@ int iser_send_control(struct iscsi_conn *conn, unsigned long data_seg_len; int err = 0; struct iser_device *device; + struct iser_conn *ib_conn = iser_conn->ib_conn; /* build the tx desc regd header and add it to the tx desc dto */ mdesc->type = ISCSI_TX_CONTROL; @@ -409,9 +416,19 @@ int iser_send_control(struct iscsi_conn *conn, 
iser_err("data present on non login task!!!\n"); goto send_control_error; } - memcpy(iser_conn->ib_conn->login_buf, task->data, + + ib_dma_sync_single_for_cpu(device->ib_device, + ib_conn->login_req_dma, task->data_count, + DMA_TO_DEVICE); + + memcpy(iser_conn->ib_conn->login_req_buf, task->data, task->data_count); - tx_dsg->addr = iser_conn->ib_conn->login_dma; + + ib_dma_sync_single_for_device(device->ib_device, + ib_conn->login_req_dma, task->data_count, + DMA_TO_DEVICE); + + tx_dsg->addr = iser_conn->ib_conn->login_req_dma; tx_dsg->length = task->data_count; tx_dsg->lkey = device->mr->lkey; mdesc->num_sge = 2; @@ -445,8 +462,8 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, int rx_buflen, outstanding, count, err; /* differentiate between login to all other PDUs */ - if ((char *)rx_desc == ib_conn->login_buf) { - rx_dma = ib_conn->login_dma; + if ((char *)rx_desc == ib_conn->login_resp_buf) { + rx_dma = ib_conn->login_resp_dma; rx_buflen = ISER_RX_LOGIN_SIZE; } else { rx_dma = rx_desc->dma_addr; @@ -473,7 +490,7 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc, * for the posted rx bufs refcount to become zero handles everything */ conn->ib_conn->post_recv_buf_count--; - if (rx_dma == ib_conn->login_dma) + if (rx_dma == ib_conn->login_resp_dma) return; outstanding = ib_conn->post_recv_buf_count; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index ede1475bee09c..e28877c4ce159 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -155,20 +155,39 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) { struct iser_device *device; struct ib_qp_init_attr init_attr; - int ret = -ENOMEM; + int req_err, resp_err, ret = -ENOMEM; struct ib_fmr_pool_param params; BUG_ON(ib_conn->device == NULL); device = ib_conn->device; - ib_conn->login_buf = kmalloc(ISER_RX_LOGIN_SIZE, GFP_KERNEL); + ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN + + 
ISER_RX_LOGIN_SIZE, GFP_KERNEL); if (!ib_conn->login_buf) goto out_err; - ib_conn->login_dma = ib_dma_map_single(ib_conn->device->ib_device, - (void *)ib_conn->login_buf, ISER_RX_LOGIN_SIZE, - DMA_FROM_DEVICE); + ib_conn->login_req_buf = ib_conn->login_buf; + ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN; + + ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_req_buf, + ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE); + + ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device, + (void *)ib_conn->login_resp_buf, + ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE); + + req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma); + resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma); + + if (req_err || resp_err) { + if (req_err) + ib_conn->login_req_dma = 0; + if (resp_err) + ib_conn->login_resp_dma = 0; + goto out_err; + } ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) + (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)), @@ -658,11 +677,11 @@ int iser_post_recvl(struct iser_conn *ib_conn) struct ib_sge sge; int ib_ret; - sge.addr = ib_conn->login_dma; + sge.addr = ib_conn->login_resp_dma; sge.length = ISER_RX_LOGIN_SIZE; sge.lkey = ib_conn->device->mr->lkey; - rx_wr.wr_id = (unsigned long)ib_conn->login_buf; + rx_wr.wr_id = (unsigned long)ib_conn->login_resp_buf; rx_wr.sg_list = &sge; rx_wr.num_sge = 1; rx_wr.next = NULL; From 52439540ea30396982b69662dd21aede6b336288 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Fri, 4 Nov 2011 00:21:27 +0200 Subject: [PATCH 3/4] IB/iser: DMA unmap TX bufs used for iSCSI/iSER headers The current driver never does DMA unmapping on these buffers. Fix that by adding DMA unmapping to the task cleanup callback, and DMA mapping to the task init function (drop the headers_initialized micro-optimization). 
Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/ulp/iser/iscsi_iser.c | 11 ++++++++--- drivers/infiniband/ulp/iser/iscsi_iser.h | 1 - 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index 84e8c293a7155..c42b8f390c0b9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -151,7 +151,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, tx_desc->tx_sg[0].length = ISER_HEADERS_LEN; tx_desc->tx_sg[0].lkey = device->mr->lkey; - iser_task->headers_initialized = 1; iser_task->iser_conn = iser_conn; return 0; } @@ -166,8 +165,7 @@ iscsi_iser_task_init(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; - if (!iser_task->headers_initialized) - if (iser_initialize_task_headers(task, &iser_task->desc)) + if (iser_initialize_task_headers(task, &iser_task->desc)) return -ENOMEM; /* mgmt task */ @@ -278,6 +276,13 @@ iscsi_iser_task_xmit(struct iscsi_task *task) static void iscsi_iser_cleanup_task(struct iscsi_task *task) { struct iscsi_iser_task *iser_task = task->dd_data; + struct iser_tx_desc *tx_desc = &iser_task->desc; + + struct iscsi_iser_conn *iser_conn = task->conn->dd_data; + struct iser_device *device = iser_conn->ib_conn->device; + + ib_dma_unmap_single(device->ib_device, + tx_desc->dma_addr, ISER_HEADERS_LEN, DMA_TO_DEVICE); /* mgmt tasks do not need special cleanup */ if (!task->sc) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 2982a14a0557c..db7ea3704da75 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -278,7 +278,6 @@ struct iscsi_iser_task { struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. 
copy */ - int headers_initialized; }; struct iser_page_vec { From 30ab7e230b996f750d4fc24b6bf8214e83effa12 Mon Sep 17 00:00:00 2001 From: Mike Marciniszyn Date: Fri, 4 Nov 2011 08:26:52 -0400 Subject: [PATCH 4/4] IB/qib: Fix panic in RC error flushing logic The following panic can occur when flushing a QP: RIP: 0010:[] [] qib_send_complete+0x3b/0x190 [ib_qib] RSP: 0018:ffff8803cdc6fc90 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff8803d84ba000 RCX: 0000000000000000 RDX: 0000000000000005 RSI: ffffc90015a53430 RDI: ffff8803d84ba000 RBP: ffff8803cdc6fce0 R08: ffff8803cdc6fc90 R09: 0000000000000001 R10: 00000000ffffffff R11: 0000000000000000 R12: ffff8803d84ba0c0 R13: ffff8803d84ba5cc R14: 0000000000000800 R15: 0000000000000246 FS: 0000000000000000(0000) GS:ffff880036600000(0000) knlGS:0000000000000000 CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b CR2: 0000000000000034 CR3: 00000003e44f9000 CR4: 00000000000406f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process qib/0 (pid: 1350, threadinfo ffff8803cdc6e000, task ffff88042728a100) Stack: 53544c5553455201 0000000100000005 0000000000000000 ffff8803d84ba000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000001 ffff8803cdc6fd30 ffffffffa0165d7a Call Trace: [] qib_make_rc_req+0x36a/0xe80 [ib_qib] [] ? qib_make_rc_req+0x0/0xe80 [ib_qib] [] qib_do_send+0xf3/0xb60 [ib_qib] [] ? thread_return+0x4e/0x777 [] ? qib_do_send+0x0/0xb60 [ib_qib] [] worker_thread+0x170/0x2a0 [] ? autoremove_wake_function+0x0/0x40 [] ? worker_thread+0x0/0x2a0 [] kthread+0x96/0xa0 [] child_rip+0xa/0x20 [] ? kthread+0x0/0xa0 [] ? child_rip+0x0/0x20 RIP [] qib_send_complete+0x3b/0x190 [ib_qib] The RC error state flush logic in qib_make_rc_req() could return all of the acked wqes and potentially have emptied the queue. 
It would then unconditionally try to return a flush completion via qib_send_complete() for an invalid wqe, or worse a valid one that is not queued. The panic results when the completion code tries to maintain an MR reference count for a NULL MR. This fix modifies the logic to only send one completion per qib_make_rc_req() call and changes the completion status from IB_WC_SUCCESS to IB_WC_WR_FLUSH_ERR as the completions progress. The outer loop will call as many times as necessary to flush the queue. Reviewed-by: Ram Vepa Signed-off-by: Mike Marciniszyn Signed-off-by: Roland Dreier --- drivers/infiniband/hw/qib/qib_rc.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c index afaf4ac79f42d..894afac26f3b4 100644 --- a/drivers/infiniband/hw/qib/qib_rc.c +++ b/drivers/infiniband/hw/qib/qib_rc.c @@ -271,13 +271,9 @@ int qib_make_rc_req(struct qib_qp *qp) goto bail; } wqe = get_swqe_ptr(qp, qp->s_last); - while (qp->s_last != qp->s_acked) { - qib_send_complete(qp, wqe, IB_WC_SUCCESS); - if (++qp->s_last >= qp->s_size) - qp->s_last = 0; - wqe = get_swqe_ptr(qp, qp->s_last); - } - qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR); + qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ? + IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR); + /* will get called again */ goto done; }