Skip to content

Commit

Permalink
[SCSI] libiscsi: Reduce locking contention in fast path
Browse files Browse the repository at this point in the history
Replace the session lock with two locks, a forward lock and
a backwards lock named frwd_lock and back_lock respectively.

The forward lock protects resources that change while sending a
request to the target, such as cmdsn, queued_cmdsn, and allocating
task from the commands' pool with kfifo_out.

The backward lock protects resources that change while processing
a response or in error path, such as cmdsn_exp, cmdsn_max, and
returning tasks to the commands' pool with kfifo_in.

Under a steady state fast-path situation, that is when one
or more processes/threads submit IO to an iscsi device and
a single kernel upcall (e.g softirq) is dealing with processing
of responses without errors, this patch eliminates the contention
between the queuecommand()/request response/scsi_done() flows
associated with iscsi sessions.

Between the forward and the backward locks exists a strict locking
hierarchy. The mutual exclusion zone protected by the forward lock can
enclose the mutual exclusion zone protected by the backward lock but not
vice versa.

For example, in iscsi_conn_teardown or in iscsi_xmit_data when there is
a failure and __iscsi_put_task is called, the backward lock is taken while
the forward lock is still taken. On the other hand, if in the RX path a nop
is to be sent, for example in iscsi_handle_reject or __iscsi_complete_pdu
than the forward lock is released and the backward lock is taken for the
duration of iscsi_send_nopout, later the backward lock is released and the
forward lock is retaken.

libiscsi_tcp uses two kernel fifos the r2t pool and the r2t queue.

The insertion and deletion from these queues didn't corespond to the
assumption taken by the new forward/backwards session locking paradigm.

That is, in iscsi_tcp_clenup_task which belongs to the RX (backwards)
path, r2t is taken out from r2t queue and inserted to the r2t pool.
In iscsi_tcp_get_curr_r2t which belong to the TX (forward) path, r2t
is also inserted to the r2t pool and another r2t is pulled from r2t
queue.

Only in iscsi_tcp_r2t_rsp which is called in the RX path but can requeue
to the TX path, r2t is taken from the r2t pool and inserted to the r2t
queue.

In order to cope with this situation, two spin locks were added,
pool2queue and queue2pool. The former protects extracting from the
r2t pool and inserting to the r2t queue, and the later protects the
extracing from the r2t queue and inserting to the r2t pool.

Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
[minor fix up to apply cleanly and compile fix]
Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <JBottomley@Parallels.com>
  • Loading branch information
Shlomo Pongratz authored and James Bottomley committed Mar 15, 2014
1 parent 5d0fddd commit 659743b
Show file tree
Hide file tree
Showing 9 changed files with 206 additions and 161 deletions.
26 changes: 13 additions & 13 deletions drivers/scsi/be2iscsi/be_main.c
Original file line number Diff line number Diff line change
Expand Up @@ -233,20 +233,20 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
cls_session = starget_to_session(scsi_target(sc->device));
session = cls_session->dd_data;

spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
if (!aborted_task || !aborted_task->sc) {
/* we raced */
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
return SUCCESS;
}

aborted_io_task = aborted_task->dd_data;
if (!aborted_io_task->scsi_cmnd) {
/* raced or invalid command */
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
return SUCCESS;
}
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
/* Invalidate WRB Posted for this Task */
AMAP_SET_BITS(struct amap_iscsi_wrb, invld,
aborted_io_task->pwrb_handle->pwrb,
Expand Down Expand Up @@ -311,9 +311,9 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc)
/* invalidate iocbs */
cls_session = starget_to_session(scsi_target(sc->device));
session = cls_session->dd_data;
spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
if (!session->leadconn || session->state != ISCSI_STATE_LOGGED_IN) {
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
return FAILED;
}
conn = session->leadconn;
Expand Down Expand Up @@ -342,7 +342,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc)
num_invalidate++;
inv_tbl++;
}
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
inv_tbl = phba->inv_tbl;

nonemb_cmd.va = pci_alloc_consistent(phba->ctrl.pdev,
Expand Down Expand Up @@ -1185,9 +1185,9 @@ beiscsi_process_async_pdu(struct beiscsi_conn *beiscsi_conn,
return 1;
}

spin_lock_bh(&session->lock);
spin_lock_bh(&session->back_lock);
__iscsi_complete_pdu(conn, (struct iscsi_hdr *)ppdu, pbuffer, buf_len);
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->back_lock);
return 0;
}

Expand Down Expand Up @@ -1606,7 +1606,7 @@ static void hwi_complete_cmd(struct beiscsi_conn *beiscsi_conn,
pwrb = pwrb_handle->pwrb;
type = ((struct beiscsi_io_task *)task->dd_data)->wrb_type;

spin_lock_bh(&session->lock);
spin_lock_bh(&session->back_lock);
switch (type) {
case HWH_TYPE_IO:
case HWH_TYPE_IO_RD:
Expand Down Expand Up @@ -1645,7 +1645,7 @@ static void hwi_complete_cmd(struct beiscsi_conn *beiscsi_conn,
break;
}

spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->back_lock);
}

static struct list_head *hwi_get_async_busy_list(struct hwi_async_pdu_context
Expand Down Expand Up @@ -4693,9 +4693,9 @@ beiscsi_offload_connection(struct beiscsi_conn *beiscsi_conn,
* login/startup related tasks.
*/
beiscsi_conn->login_in_progress = 0;
spin_lock_bh(&session->lock);
spin_lock_bh(&session->back_lock);
beiscsi_cleanup_task(task);
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->back_lock);

pwrb_handle = alloc_wrb_handle(phba, beiscsi_conn->beiscsi_conn_cid);

Expand Down
46 changes: 23 additions & 23 deletions drivers/scsi/bnx2i/bnx2i_hwi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1361,7 +1361,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
u32 datalen = 0;

resp_cqe = (struct bnx2i_cmd_response *)cqe;
spin_lock_bh(&session->lock);
spin_lock_bh(&session->back_lock);
task = iscsi_itt_to_task(conn,
resp_cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
if (!task)
Expand Down Expand Up @@ -1432,7 +1432,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session,
__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr,
conn->data, datalen);
fail:
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->back_lock);
return 0;
}

Expand All @@ -1457,7 +1457,7 @@ static int bnx2i_process_login_resp(struct iscsi_session *session,
int pad_len;

login = (struct bnx2i_login_response *) cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn,
login->itt & ISCSI_LOGIN_RESPONSE_INDEX);
if (!task)
Expand Down Expand Up @@ -1500,7 +1500,7 @@ static int bnx2i_process_login_resp(struct iscsi_session *session,
bnx2i_conn->gen_pdu.resp_buf,
bnx2i_conn->gen_pdu.resp_wr_ptr - bnx2i_conn->gen_pdu.resp_buf);
done:
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
return 0;
}

Expand All @@ -1525,7 +1525,7 @@ static int bnx2i_process_text_resp(struct iscsi_session *session,
int pad_len;

text = (struct bnx2i_text_response *) cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn, text->itt & ISCSI_LOGIN_RESPONSE_INDEX);
if (!task)
goto done;
Expand Down Expand Up @@ -1561,7 +1561,7 @@ static int bnx2i_process_text_resp(struct iscsi_session *session,
bnx2i_conn->gen_pdu.resp_wr_ptr -
bnx2i_conn->gen_pdu.resp_buf);
done:
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
return 0;
}

Expand All @@ -1584,7 +1584,7 @@ static int bnx2i_process_tmf_resp(struct iscsi_session *session,
struct iscsi_tm_rsp *resp_hdr;

tmf_cqe = (struct bnx2i_tmf_response *)cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn,
tmf_cqe->itt & ISCSI_TMF_RESPONSE_INDEX);
if (!task)
Expand All @@ -1600,7 +1600,7 @@ static int bnx2i_process_tmf_resp(struct iscsi_session *session,

__iscsi_complete_pdu(conn, (struct iscsi_hdr *)resp_hdr, NULL, 0);
done:
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
return 0;
}

Expand All @@ -1623,7 +1623,7 @@ static int bnx2i_process_logout_resp(struct iscsi_session *session,
struct iscsi_logout_rsp *resp_hdr;

logout = (struct bnx2i_logout_response *) cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn,
logout->itt & ISCSI_LOGOUT_RESPONSE_INDEX);
if (!task)
Expand All @@ -1647,7 +1647,7 @@ static int bnx2i_process_logout_resp(struct iscsi_session *session,

bnx2i_conn->ep->state = EP_STATE_LOGOUT_RESP_RCVD;
done:
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
return 0;
}

Expand All @@ -1668,12 +1668,12 @@ static void bnx2i_process_nopin_local_cmpl(struct iscsi_session *session,
struct iscsi_task *task;

nop_in = (struct bnx2i_nop_in_msg *)cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn,
nop_in->itt & ISCSI_NOP_IN_MSG_INDEX);
if (task)
__iscsi_put_task(task);
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
}

/**
Expand Down Expand Up @@ -1712,7 +1712,7 @@ static int bnx2i_process_nopin_mesg(struct iscsi_session *session,

nop_in = (struct bnx2i_nop_in_msg *)cqe;

spin_lock(&session->lock);
spin_lock(&session->back_lock);
hdr = (struct iscsi_nopin *)&bnx2i_conn->gen_pdu.resp_hdr;
memset(hdr, 0, sizeof(struct iscsi_hdr));
hdr->opcode = nop_in->op_code;
Expand All @@ -1738,7 +1738,7 @@ static int bnx2i_process_nopin_mesg(struct iscsi_session *session,
}
done:
__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, NULL, 0);
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);

return tgt_async_nop;
}
Expand Down Expand Up @@ -1771,7 +1771,7 @@ static void bnx2i_process_async_mesg(struct iscsi_session *session,
return;
}

spin_lock(&session->lock);
spin_lock(&session->back_lock);
resp_hdr = (struct iscsi_async *) &bnx2i_conn->gen_pdu.resp_hdr;
memset(resp_hdr, 0, sizeof(struct iscsi_hdr));
resp_hdr->opcode = async_cqe->op_code;
Expand All @@ -1790,7 +1790,7 @@ static void bnx2i_process_async_mesg(struct iscsi_session *session,

__iscsi_complete_pdu(bnx2i_conn->cls_conn->dd_data,
(struct iscsi_hdr *)resp_hdr, NULL, 0);
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
}


Expand All @@ -1817,7 +1817,7 @@ static void bnx2i_process_reject_mesg(struct iscsi_session *session,
} else
bnx2i_unsol_pdu_adjust_rq(bnx2i_conn);

spin_lock(&session->lock);
spin_lock(&session->back_lock);
hdr = (struct iscsi_reject *) &bnx2i_conn->gen_pdu.resp_hdr;
memset(hdr, 0, sizeof(struct iscsi_hdr));
hdr->opcode = reject->op_code;
Expand All @@ -1828,7 +1828,7 @@ static void bnx2i_process_reject_mesg(struct iscsi_session *session,
hdr->ffffffff = cpu_to_be32(RESERVED_ITT);
__iscsi_complete_pdu(conn, (struct iscsi_hdr *)hdr, conn->data,
reject->data_length);
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
}

/**
Expand All @@ -1848,13 +1848,13 @@ static void bnx2i_process_cmd_cleanup_resp(struct iscsi_session *session,
struct iscsi_task *task;

cmd_clean_rsp = (struct bnx2i_cleanup_response *)cqe;
spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(conn,
cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
if (!task)
printk(KERN_ALERT "bnx2i: cmd clean ITT %x not active\n",
cmd_clean_rsp->itt & ISCSI_CLEANUP_RESPONSE_INDEX);
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
complete(&bnx2i_conn->cmd_cleanup_cmpl);
}

Expand Down Expand Up @@ -1921,11 +1921,11 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
int rc = 0;
int cpu;

spin_lock(&session->lock);
spin_lock(&session->back_lock);
task = iscsi_itt_to_task(bnx2i_conn->cls_conn->dd_data,
cqe->itt & ISCSI_CMD_RESPONSE_INDEX);
if (!task || !task->sc) {
spin_unlock(&session->lock);
spin_unlock(&session->back_lock);
return -EINVAL;
}
sc = task->sc;
Expand All @@ -1935,7 +1935,7 @@ static int bnx2i_queue_scsi_cmd_resp(struct iscsi_session *session,
else
cpu = sc->request->cpu;

spin_unlock(&session->lock);
spin_unlock(&session->back_lock);

p = &per_cpu(bnx2i_percpu, cpu);
spin_lock(&p->p_work_lock);
Expand Down
8 changes: 4 additions & 4 deletions drivers/scsi/bnx2i/bnx2i_iscsi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1169,10 +1169,10 @@ static void bnx2i_cleanup_task(struct iscsi_task *task)
if (task->state == ISCSI_TASK_ABRT_TMF) {
bnx2i_send_cmd_cleanup_req(hba, task->dd_data);

spin_unlock_bh(&conn->session->lock);
spin_unlock_bh(&conn->session->back_lock);
wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
spin_lock_bh(&conn->session->lock);
spin_lock_bh(&conn->session->back_lock);
}
bnx2i_iscsi_unmap_sg_list(task->dd_data);
}
Expand Down Expand Up @@ -2059,7 +2059,7 @@ int bnx2i_hw_ep_disconnect(struct bnx2i_endpoint *bnx2i_ep)
goto out;

if (session) {
spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
if (bnx2i_ep->state != EP_STATE_TCP_FIN_RCVD) {
if (session->state == ISCSI_STATE_LOGGING_OUT) {
if (bnx2i_ep->state == EP_STATE_LOGOUT_SENT) {
Expand All @@ -2075,7 +2075,7 @@ int bnx2i_hw_ep_disconnect(struct bnx2i_endpoint *bnx2i_ep)
} else
close = 1;

spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
}

bnx2i_ep->state = EP_STATE_DISCONN_START;
Expand Down
22 changes: 11 additions & 11 deletions drivers/scsi/iscsi_tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -593,9 +593,9 @@ static void iscsi_sw_tcp_release_conn(struct iscsi_conn *conn)
iscsi_sw_tcp_conn_restore_callbacks(conn);
sock_put(sock->sk);

spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
tcp_sw_conn->sock = NULL;
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
sockfd_put(sock);
}

Expand Down Expand Up @@ -663,10 +663,10 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
if (err)
goto free_socket;

spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
/* bind iSCSI connection and socket */
tcp_sw_conn->sock = sock;
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);

/* setup Socket parameters */
sk = sock->sk;
Expand Down Expand Up @@ -726,14 +726,14 @@ static int iscsi_sw_tcp_conn_get_param(struct iscsi_cls_conn *cls_conn,
switch(param) {
case ISCSI_PARAM_CONN_PORT:
case ISCSI_PARAM_CONN_ADDRESS:
spin_lock_bh(&conn->session->lock);
spin_lock_bh(&conn->session->frwd_lock);
if (!tcp_sw_conn || !tcp_sw_conn->sock) {
spin_unlock_bh(&conn->session->lock);
spin_unlock_bh(&conn->session->frwd_lock);
return -ENOTCONN;
}
rc = kernel_getpeername(tcp_sw_conn->sock,
(struct sockaddr *)&addr, &len);
spin_unlock_bh(&conn->session->lock);
spin_unlock_bh(&conn->session->frwd_lock);
if (rc)
return rc;

Expand All @@ -759,23 +759,23 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
spin_lock_bh(&session->lock);
spin_lock_bh(&session->frwd_lock);
conn = session->leadconn;
if (!conn) {
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
return -ENOTCONN;
}
tcp_conn = conn->dd_data;

tcp_sw_conn = tcp_conn->dd_data;
if (!tcp_sw_conn->sock) {
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
return -ENOTCONN;
}

rc = kernel_getsockname(tcp_sw_conn->sock,
(struct sockaddr *)&addr, &len);
spin_unlock_bh(&session->lock);
spin_unlock_bh(&session->frwd_lock);
if (rc)
return rc;

Expand Down
Loading

0 comments on commit 659743b

Please sign in to comment.