Skip to content

Commit

Permalink
Merge patch series "scsi: target: Allow userspace to config cmd submission"
Browse files Browse the repository at this point in the history

Mike Christie <michael.christie@oracle.com> says:

The following patches were made over Linus's tree but apply over
Martin's branches. They allow userspace to configure how fabric
drivers submit cmds to backend drivers.

Right now loop and vhost use a worker thread, and the other drivers
submit from the context in which they receive/process the cmd. For
multiple-LUN cases where the target can queue more cmds than the
backend can handle, deferring to a worker thread is safest because
the backend driver can block when doing things like waiting for a free
request/tag. Deferring also helps when the target has to handle
transport-level requests from the recv context.

For cases where the backend devices can queue everything the target
sends, there is no need to defer to a workqueue, and you can see a
perf boost of up to 26% for small IO workloads. For an NVMe device and
vhost-scsi, I see the following with 4K IOs:

fio jobs        1       2       4       8       10
--------------------------------------------------
workqueue
submit        94K     190K    394K    770K    890K

direct
submit       128K     252K    488K    950K    -

Link: https://lore.kernel.org/r/1b1f7a5c-0988-45f9-b103-dfed2c0405b1@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
  • Loading branch information
Martin K. Petersen committed Oct 13, 2023
2 parents 9f4c887 + 6dbc829 commit 1caddfc
Show file tree
Hide file tree
Showing 21 changed files with 170 additions and 76 deletions.
3 changes: 3 additions & 0 deletions drivers/infiniband/ulp/srpt/ib_srpt.c
Original file line number Diff line number Diff line change
Expand Up @@ -3867,6 +3867,9 @@ static const struct target_core_fabric_ops srpt_template = {
.tfc_discovery_attrs = srpt_da_attrs,
.tfc_wwn_attrs = srpt_wwn_attrs,
.tfc_tpg_attrib_attrs = srpt_tpg_attrib_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

/**
Expand Down
5 changes: 5 additions & 0 deletions drivers/scsi/elx/efct/efct_lio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1611,6 +1611,8 @@ static const struct target_core_fabric_ops efct_lio_ops = {
.sess_get_initiator_sid = NULL,
.tfc_tpg_base_attrs = efct_lio_tpg_attrs,
.tfc_tpg_attrib_attrs = efct_lio_tpg_attrib_attrs,
.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

static const struct target_core_fabric_ops efct_lio_npiv_ops = {
Expand Down Expand Up @@ -1646,6 +1648,9 @@ static const struct target_core_fabric_ops efct_lio_npiv_ops = {
.sess_get_initiator_sid = NULL,
.tfc_tpg_base_attrs = efct_lio_npiv_tpg_attrs,
.tfc_tpg_attrib_attrs = efct_lio_npiv_tpg_attrib_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

int efct_scsi_tgt_driver_init(void)
Expand Down
3 changes: 3 additions & 0 deletions drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
Original file line number Diff line number Diff line change
Expand Up @@ -3975,6 +3975,9 @@ static const struct target_core_fabric_ops ibmvscsis_ops = {
.fabric_drop_tpg = ibmvscsis_drop_tpg,

.tfc_wwn_attrs = ibmvscsis_wwn_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

static void ibmvscsis_dev_release(struct device *dev) {};
Expand Down
6 changes: 6 additions & 0 deletions drivers/scsi/qla2xxx/tcm_qla2xxx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1822,6 +1822,9 @@ static const struct target_core_fabric_ops tcm_qla2xxx_ops = {
.tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs,
.tfc_tpg_base_attrs = tcm_qla2xxx_tpg_attrs,
.tfc_tpg_attrib_attrs = tcm_qla2xxx_tpg_attrib_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
Expand Down Expand Up @@ -1859,6 +1862,9 @@ static const struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
.fabric_init_nodeacl = tcm_qla2xxx_init_nodeacl,

.tfc_wwn_attrs = tcm_qla2xxx_wwn_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

static int tcm_qla2xxx_register_configfs(void)
Expand Down
6 changes: 0 additions & 6 deletions drivers/target/iscsi/iscsi_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -1234,12 +1234,6 @@ int iscsit_setup_scsi_cmd(struct iscsit_conn *conn, struct iscsit_cmd *cmd,
spin_lock_bh(&conn->cmd_lock);
list_add_tail(&cmd->i_conn_node, &conn->conn_cmd_list);
spin_unlock_bh(&conn->cmd_lock);
/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(&cmd->se_cmd);

return 0;
}
EXPORT_SYMBOL(iscsit_setup_scsi_cmd);
Expand Down
5 changes: 4 additions & 1 deletion drivers/target/iscsi/iscsi_target_configfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1589,5 +1589,8 @@ const struct target_core_fabric_ops iscsi_ops = {
.tfc_tpg_nacl_auth_attrs = lio_target_nacl_auth_attrs,
.tfc_tpg_nacl_param_attrs = lio_target_nacl_param_attrs,

.write_pending_must_be_called = true,
.write_pending_must_be_called = 1,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};
2 changes: 1 addition & 1 deletion drivers/target/iscsi/iscsi_target_erl1.c
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,7 @@ int iscsit_execute_cmd(struct iscsit_cmd *cmd, int ooo)

iscsit_set_unsolicited_dataout(cmd);
}
return transport_handle_cdb_direct(&cmd->se_cmd);
return target_submit(&cmd->se_cmd);

case ISCSI_OP_NOOP_OUT:
case ISCSI_OP_TEXT:
Expand Down
2 changes: 1 addition & 1 deletion drivers/target/iscsi/iscsi_target_tmr.c
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ static int iscsit_task_reassign_complete_read(
pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
" transport\n", cmd->init_task_tag,
cmd->se_cmd.t_state);
transport_handle_cdb_direct(se_cmd);
target_submit(se_cmd);
return 0;
}

Expand Down
4 changes: 3 additions & 1 deletion drivers/target/loopback/tcm_loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ static void tcm_loop_target_queue_cmd(struct tcm_loop_cmd *tl_cmd)
GFP_ATOMIC))
return;

target_queue_submission(se_cmd);
target_submit(se_cmd);
return;

out_done:
Expand Down Expand Up @@ -1102,6 +1102,8 @@ static const struct target_core_fabric_ops loop_ops = {
.tfc_wwn_attrs = tcm_loop_wwn_attrs,
.tfc_tpg_base_attrs = tcm_loop_tpg_attrs,
.tfc_tpg_attrib_attrs = tcm_loop_tpg_attrib_attrs,
.default_submit_type = TARGET_QUEUE_SUBMIT,
.direct_submit_supp = 0,
};

static int __init tcm_loop_fabric_init(void)
Expand Down
3 changes: 3 additions & 0 deletions drivers/target/sbp/sbp_target.c
Original file line number Diff line number Diff line change
Expand Up @@ -2278,6 +2278,9 @@ static const struct target_core_fabric_ops sbp_ops = {
.tfc_wwn_attrs = sbp_wwn_attrs,
.tfc_tpg_base_attrs = sbp_tpg_base_attrs,
.tfc_tpg_attrib_attrs = sbp_tpg_attrib_attrs,

.default_submit_type = TARGET_DIRECT_SUBMIT,
.direct_submit_supp = 1,
};

static int __init sbp_init(void)
Expand Down
1 change: 0 additions & 1 deletion drivers/target/target_core_alua.c
Original file line number Diff line number Diff line change
Expand Up @@ -850,7 +850,6 @@ int core_alua_check_nonop_delay(
msleep_interruptible(cmd->alua_nonop_delay);
return 0;
}
EXPORT_SYMBOL(core_alua_check_nonop_delay);

static int core_alua_write_tpg_metadata(
const char *path,
Expand Down
22 changes: 22 additions & 0 deletions drivers/target/target_core_configfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity_alignment);
DEF_CONFIGFS_ATTRIB_SHOW(unmap_zeroes_data);
DEF_CONFIGFS_ATTRIB_SHOW(max_write_same_len);
DEF_CONFIGFS_ATTRIB_SHOW(emulate_rsoc);
DEF_CONFIGFS_ATTRIB_SHOW(submit_type);

#define DEF_CONFIGFS_ATTRIB_STORE_U32(_name) \
static ssize_t _name##_store(struct config_item *item, const char *page,\
Expand Down Expand Up @@ -1231,6 +1232,24 @@ static ssize_t emulate_rsoc_store(struct config_item *item,
return count;
}

/*
 * Store handler for the submit_type device attribute.
 *
 * Parses @page with kstrtou8() (base 0) and records the result in the
 * device attributes reached through @item.
 *
 * Returns @count on success, the negative kstrtou8() error when parsing
 * fails, or -EINVAL when the value is greater than TARGET_QUEUE_SUBMIT.
 */
static ssize_t submit_type_store(struct config_item *item, const char *page,
				 size_t count)
{
	struct se_dev_attrib *attrib = to_attrib(item);
	u8 requested;
	int err = kstrtou8(page, 0, &requested);

	if (err < 0)
		return err;

	/* Anything at or below TARGET_QUEUE_SUBMIT is accepted as-is. */
	if (requested <= TARGET_QUEUE_SUBMIT) {
		attrib->submit_type = requested;
		return count;
	}

	return -EINVAL;
}

CONFIGFS_ATTR(, emulate_model_alias);
CONFIGFS_ATTR(, emulate_dpo);
CONFIGFS_ATTR(, emulate_fua_write);
Expand Down Expand Up @@ -1266,6 +1285,7 @@ CONFIGFS_ATTR(, unmap_zeroes_data);
CONFIGFS_ATTR(, max_write_same_len);
CONFIGFS_ATTR(, alua_support);
CONFIGFS_ATTR(, pgr_support);
CONFIGFS_ATTR(, submit_type);

/*
* dev_attrib attributes for devices using the target core SBC/SPC
Expand Down Expand Up @@ -1308,6 +1328,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
&attr_alua_support,
&attr_pgr_support,
&attr_emulate_rsoc,
&attr_submit_type,
NULL,
};
EXPORT_SYMBOL(sbc_attrib_attrs);
Expand All @@ -1325,6 +1346,7 @@ struct configfs_attribute *passthrough_attrib_attrs[] = {
&attr_emulate_pr,
&attr_alua_support,
&attr_pgr_support,
&attr_submit_type,
NULL,
};
EXPORT_SYMBOL(passthrough_attrib_attrs);
Expand Down
1 change: 1 addition & 0 deletions drivers/target/target_core_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -779,6 +779,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
dev->dev_attrib.unmap_zeroes_data =
DA_UNMAP_ZEROES_DATA_DEFAULT;
dev->dev_attrib.max_write_same_len = DA_MAX_WRITE_SAME_LEN;
dev->dev_attrib.submit_type = TARGET_FABRIC_DEFAULT_SUBMIT;

xcopy_lun = &dev->xcopy_lun;
rcu_assign_pointer(xcopy_lun->lun_se_dev, dev);
Expand Down
24 changes: 24 additions & 0 deletions drivers/target/target_core_fabric_configfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -1065,8 +1065,32 @@ target_fabric_wwn_cmd_completion_affinity_store(struct config_item *item,
}
CONFIGFS_ATTR(target_fabric_wwn_, cmd_completion_affinity);

/*
 * Read-only attribute: print the fabric ops' default_submit_type value
 * for the WWN that owns this parameter group, as an unsigned decimal
 * followed by a newline.
 */
static ssize_t
target_fabric_wwn_default_submit_type_show(struct config_item *item,
					   char *page)
{
	struct config_group *group = to_config_group(item);
	struct se_wwn *wwn;

	/* The config group is embedded in se_wwn as param_group. */
	wwn = container_of(group, struct se_wwn, param_group);

	return sysfs_emit(page, "%u\n",
			  wwn->wwn_tf->tf_ops->default_submit_type);
}
CONFIGFS_ATTR_RO(target_fabric_wwn_, default_submit_type);

/*
 * Read-only attribute: print the fabric ops' direct_submit_supp flag for
 * the WWN that owns this parameter group, as an unsigned decimal followed
 * by a newline.
 */
static ssize_t
target_fabric_wwn_direct_submit_supported_show(struct config_item *item,
					       char *page)
{
	struct config_group *group = to_config_group(item);
	struct se_wwn *wwn;

	/* The config group is embedded in se_wwn as param_group. */
	wwn = container_of(group, struct se_wwn, param_group);

	return sysfs_emit(page, "%u\n",
			  wwn->wwn_tf->tf_ops->direct_submit_supp);
}
CONFIGFS_ATTR_RO(target_fabric_wwn_, direct_submit_supported);

/*
 * NULL-terminated list of the fabric WWN parameter attributes:
 * cmd_completion_affinity plus the read-only default_submit_type and
 * direct_submit_supported values.
 */
static struct configfs_attribute *target_fabric_wwn_param_attrs[] = {
&target_fabric_wwn_attr_cmd_completion_affinity,
&target_fabric_wwn_attr_default_submit_type,
&target_fabric_wwn_attr_direct_submit_supported,
NULL,
};

Expand Down
116 changes: 59 additions & 57 deletions drivers/target/target_core_transport.c
Original file line number Diff line number Diff line change
Expand Up @@ -1576,17 +1576,39 @@ target_cmd_parse_cdb(struct se_cmd *cmd)
}
EXPORT_SYMBOL(target_cmd_parse_cdb);

/*
* Used by fabric module frontends to queue tasks directly.
* May only be used from process context.
*/
int transport_handle_cdb_direct(
struct se_cmd *cmd)
static int __target_submit(struct se_cmd *cmd)
{
sense_reason_t ret;

might_sleep();

/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(cmd);

if (cmd->t_data_nents != 0) {
/*
* This is primarily a hack for udev and tcm loop which sends
* INQUIRYs with a single page and expects the data to be
* cleared.
*/
if (!(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) &&
cmd->data_direction == DMA_FROM_DEVICE) {
struct scatterlist *sgl = cmd->t_data_sg;
unsigned char *buf = NULL;

BUG_ON(!sgl);

buf = kmap_local_page(sg_page(sgl));
if (buf) {
memset(buf + sgl->offset, 0, sgl->length);
kunmap_local(buf);
}
}
}

if (!cmd->se_lun) {
dump_stack();
pr_err("cmd->se_lun is NULL\n");
Expand Down Expand Up @@ -1614,7 +1636,6 @@ int transport_handle_cdb_direct(
transport_generic_request_failure(cmd, ret);
return 0;
}
EXPORT_SYMBOL(transport_handle_cdb_direct);

sense_reason_t
transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
Expand Down Expand Up @@ -1781,53 +1802,6 @@ int target_submit_prep(struct se_cmd *se_cmd, unsigned char *cdb,
}
EXPORT_SYMBOL_GPL(target_submit_prep);

/**
* target_submit - perform final initialization and submit cmd to LIO core
* @se_cmd: command descriptor to submit
*
* target_submit_prep must have been called on the cmd, and this must be
* called from process context.
*
* For commands without SCF_SCSI_DATA_CDB whose data direction is
* DMA_FROM_DEVICE, the first data scatterlist entry is zeroed. The ALUA
* non-optimized delay check is then applied and the command is handed to
* transport_handle_cdb_direct().
*/
void target_submit(struct se_cmd *se_cmd)
{
struct scatterlist *sgl = se_cmd->t_data_sg;
unsigned char *buf = NULL;

might_sleep();

if (se_cmd->t_data_nents != 0) {
BUG_ON(!sgl);
/*
* A work-around for tcm_loop as some userspace code via
* scsi-generic do not memset their associated read buffers,
* so go ahead and do that here for type non-data CDBs. Also
* note that this is currently guaranteed to be a single SGL
* for this case by target core in target_setup_cmd_from_cdb()
* -> transport_generic_cmd_sequencer().
*/
if (!(se_cmd->se_cmd_flags & SCF_SCSI_DATA_CDB) &&
se_cmd->data_direction == DMA_FROM_DEVICE) {
/*
* NOTE(review): this NULL check looks redundant given the
* BUG_ON(!sgl) above - confirm before relying on it.
*/
if (sgl)
buf = kmap(sg_page(sgl)) + sgl->offset;

if (buf) {
/* Only the first scatterlist entry is cleared. */
memset(buf, 0, sgl->length);
kunmap(sg_page(sgl));
}
}

}

/*
* Check if we need to delay processing because of ALUA
* Active/NonOptimized primary access state..
*/
core_alua_check_nonop_delay(se_cmd);

transport_handle_cdb_direct(se_cmd);
}
EXPORT_SYMBOL_GPL(target_submit);

/**
* target_submit_cmd - lookup unpacked lun and submit uninitialized se_cmd
*
Expand Down Expand Up @@ -1923,7 +1897,7 @@ void target_queued_submit_work(struct work_struct *work)
se_plug = target_plug_device(se_dev);
}

target_submit(se_cmd);
__target_submit(se_cmd);
}

if (se_plug)
Expand All @@ -1934,7 +1908,7 @@ void target_queued_submit_work(struct work_struct *work)
* target_queue_submission - queue the cmd to run on the LIO workqueue
* @se_cmd: command descriptor to submit
*/
void target_queue_submission(struct se_cmd *se_cmd)
static void target_queue_submission(struct se_cmd *se_cmd)
{
struct se_device *se_dev = se_cmd->se_dev;
int cpu = se_cmd->cpuid;
Expand All @@ -1944,7 +1918,35 @@ void target_queue_submission(struct se_cmd *se_cmd)
llist_add(&se_cmd->se_cmd_list, &sq->cmd_list);
queue_work_on(cpu, target_submission_wq, &sq->work);
}
EXPORT_SYMBOL_GPL(target_queue_submission);

/**
 * target_submit - perform final initialization and submit cmd to LIO core
 * @se_cmd: command descriptor to submit
 *
 * target_submit_prep or something similar must have been called on the cmd,
 * and this must be called from process context.
 *
 * The submission path is chosen from the device's submit_type attribute:
 * TARGET_FABRIC_DEFAULT_SUBMIT defers to the fabric driver's
 * default_submit_type, TARGET_DIRECT_SUBMIT is honoured only when the fabric
 * driver sets direct_submit_supp, and every other case queues the cmd.
 *
 * Return: the result of __target_submit() when the cmd is submitted directly,
 * or 0 when the cmd has been queued for deferred submission.
 */
int target_submit(struct se_cmd *se_cmd)
{
	const struct target_core_fabric_ops *tfo = se_cmd->se_sess->se_tpg->se_tpg_tfo;
	struct se_dev_attrib *da = &se_cmd->se_dev->dev_attrib;
	u8 submit_type;

	/*
	 * Read the per-device setting once so that the decision below is made
	 * on a single value.
	 */
	switch (da->submit_type) {
	case TARGET_FABRIC_DEFAULT_SUBMIT:
		submit_type = tfo->default_submit_type;
		break;
	case TARGET_DIRECT_SUBMIT:
		/* Fall back to queueing if the fabric cannot submit directly. */
		submit_type = tfo->direct_submit_supp ? TARGET_DIRECT_SUBMIT :
							TARGET_QUEUE_SUBMIT;
		break;
	default:
		submit_type = TARGET_QUEUE_SUBMIT;
		break;
	}

	if (submit_type == TARGET_DIRECT_SUBMIT)
		return __target_submit(se_cmd);

	target_queue_submission(se_cmd);
	return 0;
}
EXPORT_SYMBOL_GPL(target_submit);

static void target_complete_tmr_failure(struct work_struct *work)
{
Expand Down
Loading

0 comments on commit 1caddfc

Please sign in to comment.