diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a5653892d7739..314705da2c107 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -575,11 +575,12 @@ EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { - if (!(req->rq_flags & RQF_DONTPREP)) { - nvme_req(req)->retries = 0; - nvme_req(req)->flags = 0; - req->rq_flags |= RQF_DONTPREP; - } + struct nvme_command *cmd = nvme_req(req)->cmd; + + memset(cmd, 0, sizeof(*cmd)); + nvme_req(req)->retries = 0; + nvme_req(req)->flags = 0; + req->rq_flags |= RQF_DONTPREP; } static inline unsigned int nvme_req_op(struct nvme_command *cmd) @@ -595,9 +596,12 @@ static inline void nvme_init_request(struct request *req, else /* no queuedata implies admin queue */ req->timeout = NVME_ADMIN_TIMEOUT; + /* passthru commands should let the driver set the SGL flags */ + cmd->common.flags &= ~NVME_CMD_SGL_ALL; + req->cmd_flags |= REQ_FAILFAST_DRIVER; nvme_clear_nvme_request(req); - nvme_req(req)->cmd = cmd; + memcpy(nvme_req(req)->cmd, cmd, sizeof(*cmd)); } struct request *nvme_alloc_request(struct request_queue *q, @@ -726,14 +730,6 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; } -static void nvme_setup_passthrough(struct request *req, - struct nvme_command *cmd) -{ - memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); - /* passthru commands should let the driver set the SGL flags */ - cmd->common.flags &= ~NVME_CMD_SGL_ALL; -} - static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { @@ -888,18 +884,18 @@ void nvme_cleanup_cmd(struct request *req) } EXPORT_SYMBOL_GPL(nvme_cleanup_cmd); -blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, - struct nvme_command *cmd) +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req) { + struct nvme_command *cmd = nvme_req(req)->cmd; blk_status_t ret = BLK_STS_OK; - nvme_clear_nvme_request(req); + if (!(req->rq_flags & RQF_DONTPREP)) + nvme_clear_nvme_request(req); - memset(cmd, 0, sizeof(*cmd)); switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: - nvme_setup_passthrough(req, cmd); + /* these are setup prior to execution in nvme_init_request() */ break; case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); @@ -1076,9 +1072,9 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) if (ns->head->effects) effects = le32_to_cpu(ns->head->effects->iocs[opcode]); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) - dev_warn(ctrl->device, - "IO command:%02x has unhandled effects:%08x\n", - opcode, effects); + dev_warn_once(ctrl->device, + "IO command:%02x has unhandled effects:%08x\n", + opcode, effects); return 0; } @@ -1120,7 +1116,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) mutex_unlock(&ctrl->scan_lock); } if (effects & NVME_CMD_EFFECTS_CCC) - nvme_init_identify(ctrl); + nvme_init_ctrl_finish(ctrl); if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); flush_work(&ctrl->scan_work); @@ -1137,7 +1133,8 @@ void nvme_execute_passthru_rq(struct request *rq) effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); blk_execute_rq(disk, rq, 0); - nvme_passthru_end(ctrl, effects); + if (effects) /* nothing to be done for zero cmd effects */ + nvme_passthru_end(ctrl, effects); } EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); @@ -1635,6 +1632,12 @@ static int 
nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return -EFAULT; if (cmd.flags) return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; @@ -1679,6 +1682,12 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return -EFAULT; if (cmd.flags) return -EINVAL; + if (ns && cmd.nsid != ns->head->ns_id) { + dev_err(ctrl->device, + "%s: nsid (%u) in cmd does not match nsid (%u) of namespace\n", + current->comm, cmd.nsid, ns->head->ns_id); + return -EINVAL; + } memset(&c, 0, sizeof(c)); c.common.opcode = cmd.opcode; @@ -1939,7 +1948,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) struct request_queue *queue = disk->queue; u32 size = queue_logical_block_size(queue); - if (!(ctrl->oncs & NVME_CTRL_ONCS_DSM)) { + if (ctrl->max_discard_sectors == 0) { blk_queue_flag_clear(QUEUE_FLAG_DISCARD, queue); return; } @@ -1957,39 +1966,13 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) if (blk_queue_flag_test_and_set(QUEUE_FLAG_DISCARD, queue)) return; - blk_queue_max_discard_sectors(queue, UINT_MAX); - blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES); + blk_queue_max_discard_sectors(queue, ctrl->max_discard_sectors); + blk_queue_max_discard_segments(queue, ctrl->max_discard_segments); if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) -{ - u64 max_blocks; - - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) - return; - /* - * Even though NVMe spec explicitly states that MDTS is not - * applicable to the write-zeroes:- "The restriction does not apply to - * commands that do not transfer data between the host and the - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". - * In order to be more cautious use controller's max_hw_sectors value - * to configure the maximum sectors for the write-zeroes which is - * configured based on the controller's MDTS field in the - * nvme_init_identify() if available. - */ - if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_blocks = (u64)USHRT_MAX + 1; - else - max_blocks = ns->ctrl->max_hw_sectors + 1; - - blk_queue_max_write_zeroes_sectors(disk->queue, - nvme_lba_to_sect(ns, max_blocks)); -} - static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) { return !uuid_is_null(&ids->uuid) || @@ -2159,7 +2142,8 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(disk, ns); + blk_queue_max_write_zeroes_sectors(disk->queue, + ns->ctrl->max_zeroes_sectors); set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || test_bit(NVME_NS_FORCE_RO, &ns->flags)); @@ -2325,18 +2309,21 @@ static int nvme_pr_preempt(struct block_device *bdev, u64 old, u64 new, enum pr_type type, bool abort) { u32 cdw10 = nvme_pr_type(type) << 8 | (abort ? 2 : 1); + return nvme_pr_command(bdev, cdw10, old, new, nvme_cmd_resv_acquire); } static int nvme_pr_clear(struct block_device *bdev, u64 key) { u32 cdw10 = 1 | (key ? 
1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_register); } static int nvme_pr_release(struct block_device *bdev, u64 key, enum pr_type type) { u32 cdw10 = nvme_pr_type(type) << 8 | (key ? 1 << 3 : 0); + return nvme_pr_command(bdev, cdw10, key, 0, nvme_cmd_resv_release); } @@ -2876,8 +2863,8 @@ static ssize_t subsys_##field##_show(struct device *dev, \ { \ struct nvme_subsystem *subsys = \ container_of(dev, struct nvme_subsystem, dev); \ - return sprintf(buf, "%.*s\n", \ - (int)sizeof(subsys->field), subsys->field); \ + return sysfs_emit(buf, "%.*s\n", \ + (int)sizeof(subsys->field), subsys->field); \ } \ static SUBSYS_ATTR_RO(field, S_IRUGO, subsys_##field##_show); @@ -3060,28 +3047,74 @@ static int nvme_get_effects_log(struct nvme_ctrl *ctrl, u8 csi, return 0; } -/* - * Initialize the cached copies of the Identify data and various controller - * register in our nvme_ctrl structure. This should be called as soon as - * the admin queue is fully up and running. - */ -int nvme_init_identify(struct nvme_ctrl *ctrl) +static inline u32 nvme_mps_to_sectors(struct nvme_ctrl *ctrl, u32 units) { - struct nvme_id_ctrl *id; - int ret, page_shift; - u32 max_hw_sectors; - bool prev_apst_enabled; + u32 page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12, val; - ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); - if (ret) { - dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); - return ret; + if (check_shl_overflow(1U, units + page_shift - 9, &val)) + return UINT_MAX; + return val; +} + +static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) +{ + struct nvme_command c = { }; + struct nvme_id_ctrl_nvm *id; + int ret; + + if (ctrl->oncs & NVME_CTRL_ONCS_DSM) { + ctrl->max_discard_sectors = UINT_MAX; + ctrl->max_discard_segments = NVME_DSM_MAX_RANGES; + } else { + ctrl->max_discard_sectors = 0; + ctrl->max_discard_segments = 0; } - page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12; - ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); - if (ctrl->vs >= NVME_VS(1, 1, 0)) - ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap); + /* + * Even though NVMe spec explicitly states that MDTS is not applicable + * to the write-zeroes, we are cautious and limit the size to the + * controllers max_hw_sectors value, which is based on the MDTS field + * and possibly other limiting factors. 
+ */ + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + ctrl->max_zeroes_sectors = ctrl->max_hw_sectors; + else + ctrl->max_zeroes_sectors = 0; + + if (nvme_ctrl_limited_cns(ctrl)) + return 0; + + id = kzalloc(sizeof(*id), GFP_KERNEL); + if (!id) + return 0; + + c.identify.opcode = nvme_admin_identify; + c.identify.cns = NVME_ID_CNS_CS_CTRL; + c.identify.csi = NVME_CSI_NVM; + + ret = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + if (ret) + goto free_data; + + if (id->dmrl) + ctrl->max_discard_segments = id->dmrl; + if (id->dmrsl) + ctrl->max_discard_sectors = le32_to_cpu(id->dmrsl); + if (id->wzsl) + ctrl->max_zeroes_sectors = nvme_mps_to_sectors(ctrl, id->wzsl); + +free_data: + kfree(id); + return ret; +} + +static int nvme_init_identify(struct nvme_ctrl *ctrl) +{ + struct nvme_id_ctrl *id; + u32 max_hw_sectors; + bool prev_apst_enabled; + int ret; ret = nvme_identify_ctrl(ctrl, &id); if (ret) { @@ -3099,7 +3132,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->cntlid = le16_to_cpu(id->cntlid); if (!ctrl->identified) { - int i; + unsigned int i; ret = nvme_init_subsystem(ctrl, id); if (ret) @@ -3138,7 +3171,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; if (id->mdts) - max_hw_sectors = 1 << (id->mdts + page_shift - 9); + max_hw_sectors = nvme_mps_to_sectors(ctrl, id->mdts); else max_hw_sectors = UINT_MAX; ctrl->max_hw_sectors = @@ -3212,16 +3245,47 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } ret = nvme_mpath_init(ctrl, id); - kfree(id); - if (ret < 0) - return ret; + goto out_free; if (ctrl->apst_enabled && !prev_apst_enabled) dev_pm_qos_expose_latency_tolerance(ctrl->device); else if (!ctrl->apst_enabled && prev_apst_enabled) dev_pm_qos_hide_latency_tolerance(ctrl->device); +out_free: + kfree(id); + return ret; +} + +/* + * Initialize the cached copies of the Identify data and various controller + * register in our nvme_ctrl structure. This should be called as soon as + * the admin queue is fully up and running. 
+ */ +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl) +{ + int ret; + + ret = ctrl->ops->reg_read32(ctrl, NVME_REG_VS, &ctrl->vs); + if (ret) { + dev_err(ctrl->device, "Reading VS failed (%d)\n", ret); + return ret; + } + + ctrl->sqsize = min_t(u16, NVME_CAP_MQES(ctrl->cap), ctrl->sqsize); + + if (ctrl->vs >= NVME_VS(1, 1, 0)) + ctrl->subsystem = NVME_CAP_NSSRC(ctrl->cap); + + ret = nvme_init_identify(ctrl); + if (ret) + return ret; + + ret = nvme_init_non_mdts_limits(ctrl); + if (ret < 0) + return ret; + ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; @@ -3247,12 +3311,8 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->identified = true; return 0; - -out_free: - kfree(id); - return ret; } -EXPORT_SYMBOL_GPL(nvme_init_identify); +EXPORT_SYMBOL_GPL(nvme_init_ctrl_finish); static int nvme_dev_open(struct inode *inode, struct file *file) { @@ -3398,13 +3458,13 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, int model_len = sizeof(subsys->model); if (!uuid_is_null(&ids->uuid)) - return sprintf(buf, "uuid.%pU\n", &ids->uuid); + return sysfs_emit(buf, "uuid.%pU\n", &ids->uuid); if (memchr_inv(ids->nguid, 0, sizeof(ids->nguid))) - return sprintf(buf, "eui.%16phN\n", ids->nguid); + return sysfs_emit(buf, "eui.%16phN\n", ids->nguid); if (memchr_inv(ids->eui64, 0, sizeof(ids->eui64))) - return sprintf(buf, "eui.%8phN\n", ids->eui64); + return sysfs_emit(buf, "eui.%8phN\n", ids->eui64); while (serial_len > 0 && (subsys->serial[serial_len - 1] == ' ' || subsys->serial[serial_len - 1] == '\0')) @@ -3413,7 +3473,7 @@ static ssize_t wwid_show(struct device *dev, struct device_attribute *attr, subsys->model[model_len - 1] == '\0')) model_len--; - return sprintf(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, + return sysfs_emit(buf, "nvme.%04x-%*phN-%*phN-%08x\n", subsys->vendor_id, serial_len, subsys->serial, model_len, subsys->model, head->ns_id); } @@ -3422,7 +3482,7 @@ static DEVICE_ATTR_RO(wwid); static ssize_t nguid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); + return sysfs_emit(buf, "%pU\n", dev_to_ns_head(dev)->ids.nguid); } static DEVICE_ATTR_RO(nguid); @@ -3437,23 +3497,23 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, if (uuid_is_null(&ids->uuid)) { printk_ratelimited(KERN_WARNING "No UUID available providing old NGUID\n"); - return sprintf(buf, "%pU\n", ids->nguid); + return sysfs_emit(buf, "%pU\n", ids->nguid); } - return sprintf(buf, "%pU\n", &ids->uuid); + return sysfs_emit(buf, "%pU\n", &ids->uuid); } static DEVICE_ATTR_RO(uuid); static ssize_t eui_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); + return sysfs_emit(buf, "%8ph\n", dev_to_ns_head(dev)->ids.eui64); } static DEVICE_ATTR_RO(eui); static ssize_t nsid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", dev_to_ns_head(dev)->ns_id); + return sysfs_emit(buf, "%d\n", dev_to_ns_head(dev)->ns_id); } static DEVICE_ATTR_RO(nsid); @@ -3518,7 +3578,7 @@ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%.*s\n", \ + return sysfs_emit(buf, "%.*s\n", \ (int)sizeof(ctrl->subsys->field), ctrl->subsys->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3532,7 +3592,7 @@ static ssize_t 
field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ struct nvme_ctrl *ctrl = dev_get_drvdata(dev); \ - return sprintf(buf, "%d\n", ctrl->field); \ + return sysfs_emit(buf, "%d\n", ctrl->field); \ } \ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL); @@ -3580,9 +3640,9 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) && state_name[ctrl->state]) - return sprintf(buf, "%s\n", state_name[ctrl->state]); + return sysfs_emit(buf, "%s\n", state_name[ctrl->state]); - return sprintf(buf, "unknown state\n"); + return sysfs_emit(buf, "unknown state\n"); } static DEVICE_ATTR(state, S_IRUGO, nvme_sysfs_show_state, NULL); @@ -3634,9 +3694,9 @@ static ssize_t nvme_ctrl_loss_tmo_show(struct device *dev, struct nvmf_ctrl_options *opts = ctrl->opts; if (ctrl->opts->max_reconnects == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", - opts->max_reconnects * opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", + opts->max_reconnects * opts->reconnect_delay); } static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, @@ -3650,7 +3710,7 @@ static ssize_t nvme_ctrl_loss_tmo_store(struct device *dev, if (err) return -EINVAL; - else if (ctrl_loss_tmo < 0) + if (ctrl_loss_tmo < 0) opts->max_reconnects = -1; else opts->max_reconnects = DIV_ROUND_UP(ctrl_loss_tmo, @@ -3666,8 +3726,8 @@ static ssize_t nvme_ctrl_reconnect_delay_show(struct device *dev, struct nvme_ctrl *ctrl = dev_get_drvdata(dev); if (ctrl->opts->reconnect_delay == -1) - return sprintf(buf, "off\n"); - return sprintf(buf, "%d\n", ctrl->opts->reconnect_delay); + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->reconnect_delay); } static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, @@ -3687,6 +3747,36 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, static DEVICE_ATTR(reconnect_delay, S_IRUGO | S_IWUSR, nvme_ctrl_reconnect_delay_show, nvme_ctrl_reconnect_delay_store); +static ssize_t nvme_ctrl_fast_io_fail_tmo_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + + if (ctrl->opts->fast_io_fail_tmo == -1) + return sysfs_emit(buf, "off\n"); + return sysfs_emit(buf, "%d\n", ctrl->opts->fast_io_fail_tmo); +} + +static ssize_t nvme_ctrl_fast_io_fail_tmo_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t count) +{ + struct nvme_ctrl *ctrl = dev_get_drvdata(dev); + struct nvmf_ctrl_options *opts = ctrl->opts; + int fast_io_fail_tmo, err; + + err = kstrtoint(buf, 10, &fast_io_fail_tmo); + if (err) + return -EINVAL; + + if (fast_io_fail_tmo < 0) + opts->fast_io_fail_tmo = -1; + else + opts->fast_io_fail_tmo = fast_io_fail_tmo; + return count; +} +static DEVICE_ATTR(fast_io_fail_tmo, S_IRUGO | S_IWUSR, + nvme_ctrl_fast_io_fail_tmo_show, nvme_ctrl_fast_io_fail_tmo_store); + static struct attribute *nvme_dev_attrs[] = { &dev_attr_reset_controller.attr, &dev_attr_rescan_controller.attr, @@ -3706,6 +3796,7 @@ static struct attribute *nvme_dev_attrs[] = { &dev_attr_hostid.attr, &dev_attr_ctrl_loss_tmo.attr, &dev_attr_reconnect_delay.attr, + &dev_attr_fast_io_fail_tmo.attr, NULL }; @@ -4756,6 +4847,7 @@ static inline void _nvme_check_size(void) BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns_zns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_zns) != 
NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ctrl_nvm) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 73d0737483891..921b3315c2f1e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1708,7 +1708,7 @@ nvme_fc_handle_ls_rqst_work(struct work_struct *work) * * If this routine returns error, the LLDD should abort the exchange. * - * @remoteport: pointer to the (registered) remote port that the LS + * @portptr: pointer to the (registered) remote port that the LS * was received from. The remoteport is associated with * a specific localport. * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be @@ -2128,6 +2128,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, op->op.fcp_req.first_sgl = op->sgl; op->op.fcp_req.private = &op->priv[0]; nvme_req(rq)->ctrl = &ctrl->ctrl; + nvme_req(rq)->cmd = &op->op.cmd_iu.sqe; return res; } @@ -2759,8 +2760,6 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl = queue->ctrl; struct request *rq = bd->rq; struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); - struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; - struct nvme_command *sqe = &cmdiu->sqe; enum nvmefc_fcp_datadir io_dir; bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); u32 data_len; @@ -2770,7 +2769,7 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); - ret = nvme_setup_cmd(ns, rq, sqe); + ret = nvme_setup_cmd(ns, rq); if (ret) return ret; @@ -3086,7 +3085,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - ret = nvme_init_identify(&ctrl->ctrl); + ret = nvme_init_ctrl_finish(&ctrl->ctrl); if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; @@ -3100,6 +3099,11 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } /* FC-NVME supports normal SGL Data Block Descriptors */ + if (!(ctrl->ctrl.sgls & ((1 << 0) | (1 << 1)))) { + dev_err(ctrl->ctrl.device, + "Mandatory sgls are not supported!\n"); + goto out_disconnect_admin_queue; + } if (opts->queue_size > ctrl->ctrl.maxcmd) { /* warn if maxcmd is lower than queue_size */ diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index a1d476e1ac020..f2d0ce0f4d381 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -602,8 +602,8 @@ static ssize_t nvme_subsys_iopolicy_show(struct device *dev, struct nvme_subsystem *subsys = container_of(dev, struct nvme_subsystem, dev); - return sprintf(buf, "%s\n", - nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); + return sysfs_emit(buf, "%s\n", + nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]); } static ssize_t nvme_subsys_iopolicy_store(struct device *dev, @@ -628,7 +628,7 @@ SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR, static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); + return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid); } DEVICE_ATTR_RO(ana_grpid); @@ -637,7 +637,7 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, { struct nvme_ns *ns = nvme_get_ns_from_dev(dev); - return sprintf(buf, "%s\n", 
nvme_ana_state_names[ns->ana_state]); + return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]); } DEVICE_ATTR_RO(ana_state); @@ -668,6 +668,10 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) if (desc.state) { /* found the group desc: update */ nvme_update_ns_ana_state(&desc, ns); + } else { + /* group desc not found: trigger a re-read */ + set_bit(NVME_NS_ANA_PENDING, &ns->flags); + queue_work(nvme_wq, &ns->ctrl->ana_work); } } else { ns->ana_state = NVME_ANA_OPTIMIZED; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 07b34175c6ce6..815c032a190ef 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -276,6 +276,9 @@ struct nvme_ctrl { u32 max_hw_sectors; u32 max_segments; u32 max_integrity_segments; + u32 max_discard_sectors; + u32 max_discard_segments; + u32 max_zeroes_sectors; #ifdef CONFIG_BLK_DEV_ZONED u32 max_zone_append; #endif @@ -599,7 +602,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -int nvme_init_identify(struct nvme_ctrl *ctrl); +int nvme_init_ctrl_finish(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); @@ -623,8 +626,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl); struct request *nvme_alloc_request(struct request_queue *q, struct nvme_command *cmd, blk_mq_req_flags_t flags); void nvme_cleanup_cmd(struct request *req); -blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, - struct nvme_command *cmd); +blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req); int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, @@ -745,7 +747,7 @@ static inline void nvme_trace_bio_complete(struct request *req) static inline int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { - if (ctrl->subsys->cmic & (1 << 3)) + if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) dev_warn(ctrl->device, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 7249ae74f71ff..d47bb18b976ad 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -224,6 +224,7 @@ struct nvme_queue { */ struct nvme_iod { struct nvme_request req; + struct nvme_command cmd; struct nvme_queue *nvmeq; bool use_sgl; int aborted; @@ -429,6 +430,7 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, iod->nvmeq = nvmeq; nvme_req(req)->ctrl = &dev->ctrl; + nvme_req(req)->cmd = &iod->cmd; return 0; } @@ -917,7 +919,7 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct nvme_command cmnd; + struct nvme_command *cmnd = &iod->cmd; blk_status_t ret; iod->aborted = 0; @@ -931,24 +933,24 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags))) return BLK_STS_IOERR; - ret = nvme_setup_cmd(ns, req, &cmnd); + ret = nvme_setup_cmd(ns, req); if (ret) return ret; if (blk_rq_nr_phys_segments(req)) { - ret = nvme_map_data(dev, req, &cmnd); + ret = nvme_map_data(dev, req, cmnd); if (ret) goto out_free_cmd; } if (blk_integrity_rq(req)) { - ret = nvme_map_metadata(dev, req, &cmnd); + ret = 
nvme_map_metadata(dev, req, cmnd); if (ret) goto out_unmap_data; } blk_mq_start_request(req); - nvme_submit_cmd(nvmeq, &cmnd, bd->last); + nvme_submit_cmd(nvmeq, cmnd, bd->last); return BLK_STS_OK; out_unmap_data: nvme_unmap_data(dev, req); @@ -1060,18 +1062,10 @@ static inline int nvme_process_cq(struct nvme_queue *nvmeq) static irqreturn_t nvme_irq(int irq, void *data) { struct nvme_queue *nvmeq = data; - irqreturn_t ret = IRQ_NONE; - /* - * The rmb/wmb pair ensures we see all updates from a previous run of - * the irq handler, even if that was on another CPU. - */ - rmb(); if (nvme_process_cq(nvmeq)) - ret = IRQ_HANDLED; - wmb(); - - return ret; + return IRQ_HANDLED; + return IRQ_NONE; } static irqreturn_t nvme_irq_check(int irq, void *data) @@ -2653,7 +2647,7 @@ static void nvme_reset_work(struct work_struct *work) */ dev->ctrl.max_integrity_segments = 1; - result = nvme_init_identify(&dev->ctrl); + result = nvme_init_ctrl_finish(&dev->ctrl); if (result) goto out; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 53ac4d7442ba9..d6bc43e6c8a64 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -314,6 +314,7 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, NVME_RDMA_DATA_SGL_SIZE; req->queue = queue; + nvme_req(rq)->cmd = req->sqe.data; return 0; } @@ -917,7 +918,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - error = nvme_init_identify(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) goto out_quiesce_queue; @@ -2038,7 +2039,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq = bd->rq; struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_qe *sqe = &req->sqe; - struct nvme_command *c = sqe->data; + struct nvme_command *c = nvme_req(rq)->cmd; struct ib_device *dev; bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); blk_status_t ret; @@ -2061,7 +2062,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, ib_dma_sync_single_for_cpu(dev, sqe->dma, sizeof(struct nvme_command), DMA_TO_DEVICE); - ret = nvme_setup_cmd(ns, rq, c); + ret = nvme_setup_cmd(ns, rq); if (ret) goto unmap_qe; diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 69f59d2c5799b..8e55d8bc0c50f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -417,6 +417,7 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, { struct nvme_tcp_ctrl *ctrl = set->driver_data; struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); + struct nvme_tcp_cmd_pdu *pdu; int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx]; u8 hdgst = nvme_tcp_hdgst_len(queue); @@ -427,8 +428,10 @@ static int nvme_tcp_init_request(struct blk_mq_tag_set *set, if (!req->pdu) return -ENOMEM; + pdu = req->pdu; req->queue = queue; nvme_req(rq)->ctrl = &ctrl->ctrl; + nvme_req(rq)->cmd = &pdu->cmd; return 0; } @@ -867,7 +870,7 @@ static void nvme_tcp_state_change(struct sock *sk) { struct nvme_tcp_queue *queue; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (!queue) goto done; @@ -888,7 +891,7 @@ static void nvme_tcp_state_change(struct sock *sk) queue->state_change(sk); done: - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) @@ -1875,7 +1878,7 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) blk_mq_unquiesce_queue(ctrl->admin_q); - error = nvme_init_identify(ctrl); + error = nvme_init_ctrl_finish(ctrl); if (error) goto out_quiesce_queue; @@ -1963,6 +1966,11 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) goto destroy_admin; } + if (!(ctrl->sgls & ((1 << 0) | (1 << 1)))) { + dev_err(ctrl->device, "Mandatory sgls are not supported!\n"); + goto destroy_admin; + } + if (opts->queue_size > ctrl->sqsize + 1) dev_warn(ctrl->device, "queue_size %zu > ctrl sqsize %u, clamping down\n", @@ -2259,7 +2267,7 @@ static blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; blk_status_t ret; - ret = nvme_setup_cmd(ns, rq, &pdu->cmd); + ret = nvme_setup_cmd(ns, rq); if (ret) return ret; diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index fe6b8aa90b534..f4cc32674edd0 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -513,7 +513,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) default: id->nuse = id->nsze; break; - } + } if (req->ns->bdev) nvmet_bdev_set_limits(req->ns->bdev, id); @@ -940,7 +940,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) if (nvmet_req_subsys(req)->type == NVME_NQN_DISC) return nvmet_parse_discovery_cmd(req); - ret = nvmet_check_ctrl_status(req, cmd); + ret = nvmet_check_ctrl_status(req); if (unlikely(ret)) return ret; diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index e5dbd1923b7b7..125ef2c65d5fd 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1149,6 +1149,12 @@ static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, if (!len) return -EINVAL; + if (len > NVMET_MN_MAX_SIZE) { + pr_err("Model nubmer size can not exceed %d Bytes\n", + NVMET_MN_MAX_SIZE); + return -EINVAL; + } + for (pos = 0; pos < len; pos++) { if (!nvmet_is_ascii(page[pos])) return -EINVAL; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index be6fcdaf51a7b..adbede9ab7f3f 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -864,10 +864,9 @@ static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req) static u16 nvmet_parse_io_cmd(struct nvmet_req *req) { - struct nvme_command *cmd = req->cmd; u16 ret; - ret = nvmet_check_ctrl_status(req, cmd); + ret = nvmet_check_ctrl_status(req); if (unlikely(ret)) return ret; @@ -1179,19 +1178,19 @@ static void nvmet_init_cap(struct nvmet_ctrl *ctrl) ctrl->cap |= NVMET_QUEUE_SIZE - 1; } -u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, - 
struct nvmet_req *req, struct nvmet_ctrl **ret) +struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, + const char *hostnqn, u16 cntlid, + struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = NULL; struct nvmet_subsys *subsys; - struct nvmet_ctrl *ctrl; - u16 status = 0; subsys = nvmet_find_get_subsys(req->port, subsysnqn); if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); - return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; + goto out; } mutex_lock(&subsys->lock); @@ -1204,33 +1203,34 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, if (!kref_get_unless_zero(&ctrl->ref)) continue; - *ret = ctrl; - goto out; + /* ctrl found */ + goto found; } } + ctrl = NULL; /* ctrl not found */ pr_warn("could not find controller %d for subsys %s / host %s\n", cntlid, subsysnqn, hostnqn); req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); - status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; -out: +found: mutex_unlock(&subsys->lock); nvmet_subsys_put(subsys); - return status; +out: + return ctrl; } -u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd) +u16 nvmet_check_ctrl_status(struct nvmet_req *req) { if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) { pr_err("got cmd %d while CC.EN == 0 on qid = %d\n", - cmd->common.opcode, req->sq->qid); + req->cmd->common.opcode, req->sq->qid); return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) { pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n", - cmd->common.opcode, req->sq->qid); + req->cmd->common.opcode, req->sq->qid); return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR; } return 0; @@ -1311,10 +1311,10 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->error_loc = offsetof(struct nvme_common_command, dptr); goto out; } - status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; down_read(&nvmet_config_sem); if (!nvmet_host_allowed(subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", @@ -1322,6 +1322,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; + req->error_loc = offsetof(struct nvme_common_command, dptr); goto out_put_subsystem; } up_read(&nvmet_config_sem); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 682854e0e079d..4845d12e374ac 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -178,12 +178,14 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { req->error_loc = offsetof(struct nvme_get_log_page_command, lid); - status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto out; } /* Spec requires dword aligned offsets */ if (offset & 0x3) { + req->error_loc = + offsetof(struct nvme_get_log_page_command, lpo); status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; goto out; } @@ -250,7 +252,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { req->error_loc = offsetof(struct nvme_identify, cns); - status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; 
goto out; } diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 42bd12b8bf00c..1420a8e3e0b10 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -190,12 +190,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) status = nvmet_alloc_ctrl(d->subsysnqn, d->hostnqn, req, le32_to_cpu(c->kato), &ctrl); - if (status) { - if (status == (NVME_SC_INVALID_FIELD | NVME_SC_DNR)) - req->error_loc = - offsetof(struct nvme_common_command, opcode); + if (status) goto out; - } ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; @@ -222,7 +218,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) { struct nvmf_connect_command *c = &req->cmd->connect; struct nvmf_connect_data *d; - struct nvmet_ctrl *ctrl = NULL; + struct nvmet_ctrl *ctrl; u16 qid = le16_to_cpu(c->qid); u16 status = 0; @@ -249,11 +245,12 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; } - status = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, - le16_to_cpu(d->cntlid), - req, &ctrl); - if (status) + ctrl = nvmet_ctrl_find_get(d->subsysnqn, d->hostnqn, + le16_to_cpu(d->cntlid), req); + if (!ctrl) { + status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; goto out; + } if (unlikely(qid > ctrl->subsys->max_qid)) { pr_warn("invalid queue id (%d)\n", qid); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index d375745fc4ed3..1f1c70f9f8eb0 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1996,6 +1996,7 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * @target_port: pointer to the (registered) target port the LS was * received on. + * @hosthandle: pointer to the host specific data, gets stored in iod. * @lsrsp: pointer to a lsrsp structure to be used to reference * the exchange corresponding to the LS. * @lsreqbuf: pointer to the buffer containing the LS Request diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index cb6f86572b24a..b741854fc957a 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -141,7 +141,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req); - ret = nvme_setup_cmd(ns, req, &iod->cmd); + ret = nvme_setup_cmd(ns, req); if (ret) return ret; @@ -205,8 +205,10 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, unsigned int numa_node) { struct nvme_loop_ctrl *ctrl = set->driver_data; + struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); nvme_req(req)->ctrl = &ctrl->ctrl; + nvme_req(req)->cmd = &iod->cmd; return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req), (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0); } @@ -396,7 +398,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - error = nvme_init_identify(&ctrl->ctrl); + error = nvme_init_ctrl_finish(&ctrl->ctrl); if (error) goto out_cleanup_queue; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 4b84edb49f22c..5566ed403576e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -27,6 +27,7 @@ #define NVMET_ERROR_LOG_SLOTS 128 #define NVMET_NO_ERROR_LOC ((u16)-1) #define NVMET_DEFAULT_CTRL_MODEL "Linux" +#define NVMET_MN_MAX_SIZE 40 /* * Supported optional AENs: @@ -428,10 +429,11 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl); void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new); u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp); -u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, - struct nvmet_req *req, struct nvmet_ctrl **ret); +struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn, + const char *hostnqn, u16 cntlid, + struct nvmet_req *req); void nvmet_ctrl_put(struct nvmet_ctrl *ctrl); -u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd); +u16 nvmet_check_ctrl_status(struct nvmet_req *req); struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, enum nvme_subsys_type type); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 8b0485ada315b..558a973277fd7 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -29,6 +29,16 @@ static int so_priority; module_param(so_priority, int, 0644); MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority"); +/* Define a time period (in usecs) that io_work() shall sample an activated + * queue before determining it to be idle. This optional module behavior + * can enable NIC solutions that support socket optimized packet processing + * using advanced interrupt moderation techniques. + */ +static int idle_poll_period_usecs; +module_param(idle_poll_period_usecs, int, 0644); +MODULE_PARM_DESC(idle_poll_period_usecs, + "nvmet tcp io_work poll till idle time period in usecs"); + #define NVMET_TCP_RECV_BUDGET 8 #define NVMET_TCP_SEND_BUDGET 8 #define NVMET_TCP_IO_WORK_BUDGET 64 @@ -119,6 +129,8 @@ struct nvmet_tcp_queue { struct ahash_request *snd_hash; struct ahash_request *rcv_hash; + unsigned long poll_end; + spinlock_t state_lock; enum nvmet_tcp_queue_state state; @@ -1216,6 +1228,23 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) spin_unlock(&queue->state_lock); } +static inline void nvmet_tcp_arm_queue_deadline(struct nvmet_tcp_queue *queue) +{ + queue->poll_end = jiffies + usecs_to_jiffies(idle_poll_period_usecs); +} + +static bool nvmet_tcp_check_queue_deadline(struct nvmet_tcp_queue *queue, + int ops) +{ + if (!idle_poll_period_usecs) + return false; + + if (ops) + nvmet_tcp_arm_queue_deadline(queue); + + return !time_after(jiffies, queue->poll_end); +} + static void nvmet_tcp_io_work(struct work_struct *w) { struct nvmet_tcp_queue *queue = @@ -1241,9 +1270,10 @@ static void nvmet_tcp_io_work(struct work_struct *w) } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); /* - * We exahusted our budget, requeue our selves + * Requeue the worker if idle deadline period is in progress or any + * ops activity was recorded during the do-while loop above. 
*/ - if (pending) + if (nvmet_tcp_check_queue_deadline(queue, ops) || pending) queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } @@ -1434,7 +1464,7 @@ static void nvmet_tcp_state_change(struct sock *sk) { struct nvmet_tcp_queue *queue; - write_lock_bh(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; if (!queue) goto done; @@ -1452,7 +1482,7 @@ static void nvmet_tcp_state_change(struct sock *sk) queue->idx, sk->sk_state); } done: - write_unlock_bh(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) @@ -1501,6 +1531,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) sock->sk->sk_state_change = nvmet_tcp_state_change; queue->write_space = sock->sk->sk_write_space; sock->sk->sk_write_space = nvmet_tcp_write_space; + if (idle_poll_period_usecs) + nvmet_tcp_arm_queue_deadline(queue); queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); } write_unlock_bh(&sock->sk->sk_callback_lock); diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b08787cd08812..edcbd60b88b98 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -405,6 +405,16 @@ struct nvme_id_ctrl_zns { __u8 rsvd1[4095]; }; +struct nvme_id_ctrl_nvm { + __u8 vsl; + __u8 wzsl; + __u8 wusl; + __u8 dmrl; + __le32 dmrsl; + __le64 dmsl; + __u8 rsvd16[4080]; +}; + enum { NVME_ID_CNS_NS = 0x00, NVME_ID_CNS_CTRL = 0x01,
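
Note on the non-MDTS limits added above: nvme_init_non_mdts_limits() derives the discard and write-zeroes queue limits from the Identify I/O Command Set specific controller data structure (CNS 06h, the new struct nvme_id_ctrl_nvm) when the controller reports DMRL/DMRSL/WZSL, and falls back to the previous ONCS/max_hw_sectors based defaults otherwise. The conversion done by the new nvme_mps_to_sectors() helper is easy to get wrong, so here is a minimal standalone userspace sketch of it (not part of the patch): a field such as id->wzsl is a power of two in units of the controller's minimum memory page size (CAP.MPSMIN), and is converted to 512-byte sectors, saturating to UINT_MAX on overflow; check_shl_overflow() is replaced by a plain shift-width check, and the controller values in main() are hypothetical.

    #include <stdint.h>
    #include <stdio.h>

    static uint32_t mps_to_sectors(uint32_t mpsmin, uint32_t units)
    {
        /* CAP.MPSMIN is a power of two of 4 KiB, so the page shift is MPSMIN + 12 */
        uint32_t page_shift = mpsmin + 12;
        /* units is a power of two of pages; subtract 9 to express it in 512B sectors */
        uint32_t shift = units + page_shift - 9;

        if (shift >= 32)                /* result would not fit in a u32 sector count */
            return UINT32_MAX;
        return 1U << shift;
    }

    int main(void)
    {
        /* Hypothetical controller: MPSMIN = 0 (4 KiB pages), WZSL = 8 */
        printf("max_zeroes_sectors = %u\n", mps_to_sectors(0, 8));
        /* 1 << (8 + 12 - 9) = 2048 sectors, i.e. 1 MiB per Write Zeroes command */
        return 0;
    }

With these cached values in place, nvme_update_disk_info() programs blk_queue_max_write_zeroes_sectors() directly from ctrl->max_zeroes_sectors, and nvme_config_discard() uses ctrl->max_discard_sectors / ctrl->max_discard_segments instead of the former unconditional UINT_MAX / NVME_DSM_MAX_RANGES, so controllers that report DMRL/DMRSL get limits that match what they actually support.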