From 458f280d716d3205214c8bb5d4271bd54e939a61 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 23 Apr 2017 08:30:06 -0700 Subject: [PATCH 01/39] nvme_fc: fix command id check The code validates the command_id in the response to the original sqe command. But prior code was using the rq->rqno as the sqe command id. The core layer overwrites what the transport set there originally. Use the actual sqe content. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 596b3a453b545..cccade5a18c23 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1192,6 +1192,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_fc_ctrl *ctrl = op->ctrl; struct nvme_fc_queue *queue = op->queue; struct nvme_completion *cqe = &op->rsp_iu.cqe; + struct nvme_command *sqe = &op->cmd_iu.sqe; __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); union nvme_result result; @@ -1274,7 +1275,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || op->rsp_iu.status_code || - op->rqno != le16_to_cpu(cqe->command_id))) { + sqe->common.command_id != cqe->command_id)) { status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); goto done; } From 78a7ac260e38d511d3f62a9e574cf34aac48d7d3 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 23 Apr 2017 08:30:07 -0700 Subject: [PATCH 02/39] nvme_fc: add aen abort to teardown Add abort support for aens. Commonized the op abort to apply to aen or real ios (caused some reorg/routine movement). Abort path sets termination flag in prep for next patch that will be watching i/o abort completion before proceeding with controller teardown. Now that we're aborting aens, the "exit" code that simply cleared out their context no longer applies. Also clarified how we detect an AEN vs a normal io - by a flag, not by whether a rq exists or the a rqno is out of range. Note: saw some interesting cases where if the queues are stopped and we're waiting for the aborts, the core layer can call the complete_rq callback for the io. So the io completion synchronizes link side completion with possible blk layer completion under error. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 191 ++++++++++++++++++++++++++++++----------- 1 file changed, 139 insertions(+), 52 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index cccade5a18c23..b7118db11ea7a 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -65,6 +65,7 @@ enum nvme_fcop_flags { FCOP_FLAGS_TERMIO = (1 << 0), FCOP_FLAGS_RELEASED = (1 << 1), FCOP_FLAGS_COMPLETE = (1 << 2), + FCOP_FLAGS_AEN = (1 << 3), }; struct nvmefc_ls_req_op { @@ -86,6 +87,7 @@ enum nvme_fcpop_state { FCPOP_STATE_IDLE = 1, FCPOP_STATE_ACTIVE = 2, FCPOP_STATE_ABORTED = 3, + FCPOP_STATE_COMPLETE = 4, }; struct nvme_fc_fcp_op { @@ -104,6 +106,7 @@ struct nvme_fc_fcp_op { struct request *rq; atomic_t state; + u32 flags; u32 rqno; u32 nents; @@ -1132,6 +1135,7 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) /* *********************** NVME Ctrl Routines **************************** */ +static void __nvme_fc_final_op_cleanup(struct request *rq); static int nvme_fc_reinit_request(void *data, struct request *rq) @@ -1169,20 +1173,74 @@ nvme_fc_exit_request(void *data, struct request *rq, return __nvme_fc_exit_request(data, op); } +static int +__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) +{ + int state; + + state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); + if (state != FCPOP_STATE_ACTIVE) { + atomic_set(&op->state, state); + return -ECANCELED; + } + + ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, + &ctrl->rport->remoteport, + op->queue->lldd_handle, + &op->fcp_req); + + return 0; +} + static void -nvme_fc_exit_aen_ops(struct nvme_fc_ctrl *ctrl) +nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) { struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; - int i; + unsigned long flags; + int i, ret; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { - if (atomic_read(&aen_op->state) == FCPOP_STATE_UNINIT) + if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) continue; - __nvme_fc_exit_request(ctrl, aen_op); - nvme_fc_ctrl_put(ctrl); + + spin_lock_irqsave(&ctrl->lock, flags); + aen_op->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + ret = __nvme_fc_abort_op(ctrl, aen_op); + if (ret) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + aen_op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + return; + } } } +static inline int +__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, + struct nvme_fc_fcp_op *op) +{ + unsigned long flags; + bool complete_rq = false; + + spin_lock_irqsave(&ctrl->lock, flags); + if (op->flags & FCOP_FLAGS_RELEASED) + complete_rq = true; + else + op->flags |= FCOP_FLAGS_COMPLETE; + spin_unlock_irqrestore(&ctrl->lock, flags); + + return complete_rq; +} + void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { @@ -1195,6 +1253,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) struct nvme_command *sqe = &op->cmd_iu.sqe; __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); union nvme_result result; + bool complete_rq; /* * WARNING: @@ -1289,13 +1348,25 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) } done: - if (!queue->qnum && op->rqno >= AEN_CMDID_BASE) { + if (op->flags & FCOP_FLAGS_AEN) { nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags = FCOP_FLAGS_AEN; /* clear other flags */ nvme_fc_ctrl_put(ctrl); return; } - nvme_end_request(rq, status, result); + complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); + if (!complete_rq) { + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + status = cpu_to_le16(NVME_SC_ABORT_REQ); + if (blk_queue_dying(rq->q)) + status |= cpu_to_le16(NVME_SC_DNR); + } + nvme_end_request(rq, status, result); + } else + __nvme_fc_final_op_cleanup(rq); } static int @@ -1388,8 +1459,11 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) if (ret) return ret; + aen_op->flags = FCOP_FLAGS_AEN; + memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; + /* Note: core layer may overwrite the sqe.command_id value */ sqe->common.command_id = AEN_CMDID_BASE + i; } return 0; @@ -1644,34 +1718,12 @@ nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) nvme_fc_free_io_queues(ctrl); } - nvme_fc_exit_aen_ops(ctrl); - nvme_fc_destroy_admin_queue(ctrl); } nvme_fc_ctrl_put(ctrl); } - -static int -__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) -{ - int state; - - state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); - if (state != FCPOP_STATE_ACTIVE) { - atomic_set(&op->state, state); - return -ECANCELED; /* fail */ - } - - ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, - &ctrl->rport->remoteport, - op->queue->lldd_handle, - &op->fcp_req); - - return 0; -} - enum blk_eh_timer_return nvme_fc_timeout(struct request *rq, bool reserved) { @@ -1830,10 +1882,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); sqe->rw.dptr.sgl.addr = 0; - /* odd that we set the command_id - should come from nvme-fabrics */ - WARN_ON_ONCE(sqe->common.command_id != cpu_to_le16(op->rqno)); - - if (op->rq) { /* skipped on aens */ + if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { dev_err(queue->ctrl->ctrl.device, @@ -1850,7 +1899,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, atomic_set(&op->state, FCPOP_STATE_ACTIVE); - if (op->rq) + if (!(op->flags & FCOP_FLAGS_AEN)) blk_mq_start_request(op->rq); ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, @@ -1967,13 +2016,14 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) } static void -nvme_fc_complete_rq(struct request *rq) +__nvme_fc_final_op_cleanup(struct request *rq) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); struct nvme_fc_ctrl *ctrl = op->ctrl; - int state; - state = atomic_xchg(&op->state, FCPOP_STATE_IDLE); + atomic_set(&op->state, FCPOP_STATE_IDLE); + op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | + FCOP_FLAGS_COMPLETE); nvme_cleanup_cmd(rq); nvme_fc_unmap_data(ctrl, rq, op); @@ -1982,6 +2032,33 @@ nvme_fc_complete_rq(struct request *rq) } +static void +nvme_fc_complete_rq(struct request *rq) +{ + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); + struct nvme_fc_ctrl *ctrl = op->ctrl; + unsigned long flags; + bool completed = false; + + /* + * the core layer, on controller resets after calling + * nvme_shutdown_ctrl(), calls complete_rq without our + * calling blk_mq_complete_request(), thus there may still + * be live i/o outstanding with the LLDD. Means transport has + * to track complete calls vs fcpio_done calls to know what + * path to take on completes and dones. + */ + spin_lock_irqsave(&ctrl->lock, flags); + if (op->flags & FCOP_FLAGS_COMPLETE) + completed = true; + else + op->flags |= FCOP_FLAGS_RELEASED; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (completed) + __nvme_fc_final_op_cleanup(rq); +} + static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, .complete = nvme_fc_complete_rq, @@ -2105,25 +2182,32 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_ctrl *nctrl = data; struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); -int status; + unsigned long flags; + int status; if (!blk_mq_request_started(req)) return; - /* this performs an ABTS-LS on the FC exchange for the io */ + spin_lock_irqsave(&ctrl->lock, flags); + op->flags |= FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + status = __nvme_fc_abort_op(ctrl, op); - /* - * if __nvme_fc_abort_op failed: io wasn't active to abort - * consider it done. Assume completion path already completing - * in parallel - */ - if (status) - /* io wasn't active to abort consider it done */ - /* assume completion path already completing in parallel */ + if (status) { + /* + * if __nvme_fc_abort_op failed the io wasn't + * active. Thus this call path is running in + * parallel to the io complete. Treat as non-error. + */ + + /* back out the flags/counters */ + spin_lock_irqsave(&ctrl->lock, flags); + op->flags &= ~FCOP_FLAGS_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); return; + } } - /* * This routine stops operation of the controller. Admin and IO queues * are stopped, outstanding ios on them terminated, and the nvme ctrl @@ -2161,6 +2245,9 @@ nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_fc_terminate_exchange, &ctrl->ctrl); + + /* kill the aens as they are a separate path */ + nvme_fc_abort_aen_ops(ctrl); } /* @@ -2406,12 +2493,12 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_fc_init_aen_ops(ctrl); if (ret) - goto out_exit_aen_ops; + goto out_stop_keep_alive; if (ctrl->queue_count > 1) { ret = nvme_fc_create_io_queues(ctrl); if (ret) - goto out_exit_aen_ops; + goto out_stop_keep_alive; } spin_lock_irqsave(&ctrl->lock, flags); @@ -2438,8 +2525,8 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, return &ctrl->ctrl; -out_exit_aen_ops: - nvme_fc_exit_aen_ops(ctrl); +out_stop_keep_alive: + nvme_stop_keep_alive(&ctrl->ctrl); out_remove_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); From 61bff8ef0088459e32bc0a7cc8c23144d324df90 Mon Sep 17 00:00:00 2001 From: James Smart Date: Sun, 23 Apr 2017 08:30:08 -0700 Subject: [PATCH 03/39] nvme_fc: add controller reset support This patch actually does quite a few things. When looking to add controller reset support, the organization modeled after rdma was very fragmented. rdma duplicates the reset and teardown paths and does different things to the block layer on the two paths. The code to build up the controller is also duplicated between the initial creation and the reset/error recovery paths. So I decided to make this sane. I reorganized the controller creation and teardown so that there is a connect path and a disconnect path. Initial creation obviously uses the connect path. Controller teardown will use the disconnect path, followed last access code. Controller reset will use the disconnect path to stop operation, and then the connect path to re-establish the controller. Along the way, several things were fixed - aens were not properly set up. They are allocated differently from the per-request structure on the blk queues. - aens were oddly torn down. the prior patch corrected to abort, but we still need to dma unmap and free relative elements. - missed a few ref counting points: in aen completion and on i/o's that fail - controller initial create failure paths were still confused vs teardown before converting to ref counting vs after we convert to refcounting. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1049 ++++++++++++++++++++++++++-------------- 1 file changed, 675 insertions(+), 374 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index b7118db11ea7a..5aa52863ba54e 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "nvme.h" #include "fabrics.h" @@ -44,6 +45,8 @@ enum nvme_fc_queue_flags { #define NVMEFC_QUEUE_DELAY 3 /* ms units */ +#define NVME_FC_MAX_CONNECT_ATTEMPTS 1 + struct nvme_fc_queue { struct nvme_fc_ctrl *ctrl; struct device *dev; @@ -137,19 +140,17 @@ struct nvme_fc_rport { struct kref ref; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_state { - FCCTRL_INIT = 0, - FCCTRL_ACTIVE = 1, +enum nvme_fcctrl_flags { + FCCTRL_TERMIO = (1 << 0), }; struct nvme_fc_ctrl { spinlock_t lock; struct nvme_fc_queue *queues; - u32 queue_count; - struct device *dev; struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + u32 queue_count; u32 cnum; u64 association_id; @@ -162,8 +163,14 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct work_struct delete_work; + struct work_struct reset_work; + struct delayed_work connect_work; + int reconnect_delay; + int connect_attempts; + struct kref ref; - int state; + u32 flags; + u32 iocnt; struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; @@ -1204,7 +1211,10 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) continue; spin_lock_irqsave(&ctrl->lock, flags); - aen_op->flags |= FCOP_FLAGS_TERMIO; + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + aen_op->flags |= FCOP_FLAGS_TERMIO; + } spin_unlock_irqrestore(&ctrl->lock, flags); ret = __nvme_fc_abort_op(ctrl, aen_op); @@ -1217,6 +1227,8 @@ nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) /* back out the flags/counters */ spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; aen_op->flags &= ~FCOP_FLAGS_TERMIO; spin_unlock_irqrestore(&ctrl->lock, flags); return; @@ -1232,6 +1244,10 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, bool complete_rq = false; spin_lock_irqsave(&ctrl->lock, flags); + if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; + } if (op->flags & FCOP_FLAGS_RELEASED) complete_rq = true; else @@ -1447,19 +1463,29 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; + void *private; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + GFP_KERNEL); + if (!private) + return -ENOMEM; + cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], aen_op, (struct request *)NULL, (AEN_CMDID_BASE + i)); - if (ret) + if (ret) { + kfree(private); return ret; + } aen_op->flags = FCOP_FLAGS_AEN; + aen_op->fcp_req.first_sgl = NULL; /* no sg list */ + aen_op->fcp_req.private = private; memset(sqe, 0, sizeof(*sqe)); sqe->common.opcode = nvme_admin_async_event; @@ -1469,6 +1495,23 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) return 0; } +static void +nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) +{ + struct nvme_fc_fcp_op *aen_op; + int i; + + aen_op = ctrl->aen_ops; + for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { + if (!aen_op->fcp_req.private) + continue; + + __nvme_fc_exit_request(ctrl, aen_op); + + kfree(aen_op->fcp_req.private); + aen_op->fcp_req.private = NULL; + } +} static inline void __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, @@ -1567,15 +1610,6 @@ __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, queue->lldd_handle = NULL; } -static void -nvme_fc_destroy_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); - blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_fc_free_queue(&ctrl->queues[0]); -} - static void nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) { @@ -1663,17 +1697,24 @@ nvme_fc_ctrl_free(struct kref *ref) container_of(ref, struct nvme_fc_ctrl, ref); unsigned long flags; - if (ctrl->state != FCCTRL_INIT) { - /* remove from rport list */ - spin_lock_irqsave(&ctrl->rport->lock, flags); - list_del(&ctrl->ctrl_list); - spin_unlock_irqrestore(&ctrl->rport->lock, flags); + if (ctrl->ctrl.tagset) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + blk_mq_free_tag_set(&ctrl->tag_set); } + /* remove from rport list */ + spin_lock_irqsave(&ctrl->rport->lock, flags); + list_del(&ctrl->ctrl_list); + spin_unlock_irqrestore(&ctrl->rport->lock, flags); + + blk_cleanup_queue(ctrl->ctrl.admin_q); + blk_mq_free_tag_set(&ctrl->admin_tag_set); + + kfree(ctrl->queues); + put_device(ctrl->dev); nvme_fc_rport_put(ctrl->rport); - kfree(ctrl->queues); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); @@ -1696,32 +1737,35 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) * controller. Called after last nvme_put_ctrl() call */ static void -nvme_fc_free_nvme_ctrl(struct nvme_ctrl *nctrl) +nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); WARN_ON(nctrl != &ctrl->ctrl); - /* - * Tear down the association, which will generate link - * traffic to terminate connections - */ - - if (ctrl->state != FCCTRL_INIT) { - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); + nvme_fc_ctrl_put(ctrl); +} - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_fc_delete_hw_io_queues(ctrl); - nvme_fc_free_io_queues(ctrl); - } +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +{ + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: transport association error detected: %s\n", + ctrl->cnum, errmsg); + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: resetting controller\n", ctrl->cnum); - nvme_fc_destroy_admin_queue(ctrl); + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Couldn't change state " + "to RECONNECTING\n", ctrl->cnum); + return; } - nvme_fc_ctrl_put(ctrl); + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: error_recovery: Failed to schedule " + "reset work\n", ctrl->cnum); } enum blk_eh_timer_return @@ -1740,11 +1784,13 @@ nvme_fc_timeout(struct request *rq, bool reserved) return BLK_EH_HANDLED; /* - * TODO: force a controller reset - * when that happens, queues will be torn down and outstanding - * ios will be terminated, and the above abort, on a single io - * will no longer be needed. + * we can't individually ABTS an io without affecting the queue, + * thus killing the queue, adn thus the association. + * So resolve by performing a controller reset, which will stop + * the host/io stack, terminate the association on the link, + * and recreate an association on the link. */ + nvme_fc_error_recovery(ctrl, "io timeout error"); return BLK_EH_HANDLED; } @@ -1838,6 +1884,13 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u32 csn; int ret; + /* + * before attempting to send the io, check to see if we believe + * the target device is present + */ + if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) + return BLK_MQ_RQ_QUEUE_ERROR; + if (!nvme_fc_ctrl_get(ctrl)) return BLK_MQ_RQ_QUEUE_ERROR; @@ -1885,8 +1938,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) { ret = nvme_fc_map_data(ctrl, op->rq, op); if (ret < 0) { - dev_err(queue->ctrl->ctrl.device, - "Failed to map data (%d)\n", ret); nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); return (ret == -ENOMEM || ret == -EAGAIN) ? @@ -1907,9 +1958,6 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, queue->lldd_handle, &op->fcp_req); if (ret) { - dev_err(ctrl->dev, - "Send nvme command failed - lldd returned %d.\n", ret); - if (op->rq) { /* normal request */ nvme_fc_unmap_data(ctrl, op->rq, op); nvme_cleanup_cmd(op->rq); @@ -1979,12 +2027,8 @@ nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) struct nvme_fc_fcp_op *op; req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); - if (!req) { - dev_err(queue->ctrl->ctrl.device, - "tag 0x%x on QNum %#x not found\n", - tag, queue->qnum); + if (!req) return 0; - } op = blk_mq_rq_to_pdu(req); @@ -2001,11 +2045,21 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; + unsigned long flags; + bool terminating = false; int ret; if (aer_idx > NVME_FC_NR_AEN_COMMANDS) return; + spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + terminating = true; + spin_unlock_irqrestore(&ctrl->lock, flags); + + if (terminating) + return; + aen_op = &ctrl->aen_ops[aer_idx]; ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, @@ -2059,110 +2113,6 @@ nvme_fc_complete_rq(struct request *rq) __nvme_fc_final_op_cleanup(rq); } -static const struct blk_mq_ops nvme_fc_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_hctx, - .poll = nvme_fc_poll, - .timeout = nvme_fc_timeout, -}; - -static const struct blk_mq_ops nvme_fc_admin_mq_ops = { - .queue_rq = nvme_fc_queue_rq, - .complete = nvme_fc_complete_rq, - .init_request = nvme_fc_init_admin_request, - .exit_request = nvme_fc_exit_request, - .reinit_request = nvme_fc_reinit_request, - .init_hctx = nvme_fc_init_admin_hctx, - .timeout = nvme_fc_timeout, -}; - -static int -nvme_fc_configure_admin_queue(struct nvme_fc_ctrl *ctrl) -{ - u32 segs; - int error; - - nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); - - error = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_FC_AQ_BLKMQ_DEPTH, - (NVME_FC_AQ_BLKMQ_DEPTH / 4)); - if (error) - return error; - - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_free_queue; - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; - } - - error = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, - NVME_FC_AQ_BLKMQ_DEPTH); - if (error) - goto out_cleanup_queue; - - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; - - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); - if (error) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_delete_hw_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); - - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); - if (error) - goto out_delete_hw_queue; - - segs = min_t(u32, NVME_FC_MAX_SEGMENTS, - ctrl->lport->ops->max_sgl_segments); - ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); - - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_delete_hw_queue; - - nvme_start_keep_alive(&ctrl->ctrl); - - return 0; - -out_delete_hw_queue: - __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_free_queue: - nvme_fc_free_queue(&ctrl->queues[0]); - return error; -} - /* * This routine is used by the transport when it needs to find active * io on a queue that is to be terminated. The transport uses @@ -2189,7 +2139,10 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) return; spin_lock_irqsave(&ctrl->lock, flags); - op->flags |= FCOP_FLAGS_TERMIO; + if (ctrl->flags & FCCTRL_TERMIO) { + ctrl->iocnt++; + op->flags |= FCOP_FLAGS_TERMIO; + } spin_unlock_irqrestore(&ctrl->lock, flags); status = __nvme_fc_abort_op(ctrl, op); @@ -2202,144 +2155,101 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) /* back out the flags/counters */ spin_lock_irqsave(&ctrl->lock, flags); + if (ctrl->flags & FCCTRL_TERMIO) + ctrl->iocnt--; op->flags &= ~FCOP_FLAGS_TERMIO; spin_unlock_irqrestore(&ctrl->lock, flags); return; } } -/* - * This routine stops operation of the controller. Admin and IO queues - * are stopped, outstanding ios on them terminated, and the nvme ctrl - * is shutdown. - */ -static void -nvme_fc_shutdown_ctrl(struct nvme_fc_ctrl *ctrl) -{ - /* - * If io queues are present, stop them and terminate all outstanding - * ios on them. As FC allocates FC exchange for each io, the - * transport must contact the LLDD to terminate the exchange, - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() - * to tell us what io's are busy and invoke a transport routine - * to kill them with the LLDD. After terminating the exchange - * the LLDD will call the transport's normal io done path, but it - * will have an aborted status. The done path will return the - * io requests back to the block layer as part of normal completions - * (but with error status). - */ - if (ctrl->queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - } - - if (ctrl->ctrl.state == NVME_CTRL_LIVE) - nvme_shutdown_ctrl(&ctrl->ctrl); - - /* - * now clean up the admin queue. Same thing as above. - * use blk_mq_tagset_busy_itr() and the transport routine to - * terminate the exchanges. - */ - blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - /* kill the aens as they are a separate path */ - nvme_fc_abort_aen_ops(ctrl); -} +static const struct blk_mq_ops nvme_fc_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_hctx, + .poll = nvme_fc_poll, + .timeout = nvme_fc_timeout, +}; -/* - * Called to teardown an association. - * May be called with association fully in place or partially in place. - */ -static void -__nvme_fc_remove_ctrl(struct nvme_fc_ctrl *ctrl) +static int +nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) { - nvme_stop_keep_alive(&ctrl->ctrl); + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + int ret; - /* stop and terminate ios on admin and io queues */ - nvme_fc_shutdown_ctrl(ctrl); + ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); + if (ret) { + dev_info(ctrl->ctrl.device, + "set_queue_count failed: %d\n", ret); + return ret; + } - /* - * tear down the controller - * This will result in the last reference on the nvme ctrl to - * expire, calling the transport nvme_fc_free_nvme_ctrl() callback. - * From there, the transport will tear down it's logical queues and - * association. - */ - nvme_uninit_ctrl(&ctrl->ctrl); + ctrl->queue_count = opts->nr_io_queues + 1; + if (!opts->nr_io_queues) + return 0; - nvme_put_ctrl(&ctrl->ctrl); -} + dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + opts->nr_io_queues); -static void -nvme_fc_del_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, delete_work); + nvme_fc_init_io_queues(ctrl); - __nvme_fc_remove_ctrl(ctrl); -} + memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); + ctrl->tag_set.ops = &nvme_fc_mq_ops; + ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; + ctrl->tag_set.reserved_tags = 1; /* fabric connect */ + ctrl->tag_set.numa_node = NUMA_NO_NODE; + ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; + ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->tag_set.driver_data = ctrl; + ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; + ctrl->tag_set.timeout = NVME_IO_TIMEOUT; -static int -__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) -{ - if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) - return -EBUSY; + ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + if (ret) + return ret; - if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) - return -EBUSY; + ctrl->ctrl.tagset = &ctrl->tag_set; - return 0; -} + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } -/* - * Request from nvme core layer to delete the controller - */ -static int -nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) -{ - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_rport *rport = ctrl->rport; - unsigned long flags; - int ret; - - spin_lock_irqsave(&rport->lock, flags); - ret = __nvme_fc_del_ctrl(ctrl); - spin_unlock_irqrestore(&rport->lock, flags); + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); if (ret) - return ret; + goto out_cleanup_blk_queue; - flush_work(&ctrl->delete_work); + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + if (ret) + goto out_delete_hw_queues; return 0; -} -static int -nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) -{ - return -EIO; -} +out_delete_hw_queues: + nvme_fc_delete_hw_io_queues(ctrl); +out_cleanup_blk_queue: + nvme_stop_keep_alive(&ctrl->ctrl); + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + blk_mq_free_tag_set(&ctrl->tag_set); + nvme_fc_free_io_queues(ctrl); -static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { - .name = "fc", - .module = THIS_MODULE, - .is_fabrics = true, - .reg_read32 = nvmf_reg_read32, - .reg_read64 = nvmf_reg_read64, - .reg_write32 = nvmf_reg_write32, - .reset_ctrl = nvme_fc_reset_nvme_ctrl, - .free_ctrl = nvme_fc_free_nvme_ctrl, - .submit_async_event = nvme_fc_submit_async_event, - .delete_ctrl = nvme_fc_del_nvme_ctrl, - .get_subsysnqn = nvmf_get_subsysnqn, - .get_address = nvmf_get_address, -}; + /* force put free routine to ignore io queues */ + ctrl->ctrl.tagset = NULL; + + return ret; +} static int -nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) +nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; int ret; @@ -2351,44 +2261,22 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) return ret; } - ctrl->queue_count = opts->nr_io_queues + 1; - if (!opts->nr_io_queues) + /* check for io queues existing */ + if (ctrl->queue_count == 1) return 0; - dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", + dev_info(ctrl->ctrl.device, "Recreating %d I/O queues.\n", opts->nr_io_queues); nvme_fc_init_io_queues(ctrl); - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_fc_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + - (SG_CHUNK_SIZE * - sizeof(struct scatterlist)) + - ctrl->lport->ops->fcprqst_priv_sz; - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); + ret = blk_mq_reinit_tagset(&ctrl->tag_set); if (ret) - return ret; - - ctrl->ctrl.tagset = &ctrl->tag_set; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } + goto out_free_io_queues; ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); if (ret) - goto out_cleanup_blk_queue; + goto out_free_io_queues; ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); if (ret) @@ -2398,28 +2286,440 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) out_delete_hw_queues: nvme_fc_delete_hw_io_queues(ctrl); -out_cleanup_blk_queue: - nvme_stop_keep_alive(&ctrl->ctrl); - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); +out_free_io_queues: nvme_fc_free_io_queues(ctrl); + return ret; +} - /* force put free routine to ignore io queues */ - ctrl->ctrl.tagset = NULL; +/* + * This routine restarts the controller on the host side, and + * on the link side, recreates the controller association. + */ +static int +nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) +{ + struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + u32 segs; + int ret; + bool changed; + + ctrl->connect_attempts++; + + /* + * Create the admin queue + */ + + nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); + + ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, + NVME_FC_AQ_BLKMQ_DEPTH); + if (ret) + goto out_free_queue; + + ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], + NVME_FC_AQ_BLKMQ_DEPTH, + (NVME_FC_AQ_BLKMQ_DEPTH / 4)); + if (ret) + goto out_delete_hw_queue; + + if (ctrl->ctrl.state != NVME_CTRL_NEW) + blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); + + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* + * Check controller capabilities + * + * todo:- add code to check if ctrl attributes changed from + * prior connection values + */ + + ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); + if (ret) { + dev_err(ctrl->ctrl.device, + "prop_get NVME_REG_CAP failed\n"); + goto out_disconnect_admin_queue; + } + + ctrl->ctrl.sqsize = + min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); + + ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); + if (ret) + goto out_disconnect_admin_queue; + + segs = min_t(u32, NVME_FC_MAX_SEGMENTS, + ctrl->lport->ops->max_sgl_segments); + ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); + + ret = nvme_init_identify(&ctrl->ctrl); + if (ret) + goto out_disconnect_admin_queue; + + /* sanity checks */ + + /* FC-NVME does not have other data in the capsule */ + if (ctrl->ctrl.icdoff) { + dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", + ctrl->ctrl.icdoff); + goto out_disconnect_admin_queue; + } + + nvme_start_keep_alive(&ctrl->ctrl); + + /* FC-NVME supports normal SGL Data Block Descriptors */ + + if (opts->queue_size > ctrl->ctrl.maxcmd) { + /* warn if maxcmd is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl maxcmd %u, reducing " + "to queue_size\n", + opts->queue_size, ctrl->ctrl.maxcmd); + opts->queue_size = ctrl->ctrl.maxcmd; + } + + ret = nvme_fc_init_aen_ops(ctrl); + if (ret) + goto out_term_aen_ops; + + /* + * Create the io queues + */ + + if (ctrl->queue_count > 1) { + if (ctrl->ctrl.state == NVME_CTRL_NEW) + ret = nvme_fc_create_io_queues(ctrl); + else + ret = nvme_fc_reinit_io_queues(ctrl); + if (ret) + goto out_term_aen_ops; + } + + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + WARN_ON_ONCE(!changed); + + ctrl->connect_attempts = 0; + + kref_get(&ctrl->ctrl.kref); + + if (ctrl->queue_count > 1) { + nvme_start_queues(&ctrl->ctrl); + nvme_queue_scan(&ctrl->ctrl); + nvme_queue_async_events(&ctrl->ctrl); + } + + return 0; /* Success */ + +out_term_aen_ops: + nvme_fc_term_aen_ops(ctrl); + nvme_stop_keep_alive(&ctrl->ctrl); +out_disconnect_admin_queue: + /* send a Disconnect(association) LS to fc-nvme target */ + nvme_fc_xmt_disconnect_assoc(ctrl); +out_delete_hw_queue: + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); +out_free_queue: + nvme_fc_free_queue(&ctrl->queues[0]); return ret; } +/* + * This routine stops operation of the controller on the host side. + * On the host os stack side: Admin and IO queues are stopped, + * outstanding ios on them terminated via FC ABTS. + * On the link side: the association is terminated. + */ +static void +nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) +{ + unsigned long flags; + + nvme_stop_keep_alive(&ctrl->ctrl); + + spin_lock_irqsave(&ctrl->lock, flags); + ctrl->flags |= FCCTRL_TERMIO; + ctrl->iocnt = 0; + spin_unlock_irqrestore(&ctrl->lock, flags); + + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + } + + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ + + /* + * clean up the admin queue. Same thing as above. + * use blk_mq_tagset_busy_itr() and the transport routine to + * terminate the exchanges. + */ + blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + + /* kill the aens as they are a separate path */ + nvme_fc_abort_aen_ops(ctrl); + + /* wait for all io that had to be aborted */ + spin_lock_irqsave(&ctrl->lock, flags); + while (ctrl->iocnt) { + spin_unlock_irqrestore(&ctrl->lock, flags); + msleep(1000); + spin_lock_irqsave(&ctrl->lock, flags); + } + ctrl->flags &= ~FCCTRL_TERMIO; + spin_unlock_irqrestore(&ctrl->lock, flags); + + nvme_fc_term_aen_ops(ctrl); + + /* + * send a Disconnect(association) LS to fc-nvme target + * Note: could have been sent at top of process, but + * cleaner on link traffic if after the aborts complete. + * Note: if association doesn't exist, association_id will be 0 + */ + if (ctrl->association_id) + nvme_fc_xmt_disconnect_assoc(ctrl); + + if (ctrl->ctrl.tagset) { + nvme_fc_delete_hw_io_queues(ctrl); + nvme_fc_free_io_queues(ctrl); + } + + __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); + nvme_fc_free_queue(&ctrl->queues[0]); +} + +static void +nvme_fc_delete_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, delete_work); + + cancel_work_sync(&ctrl->reset_work); + cancel_delayed_work_sync(&ctrl->connect_work); + + /* + * kill the association on the link side. this will block + * waiting for io to terminate + */ + nvme_fc_delete_association(ctrl); + + /* + * tear down the controller + * This will result in the last reference on the nvme ctrl to + * expire, calling the transport nvme_fc_nvme_ctrl_freed() callback. + * From there, the transport will tear down it's logical queues and + * association. + */ + nvme_uninit_ctrl(&ctrl->ctrl); + + nvme_put_ctrl(&ctrl->ctrl); +} + +static int +__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) +{ + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) + return -EBUSY; + + return 0; +} + +/* + * Request from nvme core layer to delete the controller + */ +static int +nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + int ret; + + if (!kref_get_unless_zero(&ctrl->ctrl.kref)) + return -EBUSY; + + ret = __nvme_fc_del_ctrl(ctrl); + + if (!ret) + flush_workqueue(nvme_fc_wq); + + nvme_put_ctrl(&ctrl->ctrl); + + return ret; +} + +static void +nvme_fc_reset_ctrl_work(struct work_struct *work) +{ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, reset_work); + int ret; + + /* will block will waiting for io to terminate */ + nvme_fc_delete_association(ctrl); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); +} + +/* + * called by the nvme core layer, for sysfs interface that requests + * a reset of the nvme controller + */ +static int +nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) +{ + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) + return -EBUSY; + + if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) + return -EBUSY; + + flush_work(&ctrl->reset_work); + + return 0; +} + +static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { + .name = "fc", + .module = THIS_MODULE, + .is_fabrics = true, + .reg_read32 = nvmf_reg_read32, + .reg_read64 = nvmf_reg_read64, + .reg_write32 = nvmf_reg_write32, + .reset_ctrl = nvme_fc_reset_nvme_ctrl, + .free_ctrl = nvme_fc_nvme_ctrl_freed, + .submit_async_event = nvme_fc_submit_async_event, + .delete_ctrl = nvme_fc_del_nvme_ctrl, + .get_subsysnqn = nvmf_get_subsysnqn, + .get_address = nvmf_get_address, +}; + +static void +nvme_fc_connect_ctrl_work(struct work_struct *work) +{ + int ret; + + struct nvme_fc_ctrl *ctrl = + container_of(to_delayed_work(work), + struct nvme_fc_ctrl, connect_work); + + ret = nvme_fc_create_association(ctrl); + if (ret) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt failed (%d)\n", + ctrl->cnum, ret); + if (ctrl->connect_attempts >= NVME_FC_MAX_CONNECT_ATTEMPTS) { + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Max reconnect attempts (%d) " + "reached. Removing controller\n", + ctrl->cnum, ctrl->connect_attempts); + + if (!nvme_change_ctrl_state(&ctrl->ctrl, + NVME_CTRL_DELETING)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to change state " + "to DELETING\n", ctrl->cnum); + return; + } + + WARN_ON(!queue_work(nvme_fc_wq, &ctrl->delete_work)); + return; + } + + dev_warn(ctrl->ctrl.device, + "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", + ctrl->cnum, ctrl->reconnect_delay); + queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, + ctrl->reconnect_delay * HZ); + } else + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: controller reconnect complete\n", + ctrl->cnum); +} + + +static const struct blk_mq_ops nvme_fc_admin_mq_ops = { + .queue_rq = nvme_fc_queue_rq, + .complete = nvme_fc_complete_rq, + .init_request = nvme_fc_init_admin_request, + .exit_request = nvme_fc_exit_request, + .reinit_request = nvme_fc_reinit_request, + .init_hctx = nvme_fc_init_admin_hctx, + .timeout = nvme_fc_timeout, +}; + static struct nvme_ctrl * -__nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, +nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) { struct nvme_fc_ctrl *ctrl; unsigned long flags; int ret, idx; - bool changed; ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); if (!ctrl) { @@ -2438,17 +2738,15 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->lport = lport; ctrl->rport = rport; ctrl->dev = lport->dev; - ctrl->state = FCCTRL_INIT; ctrl->cnum = idx; - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); - if (ret) - goto out_free_ida; - get_device(ctrl->dev); kref_init(&ctrl->ref); - INIT_WORK(&ctrl->delete_work, nvme_fc_del_ctrl_work); + INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); + INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); + INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); + ctrl->reconnect_delay = opts->reconnect_delay; spin_lock_init(&ctrl->lock); /* io queue count */ @@ -2465,87 +2763,86 @@ __nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), GFP_KERNEL); if (!ctrl->queues) - goto out_uninit_ctrl; - - ret = nvme_fc_configure_admin_queue(ctrl); - if (ret) - goto out_uninit_ctrl; - - /* sanity checks */ - - /* FC-NVME does not have other data in the capsule */ - if (ctrl->ctrl.icdoff) { - dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", - ctrl->ctrl.icdoff); - goto out_remove_admin_queue; - } - - /* FC-NVME supports normal SGL Data Block Descriptors */ + goto out_free_ida; - if (opts->queue_size > ctrl->ctrl.maxcmd) { - /* warn if maxcmd is lower than queue_size */ - dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", - opts->queue_size, ctrl->ctrl.maxcmd); - opts->queue_size = ctrl->ctrl.maxcmd; - } + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + + (SG_CHUNK_SIZE * + sizeof(struct scatterlist)) + + ctrl->lport->ops->fcprqst_priv_sz; + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - ret = nvme_fc_init_aen_ops(ctrl); + ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) - goto out_stop_keep_alive; + goto out_free_queues; - if (ctrl->queue_count > 1) { - ret = nvme_fc_create_io_queues(ctrl); - if (ret) - goto out_stop_keep_alive; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + ret = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_admin_tag_set; } - spin_lock_irqsave(&ctrl->lock, flags); - ctrl->state = FCCTRL_ACTIVE; - spin_unlock_irqrestore(&ctrl->lock, flags); - - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - WARN_ON_ONCE(!changed); + /* + * Would have been nice to init io queues tag set as well. + * However, we require interaction from the controller + * for max io queue count before we can do so. + * Defer this to the connect path. + */ - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", - ctrl->cnum, ctrl->ctrl.opts->subsysnqn); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); + if (ret) + goto out_cleanup_admin_q; - kref_get(&ctrl->ctrl.kref); + /* at this point, teardown path changes to ref counting on nvme ctrl */ spin_lock_irqsave(&rport->lock, flags); list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); spin_unlock_irqrestore(&rport->lock, flags); - if (opts->nr_io_queues) { - nvme_queue_scan(&ctrl->ctrl); - nvme_queue_async_events(&ctrl->ctrl); + ret = nvme_fc_create_association(ctrl); + if (ret) { + /* initiate nvme ctrl ref counting teardown */ + nvme_uninit_ctrl(&ctrl->ctrl); + nvme_put_ctrl(&ctrl->ctrl); + + /* as we're past the point where we transition to the ref + * counting teardown path, if we return a bad pointer here, + * the calling routine, thinking it's prior to the + * transition, will do an rport put. Since the teardown + * path also does a rport put, we do an extra get here to + * so proper order/teardown happens. + */ + nvme_fc_rport_get(rport); + + if (ret > 0) + ret = -EIO; + return ERR_PTR(ret); } - return &ctrl->ctrl; + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", + ctrl->cnum, ctrl->ctrl.opts->subsysnqn); -out_stop_keep_alive: - nvme_stop_keep_alive(&ctrl->ctrl); -out_remove_admin_queue: - /* send a Disconnect(association) LS to fc-nvme target */ - nvme_fc_xmt_disconnect_assoc(ctrl); - nvme_stop_keep_alive(&ctrl->ctrl); - nvme_fc_destroy_admin_queue(ctrl); -out_uninit_ctrl: - nvme_uninit_ctrl(&ctrl->ctrl); - nvme_put_ctrl(&ctrl->ctrl); - if (ret > 0) - ret = -EIO; - /* exit via here will follow ctlr ref point callbacks to free */ - return ERR_PTR(ret); + return &ctrl->ctrl; +out_cleanup_admin_q: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_admin_tag_set: + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_free_queues: + kfree(ctrl->queues); out_free_ida: + put_device(ctrl->dev); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); out_free_ctrl: kfree(ctrl); out_fail: - nvme_fc_rport_put(rport); /* exit via here doesn't follow ctlr ref points */ return ERR_PTR(ret); } @@ -2617,6 +2914,7 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { struct nvme_fc_lport *lport; struct nvme_fc_rport *rport; + struct nvme_ctrl *ctrl; struct nvmet_fc_traddr laddr = { 0L, 0L }; struct nvmet_fc_traddr raddr = { 0L, 0L }; unsigned long flags; @@ -2648,7 +2946,10 @@ nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) spin_unlock_irqrestore(&nvme_fc_lock, flags); - return __nvme_fc_create_ctrl(dev, opts, lport, rport); + ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); + if (IS_ERR(ctrl)) + nvme_fc_rport_put(rport); + return ctrl; } } spin_unlock_irqrestore(&nvme_fc_lock, flags); From baee29ac17e6363c9ec843de6b83afc95932e9c4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:44:06 +0200 Subject: [PATCH 04/39] nvme-fc: mark two symbols static Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5aa52863ba54e..e73862ebb8b46 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1257,7 +1257,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, return complete_rq; } -void +static void nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) { struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); @@ -1768,7 +1768,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) "reset work\n", ctrl->cnum); } -enum blk_eh_timer_return +static enum blk_eh_timer_return nvme_fc_timeout(struct request *rq, bool reserved) { struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); From edba98dd46fb6dca98d3e6913a86e1d54f02c440 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:37:59 +0200 Subject: [PATCH 05/39] nvmet-fc: mark nvmet_fc_handle_fcp_rqst static Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 074bd3743b5fc..3626dd8cc8a13 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2055,7 +2055,7 @@ nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req) /* * Actual processing routine for received FC-NVME LS Requests from the LLD */ -void +static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod) { From 3f5e118848118d132b1dabdbb9687ea140756c1e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:40:22 +0200 Subject: [PATCH 06/39] nvmet-fc: fix endianess annoations for nvmet_fc_format_rsp_hdr The passed in desc_len is a big endian value, so mark it as such. Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3626dd8cc8a13..d504c52ac9cb9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1058,7 +1058,7 @@ EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); static void -nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, u32 desc_len, u8 rqst_ls_cmd) +nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) { struct fcnvme_ls_acc_hdr *acc = buf; From f63688a610958a410e9cab7b4d41b77a29912379 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:42:23 +0200 Subject: [PATCH 07/39] nvmet-fc: mark the sqhd field as __le16 That's what it's used as. Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index d504c52ac9cb9..4392b514725b5 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -119,7 +119,7 @@ struct nvmet_fc_tgt_queue { u16 qid; u16 sqsize; u16 ersp_ratio; - u16 sqhd; + __le16 sqhd; int cpu; atomic_t connected; atomic_t sqtail; From 8ad76cf10012b625800a9764c5af08dbc0948ff7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:43:13 +0200 Subject: [PATCH 08/39] nvmet-fc: properly endian swap sq_head Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/fc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 4392b514725b5..62eba29c85fb9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1700,7 +1700,7 @@ nvmet_fc_prep_fcp_rsp(struct nvmet_fc_tgtport *tgtport, xfr_length != fod->total_length || (le16_to_cpu(cqe->status) & 0xFFFE) || cqewd[0] || cqewd[1] || (sqe->flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND)) || - queue_90percent_full(fod->queue, cqe->sq_head)) + queue_90percent_full(fod->queue, le16_to_cpu(cqe->sq_head))) send_ersp = true; /* re-set the fields */ From 36b8890e91690fa392fa0bf7913fed79b156c194 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 10:37:25 +0200 Subject: [PATCH 09/39] nvmet-fcloop: mark two symbols static Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/fcloop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index aaa3dbe22bd5e..15551ef79c8c4 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -666,7 +666,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) #define FCLOOP_SGL_SEGS 256 #define FCLOOP_DMABOUND_4G 0xFFFFFFFF -struct nvme_fc_port_template fctemplate = { +static struct nvme_fc_port_template fctemplate = { .localport_delete = fcloop_localport_delete, .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, @@ -686,7 +686,7 @@ struct nvme_fc_port_template fctemplate = { .fcprqst_priv_sz = sizeof(struct fcloop_ini_fcpreq), }; -struct nvmet_fc_target_template tgttemplate = { +static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, .xmt_ls_rsp = fcloop_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, From a44e4e8b6bf22926be6ed63f99def5e62953ac08 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:45 -0700 Subject: [PATCH 10/39] Standardize nvme SGL segment count Standardize default SGL segment count for nvme target and initiator The driver needs to make them the same for clarity. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvme.h | 4 +--- drivers/scsi/lpfc/lpfc_nvmet.c | 2 +- drivers/scsi/lpfc/lpfc_nvmet.h | 4 +--- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 1347deb8dd6cb..62777965f92f6 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -21,9 +21,7 @@ * included with this package. * ********************************************************************/ -#define LPFC_NVME_MIN_SEGS 16 -#define LPFC_NVME_DEFAULT_SEGS 66 /* 256K IOs - 64 + 2 */ -#define LPFC_NVME_MAX_SEGS 510 +#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVMET_MIN_POSTBUF 16 #define LPFC_NVMET_DEFAULT_POSTBUF 1024 #define LPFC_NVMET_MAX_POSTBUF 4096 diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index d488c3318d4bd..c8a49418b7358 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -709,7 +709,7 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) pinfo.port_id = vport->fc_myDID; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; - lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt; + lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_CMD_IN_ISR | diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index 02735fc6fd411..d8bac4c61541b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -21,9 +21,7 @@ * included with this package. * ********************************************************************/ -#define LPFC_NVMET_MIN_SEGS 16 -#define LPFC_NVMET_DEFAULT_SEGS 64 /* 256K IOs */ -#define LPFC_NVMET_MAX_SEGS 510 +#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVMET_SUCCESS_LEN 12 /* Used for NVME Target */ From 975ff31c77348c567b4c12418d456446c50006dd Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:46 -0700 Subject: [PATCH 11/39] Fix nvme unregister port timeout. During some link event testing it was observed that the wait_for_completion_timeout in the lpfc_nvme_unregister_port was timing out all the time. The initiator is claiming the nvme_fc_unregister_remoteport upcall is not completing the unregister in the time allotted. [ 2186.151317] lpfc 0000:07:00.0: 0:(0):6169 Unreg nvme wait failed 0 The wait_for_completion_timeout returns 0 when the wait has been outstanding for the jiffies passed by the caller. In this error message, the nvme initiator passed value 5 - meaning 5 jiffies - and this is just wrong. Calculate 5 seconds in Jiffies and pass that value from the current jiffies. Also the log message for the unregister timeout was reduced because timeout failure is the same as timeout. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvme.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 0024de1c6c1fe..a39d72c2e7215 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2409,6 +2409,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) struct lpfc_nvme_lport *lport; struct lpfc_nvme_rport *rport; struct nvme_fc_remote_port *remoteport; + unsigned long wait_tmo; localport = vport->localport; @@ -2451,11 +2452,12 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) * before proceeding. This guarantees the transport and driver * have completed the unreg process. */ - ret = wait_for_completion_timeout(&rport->rport_unreg_done, 5); + wait_tmo = msecs_to_jiffies(5000); + ret = wait_for_completion_timeout(&rport->rport_unreg_done, + wait_tmo); if (ret == 0) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6169 Unreg nvme wait failed %d\n", - ret); + "6169 Unreg nvme wait timeout\n"); } } return; From ba43c4d0fe5ab053c058fd7e2402eb8ba1c41074 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:47 -0700 Subject: [PATCH 12/39] Fix rejected nvme LS Req. In this case, the NVME initiator is sending an LS REQ command on an NDLP that is not MAPPED. The FW rejects it. The lpfc_nvme_ls_req routine checks for a NULL ndlp pointer but does not check the NDLP state. This allows the routine to send an LS IO when the ndlp is disconnected. Check the ndlp for NULL, actual node, Target and MAPPED or Initiator and UNMAPPED. This avoids Fabric nodes getting the Create Association or Create Connection commands. Initiators are free to Reject either Create. Also some of the messages numbers in lpfc_nvme_ls_req were changed because they were already used in other log messages. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvme.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a39d72c2e7215..cbf3fe3f8637a 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -401,6 +401,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nodelist *ndlp; struct ulp_bde64 *bpl; struct lpfc_dmabuf *bmp; + uint16_t ntype, nstate; /* there are two dma buf in the request, actually there is one and * the second one is just the start address + cmd size. @@ -417,11 +418,26 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, vport = lport->vport; ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); - if (!ndlp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6043 Could not find node for DID %x\n", + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6051 DID x%06x not an active rport.\n", pnvme_rport->port_id); - return 1; + return -ENODEV; + } + + /* The remote node has to be a mapped nvme target or an + * unmapped nvme initiator or it's an error. + */ + ntype = ndlp->nlp_type; + nstate = ndlp->nlp_state; + if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || + (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { + lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, + "6088 DID x%06x not ready for " + "IO. State x%x, Type x%x\n", + pnvme_rport->port_id, + ndlp->nlp_state, ndlp->nlp_type); + return -ENODEV; } bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); if (!bmp) { @@ -456,7 +472,7 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6051 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " + "6149 ENTER. lport %p, rport %p lsreq%p rqstlen:%d " "rsplen:%d %pad %pad\n", pnvme_lport, pnvme_rport, pnvme_lsreq, pnvme_lsreq->rqstlen, From c154e750d38e561e076e9d2fc6ffaa18820f4fa1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:48 -0700 Subject: [PATCH 13/39] Fix log message in completion path. In the lpfc_nvme_io_cmd_wqe_cmpl routine the driver was printing two pointers and the DID for the rport whenever an IO completed on a now that had transitioned to a non active state. There is no need to print the node pointer address for a node that is not active the DID should be enough to debug. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvme.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index cbf3fe3f8637a..ee1a285d08209 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -788,9 +788,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, ndlp = rport->ndlp; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6061 rport %p, ndlp %p, DID x%06x ndlp " - "not ready.\n", - rport, ndlp, rport->remoteport->port_id); + "6061 rport %p, DID x%06x node not ready.\n", + rport, rport->remoteport->port_id); ndlp = lpfc_findnode_did(vport, rport->remoteport->port_id); if (!ndlp) { From e8c0a7793523e607bace8e457aa1bad0b16215ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:49 -0700 Subject: [PATCH 14/39] Add debug messages for nvme/fcp resource allocation. The xri resources are split into pools for NVME and FCP IO when NVME is enabled. There was not message in the log that identified this allocation. Added debug message to log XRI split. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_init.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 6cc561b042118..b56da015e3b36 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -3508,6 +3508,12 @@ lpfc_sli4_scsi_sgl_update(struct lpfc_hba *phba) spin_unlock(&phba->scsi_buf_list_put_lock); spin_unlock_irq(&phba->scsi_buf_list_get_lock); + lpfc_printf_log(phba, KERN_INFO, LOG_SLI, + "6060 Current allocated SCSI xri-sgl count:%d, " + "maximum SCSI xri count:%d (split:%d)\n", + phba->sli4_hba.scsi_xri_cnt, + phba->sli4_hba.scsi_xri_max, phba->cfg_xri_split); + if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) { /* max scsi xri shrinked below the allocated scsi buffers */ scsi_xri_cnt = phba->sli4_hba.scsi_xri_cnt - From 0ef699684cfaaa8d3b7993e12d5a2066dbb1d6e9 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:50 -0700 Subject: [PATCH 15/39] Fix spelling in comments. Comment should have said Repost. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_sli.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1c9fa45df7eb5..7087c5546db21 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -6338,7 +6338,7 @@ lpfc_sli4_get_allocated_extnts(struct lpfc_hba *phba, uint16_t type, } /** - * lpfc_sli4_repost_sgl_list - Repsot the buffers sgl pages as block + * lpfc_sli4_repost_sgl_list - Repost the buffers sgl pages as block * @phba: pointer to lpfc hba data structure. * @pring: Pointer to driver SLI ring object. * @sgl_list: linked link of sgl buffers to post From 06909bb91f6477db3dd40d5c5fe0364cd114a918 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:51 -0700 Subject: [PATCH 16/39] Remove unused defines for NVME PostBuf. These defines for the posting of buffers for nvmet target were not used. Removing the unused defines. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvme.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 62777965f92f6..2582f46edf05c 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -22,9 +22,6 @@ ********************************************************************/ #define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ -#define LPFC_NVMET_MIN_POSTBUF 16 -#define LPFC_NVMET_DEFAULT_POSTBUF 1024 -#define LPFC_NVMET_MAX_POSTBUF 4096 #define LPFC_NVME_WQSIZE 256 #define LPFC_NVME_ERSP_LEN 0x20 From eafe89f556ef1fe1b07ab8c1b4ce453ca1da1e41 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:52 -0700 Subject: [PATCH 17/39] Remove NULL ptr check before kfree. The check for NULL ptr is not necessary, kfree will check it. Removing NULL ptr check. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_debugfs.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 913eed822cb8e..76857e2db5096 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -5700,10 +5700,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) #ifdef CONFIG_SCSI_LPFC_DEBUG_FS struct lpfc_hba *phba = vport->phba; - if (vport->disc_trc) { - kfree(vport->disc_trc); - vport->disc_trc = NULL; - } + kfree(vport->disc_trc); + vport->disc_trc = NULL; debugfs_remove(vport->debug_disc_trc); /* discovery_trace */ vport->debug_disc_trc = NULL; @@ -5770,10 +5768,8 @@ lpfc_debugfs_terminate(struct lpfc_vport *vport) debugfs_remove(phba->debug_readRef); /* readRef */ phba->debug_readRef = NULL; - if (phba->slow_ring_trc) { - kfree(phba->slow_ring_trc); - phba->slow_ring_trc = NULL; - } + kfree(phba->slow_ring_trc); + phba->slow_ring_trc = NULL; /* slow_ring_trace */ debugfs_remove(phba->debug_slow_ring_trc); From afbb38fedaa25935b963b07cc10f72a25bb3acbb Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:53 -0700 Subject: [PATCH 18/39] Fix extra line print in rqpair debug print. An extra blank line was being added the the rqpair printing. Remove the extra line feed. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_debugfs.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 76857e2db5096..55a8d8ffcfd4d 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -3128,8 +3128,6 @@ __lpfc_idiag_print_rqpair(struct lpfc_queue *qp, struct lpfc_queue *datqp, datqp->queue_id, datqp->entry_count, datqp->entry_size, datqp->host_index, datqp->hba_index); - len += snprintf(pbuffer + len, LPFC_QUE_INFO_GET_BUF_SIZE - len, "\n"); - return len; } From c07f10cd56ae30d3f1f1c89c81dffd6fe6bb2223 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:54 -0700 Subject: [PATCH 19/39] Fix PRLI ACC rsp for NVME PRLI ACC from target is FCP oriented. Word 0 was wrong. This was noticed by another nvmet-fc vendor that was testing the lpfc nvme-fc initiator with their target. Verified results with analyzer. PRLI BC B5 56 56 22 61 04 00 00 61 00 00 01 29 00 00 20 00 00 00 00 10 FF FF 00 00 00 00 20 14 00 18 28 00 00 00 00 00 00 00 00 00 00 00 00 00 00 20 00 00 00 00 9C D8 DA C9 BC 95 75 75 ACC BC B5 56 56 23 61 00 00 00 61 04 00 01 98 00 00 30 00 00 00 00 10 00 18 00 00 00 00 02 14 00 18 28 00 01 00 00 00 00 00 00 00 00 00 00 00 00 18 00 00 00 00 B0 6B 07 57 BC B5 75 75 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_els.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d9c61d030034d..d6e0f58717982 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -4403,7 +4403,7 @@ lpfc_els_rsp_prli_acc(struct lpfc_vport *vport, struct lpfc_iocbq *oldiocb, pcmd = (uint8_t *) (((struct lpfc_dmabuf *) elsiocb->context2)->virt); memset(pcmd, 0, cmdsize); - *((uint32_t *) (pcmd)) = (ELS_CMD_ACC | (ELS_CMD_PRLI & ~ELS_RSP_MASK)); + *((uint32_t *)(pcmd)) = elsrspcmd; pcmd += sizeof(uint32_t); /* For PRLI, remainder of payload is PRLI parameter page */ From d1f525aaa4d7e575a655365b6ae01a2a1c5fb321 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:55 -0700 Subject: [PATCH 20/39] Fix driver unload/reload operation. There are couple of different load/unload issues fixed with this patch. One of the issues was reported by Junichi Nomura, a patch was submitted by Johannes Thumsrhirn which did fix one of the problems but the fix in this patch separates the pring free from the queue free and does not set the parameter passed in to NULL. issues: (1) driver could not be unloaded and reloaded without some Oops or Panic occurring. (2) The driver was panicking because of a corruption in the Memory Manager when the iocb list was getting allocated. Root cause for the memory corruption was a double free of the Work Queue ring pointer memory - Freed once in the lpfc_sli4_queue_free when the CQ was destroyed and again in lpfc_sli4_queue_free when the WQ was destroyed. The pring free and the queue free were separated, the pring free was moved to the wq destroy routine because it a better fit logically to delete the ring with the wq. The checkpatch flagged several alignmenet issues that were also corrected with this patch. The mboxq was never initialed correctly before it was used by the driver this patch corrects that issue. Reported-by: Junichi Nomura Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Tested-by: Junichi Nomura --- drivers/scsi/lpfc/lpfc_init.c | 46 +++++++++++++++++++---------------- drivers/scsi/lpfc/lpfc_sli.c | 7 +++++- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b56da015e3b36..cca7f81357c3b 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -5815,6 +5815,12 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_vfi_blk_list); INIT_LIST_HEAD(&phba->lpfc_vpi_blk_list); + /* Initialize mboxq lists. If the early init routines fail + * these lists need to be correctly initialized. + */ + INIT_LIST_HEAD(&phba->sli.mboxq); + INIT_LIST_HEAD(&phba->sli.mboxq_cmpl); + /* initialize optic_state to 0xFF */ phba->sli4_hba.lnk_info.optic_state = 0xff; @@ -5880,6 +5886,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) "READ_NV, mbxStatus x%x\n", bf_get(lpfc_mqe_command, &mboxq->u.mqe), bf_get(lpfc_mqe_status, &mboxq->u.mqe)); + mempool_free(mboxq, phba->mbox_mem_pool); rc = -EIO; goto out_free_bsmbx; } @@ -7805,7 +7812,7 @@ lpfc_alloc_fcp_wq_cq(struct lpfc_hba *phba, int wqidx) /* Create Fast Path FCP WQs */ wqesize = (phba->fcp_embed_io) ? - LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); if (!qdesc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -7836,7 +7843,7 @@ int lpfc_sli4_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; - int idx, io_channel, max; + int idx, io_channel; /* * Create HBA Record arrays. @@ -7997,15 +8004,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) if (lpfc_alloc_nvme_wq_cq(phba, idx)) goto out_error; - /* allocate MRQ CQs */ - max = phba->cfg_nvme_io_channel; - if (max < phba->cfg_nvmet_mrq) - max = phba->cfg_nvmet_mrq; - - for (idx = 0; idx < max; idx++) - if (lpfc_alloc_nvme_wq_cq(phba, idx)) - goto out_error; - if (phba->nvmet_support) { for (idx = 0; idx < phba->cfg_nvmet_mrq; idx++) { qdesc = lpfc_sli4_queue_alloc(phba, @@ -8227,11 +8225,11 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba) /* Release FCP cqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_cq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP wqs */ lpfc_sli4_release_queues(&phba->sli4_hba.fcp_wq, - phba->cfg_fcp_io_channel); + phba->cfg_fcp_io_channel); /* Release FCP CQ mapping array */ lpfc_sli4_release_queue_map(&phba->sli4_hba.fcp_cq_map); @@ -8577,15 +8575,15 @@ lpfc_sli4_queue_setup(struct lpfc_hba *phba) lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0528 %s not allocated\n", phba->sli4_hba.mbx_cq ? - "Mailbox WQ" : "Mailbox CQ"); + "Mailbox WQ" : "Mailbox CQ"); rc = -ENOMEM; goto out_destroy; } rc = lpfc_create_wq_cq(phba, phba->sli4_hba.hba_eq[0], - phba->sli4_hba.mbx_cq, - phba->sli4_hba.mbx_wq, - NULL, 0, LPFC_MBOX); + phba->sli4_hba.mbx_cq, + phba->sli4_hba.mbx_wq, + NULL, 0, LPFC_MBOX); if (rc) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, "0529 Failed setup of mailbox WQ/CQ: rc = 0x%x\n", @@ -10054,9 +10052,14 @@ lpfc_sli4_hba_unset(struct lpfc_hba *phba) /* Stop kthread signal shall trigger work_done one more time */ kthread_stop(phba->worker_thread); + /* Unset the queues shared with the hardware then release all + * allocated resources. + */ + lpfc_sli4_queue_unset(phba); + lpfc_sli4_queue_destroy(phba); + /* Reset SLI4 HBA FCoE function */ lpfc_pci_function_reset(phba); - lpfc_sli4_queue_destroy(phba); /* Stop the SLI4 device port */ phba->pport->work_port_events = 0; @@ -10312,6 +10315,7 @@ lpfc_pci_probe_one_s3(struct pci_dev *pdev, const struct pci_device_id *pid) } /* Initialize and populate the iocb list per host */ + error = lpfc_init_iocb_list(phba, LPFC_IOCB_LIST_CNT); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, @@ -11092,7 +11096,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) } /* Initialize and populate the iocb list per host */ - lpfc_printf_log(phba, KERN_INFO, LOG_INIT, "2821 initialize iocb list %d.\n", phba->cfg_iocb_cnt*1024); @@ -11183,7 +11186,9 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) if ((phba->nvmet_support == 0) && (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { /* Create NVME binding with nvme_fc_transport. This - * ensures the vport is initialized. + * ensures the vport is initialized. If the localport + * create fails, it should not unload the driver to + * support field issues. */ error = lpfc_nvme_create_localport(vport); if (error) { @@ -11191,7 +11196,6 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) "6004 NVME registration failed, " "error x%x\n", error); - goto out_disable_intr; } } diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 7087c5546db21..d60694469f459 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -13758,7 +13758,10 @@ lpfc_sli4_queue_free(struct lpfc_queue *queue) lpfc_free_rq_buffer(queue->phba, queue); kfree(queue->rqbp); } - kfree(queue->pring); + + if (!list_empty(&queue->wq_list)) + list_del(&queue->wq_list); + kfree(queue); return; } @@ -15561,6 +15564,8 @@ lpfc_wq_destroy(struct lpfc_hba *phba, struct lpfc_queue *wq) } /* Remove wq from any list */ list_del_init(&wq->list); + kfree(wq->pring); + wq->pring = NULL; mempool_free(mbox, wq->phba->mbox_mem_pool); return status; } From 3f247de750b8dd8f50a2c1390e2a1238790a9dff Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:56 -0700 Subject: [PATCH 21/39] Fix driver usage of 128B WQEs when WQ_CREATE is V1. There are two versions of a structure for queue creation and setup that the driver shares with FW. The driver was only treating as version 0. Verify WQ_CREATE with 128B WQEs in V0 and V1. Code review of another bug showed the driver passing 128B WQEs and 8 pages in WQ CREATE and V0. Code inspection/instrumentation showed that the driver uses V0 in WQ_CREATE and if the caller passes queue->entry_size 128B, the driver sets the hdr_version to V1 so all is good. When I tested the V1 WQ_CREATE, the mailbox failed causing the driver to unload. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_sli.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index d60694469f459..1519fdfc1aa3e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -14741,6 +14741,9 @@ lpfc_wq_create(struct lpfc_hba *phba, struct lpfc_queue *wq, case LPFC_Q_CREATE_VERSION_1: bf_set(lpfc_mbx_wq_create_wqe_count, &wq_create->u.request_1, wq->entry_count); + bf_set(lpfc_mbox_hdr_version, &shdr->request, + LPFC_Q_CREATE_VERSION_1); + switch (wq->entry_size) { default: case 64: From 4410a67a9e53d3cf8d1b88169c642d91f1292fb2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:57 -0700 Subject: [PATCH 22/39] Fix nvme initiator handling when not enabled. Fix nvme initiator handline when CONFIG_LPFC_NVME_INITIATOR is not enabled. With update nvme upstream driver sources, loading the driver with nvme enabled resulting in this Oops. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: lpfc_nvme_update_localport+0x23/0xd0 [lpfc] PGD 0 Oops: 0000 [#1] SMP CPU: 0 PID: 10256 Comm: lpfc_worker_0 Tainted Hardware name: ... task: ffff881028191c40 task.stack: ffff880ffdf00000 RIP: 0010:lpfc_nvme_update_localport+0x23/0xd0 [lpfc] RSP: 0018:ffff880ffdf03c20 EFLAGS: 00010202 Cause: As the initiator driver completes discovery at different stages, it call lpfc_nvme_update_localport to hint that the DID and role may have changed. In the implementation of lpfc_nvme_update_localport, the driver was not validating the localport or the lport during the execution of the update_localport routine. With the recent upstream additions to the driver, the create_localport routine didn't run and so the localport was NULL causing the page-fault Oops. Fix: Add the CONFIG_LPFC_NVME_INITIATOR preprocessor inclusions to lpfc_nvme_update_localport to turn off all routine processing when the running kernel does not have NVME configured. Add NULL pointer checks on the localport and lport in lpfc_nvme_update_localport and dump messages if they are NULL and just exit. Also one alingment issue fixed. Repalces the ifdef with the IS_ENABLED macro. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_attr.c | 2 +- drivers/scsi/lpfc/lpfc_nvme.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 22819afbaef5c..4b32d021f7d98 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3335,7 +3335,7 @@ LPFC_ATTR_R(enable_fc4_type, LPFC_ENABLE_FCP, * percentage will go to NVME. */ LPFC_ATTR_R(xri_split, 50, 10, 90, - "Division of XRI resources between SCSI and NVME"); + "Division of XRI resources between SCSI and NVME"); /* # lpfc_log_verbose: Only turn this flag on if you are willing to risk being diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index ee1a285d08209..264da8f02c976 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -2264,12 +2264,23 @@ lpfc_nvme_destroy_localport(struct lpfc_vport *vport) void lpfc_nvme_update_localport(struct lpfc_vport *vport) { +#if (IS_ENABLED(CONFIG_NVME_FC)) struct nvme_fc_local_port *localport; struct lpfc_nvme_lport *lport; localport = vport->localport; + if (!localport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6710 Update NVME fail. No localport\n"); + return; + } lport = (struct lpfc_nvme_lport *)localport->private; - + if (!lport) { + lpfc_printf_vlog(vport, KERN_WARNING, LOG_NVME, + "6171 Update NVME fail. localP %p, No lport\n", + localport); + return; + } lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME, "6012 Update NVME lport %p did x%x\n", localport, vport->fc_myDID); @@ -2283,7 +2294,7 @@ lpfc_nvme_update_localport(struct lpfc_vport *vport) lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, "6030 bound lport %p to DID x%06x\n", lport, localport->port_id); - +#endif } int From 59c6e13ecd50fdb2d872617d194e05f2c31f26d7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:58 -0700 Subject: [PATCH 23/39] Remove hba lock from NVMET issue WQE. Unnecessary lock is taken. ring lock should be sufficient to protect the work queue submission. This was noticed when doing performance testing. The hbalock is not needed to issue io to the nvme work queue. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_nvmet.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index c8a49418b7358..5a994a8d799d6 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -517,7 +517,6 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_iocbq *nvmewqeq; - unsigned long iflags; int rc, id; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -571,10 +570,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); - /* For now we take hbalock */ - spin_lock_irqsave(&phba->hbalock, iflags); rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, nvmewqeq); - spin_unlock_irqrestore(&phba->hbalock, iflags); if (rc == WQE_SUCCESS) { ctxp->flag |= LPFC_NVMET_IO_INP; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS From 2b7824d00d9cae2e625dcf6b004c5d738a9c8df4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:04:59 -0700 Subject: [PATCH 24/39] Fix driver load issues when MRQ=8 The symptom is that the driver will fail to login to the fabric. The reason is because it is out of iocb resources. There is a one to one relationship between MRQs (receive buffers for NVMET-FC) and iocbs and the default number of IOCBs was not accounting for the number of MRQs that were being created. This fix aligns the number of MRQ resources with the total resources so that it can handle fabric events when needed. Also the initialization of ctxlock to be on FCP commands, NOT LS commands. And modified log messages so that the log output can be correlated with the analyzer trace. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_init.c | 12 ++++++++---- drivers/scsi/lpfc/lpfc_nvme.c | 23 ++++++++++++----------- drivers/scsi/lpfc/lpfc_nvmet.c | 6 +++--- 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index cca7f81357c3b..3bd43f4a98d65 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -11061,7 +11061,7 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) struct lpfc_hba *phba; struct lpfc_vport *vport = NULL; struct Scsi_Host *shost = NULL; - int error; + int error, cnt; uint32_t cfg_mode, intr_mode; /* Allocate memory for HBA structure */ @@ -11095,11 +11095,15 @@ lpfc_pci_probe_one_s4(struct pci_dev *pdev, const struct pci_device_id *pid) goto out_unset_pci_mem_s4; } + cnt = phba->cfg_iocb_cnt * 1024; + if (phba->nvmet_support) + cnt += phba->cfg_nvmet_mrq_post * phba->cfg_nvmet_mrq; + /* Initialize and populate the iocb list per host */ lpfc_printf_log(phba, KERN_INFO, LOG_INIT, - "2821 initialize iocb list %d.\n", - phba->cfg_iocb_cnt*1024); - error = lpfc_init_iocb_list(phba, phba->cfg_iocb_cnt*1024); + "2821 initialize iocb list %d total %d\n", + phba->cfg_iocb_cnt, cnt); + error = lpfc_init_iocb_list(phba, cnt); if (error) { lpfc_printf_log(phba, KERN_ERR, LOG_INIT, diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 264da8f02c976..cb1e82b907195 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1495,6 +1495,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, "io buffer. Skipping abort req.\n"); return; } + nvmereq_wqe = &lpfc_nbuf->cur_iocbq; /* * The lpfc_nbuf and the mapped nvme_fcreq in the driver's @@ -1508,20 +1509,19 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, "6143 NVME req mismatch: " "lpfc_nbuf %p nvmeCmd %p, " - "pnvme_fcreq %p. Skipping Abort\n", + "pnvme_fcreq %p. Skipping Abort xri x%x\n", lpfc_nbuf, lpfc_nbuf->nvmeCmd, - pnvme_fcreq); + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } /* Don't abort IOs no longer on the pending queue. */ - nvmereq_wqe = &lpfc_nbuf->cur_iocbq; if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { spin_unlock_irqrestore(&phba->hbalock, flags); lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, "6142 NVME IO req %p not queued - skipping " - "abort req\n", - pnvme_fcreq); + "abort req xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1535,8 +1535,9 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, "6144 Outstanding NVME I/O Abort Request " "still pending on nvme_fcreq %p, " - "lpfc_ncmd %p\n", - pnvme_fcreq, lpfc_nbuf); + "lpfc_ncmd %p xri x%x\n", + pnvme_fcreq, lpfc_nbuf, + nvmereq_wqe->sli4_xritag); return; } @@ -1545,8 +1546,8 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, spin_unlock_irqrestore(&phba->hbalock, flags); lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, "6136 No available abort wqes. Skipping " - "Abts req for nvme_fcreq %p.\n", - pnvme_fcreq); + "Abts req for nvme_fcreq %p xri x%x\n", + pnvme_fcreq, nvmereq_wqe->sli4_xritag); return; } @@ -1604,7 +1605,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, } lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, - "6138 Transport Abort NVME Request Issued for\n" + "6138 Transport Abort NVME Request Issued for " "ox_id x%x on reqtag x%x\n", nvmereq_wqe->sli4_xritag, abts_buf->iotag); @@ -2491,7 +2492,7 @@ lpfc_nvme_unregister_port(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) input_err: #endif lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6168: State error: lport %p, rport%p FCID x%06x\n", + "6168 State error: lport %p, rport%p FCID x%06x\n", vport->localport, ndlp->rport, ndlp->nlp_DID); } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 5a994a8d799d6..8348bb53dcc72 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -872,7 +872,6 @@ lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ctxp->wqeq = NULL; ctxp->state = LPFC_NVMET_STE_RCV; ctxp->rqb_buffer = (void *)nvmebuf; - spin_lock_init(&ctxp->ctxlock); lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", oxid, size, sid); @@ -981,6 +980,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->rqb_buffer = nvmebuf; ctxp->entry_cnt = 1; ctxp->flag = 0; + spin_lock_init(&ctxp->ctxlock); #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { @@ -1003,8 +1003,8 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } #endif - lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d from %06x\n", - oxid, size, sid); + lpfc_nvmeio_data(phba, "NVMET FCP RCV: xri x%x sz %d CPU %02x\n", + oxid, size, smp_processor_id()); atomic_inc(&tgtp->rcv_fcp_cmd_in); /* From 9d3d340d19d36b923d768f25c66cedc900d8ad90 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:00 -0700 Subject: [PATCH 25/39] Fix crash after issuing lip reset When RPI is not available, driver sends WQE with invalid RPI value and rejected by HBA. lpfc 0000:82:00.3: 1:3154 BLS ABORT RSP failed, data: x3/xa0320008 and lpfc :2753 PLOGI failure DID:FFFFFA Status:x3/xa0240008 In this case, driver accesses rpi_ids array out of bounds. Fix: Check return value of lpfc_sli4_alloc_rpi(). Do not allocate lpfc_nodelist entry if RPI is not available. When RPI is not available, we will get discovery timeouts and command drops for some of the vports as seen below. lpfc :0273 Unexpected discovery timeout, vport State x0 lpfc :0230 Unexpected timeout, hba link state x5 lpfc :0111 Dropping received ELS cmd Data: x0 xc90c55 x0 Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_bsg.c | 4 +++ drivers/scsi/lpfc/lpfc_crtn.h | 2 +- drivers/scsi/lpfc/lpfc_els.c | 35 +++++++---------------- drivers/scsi/lpfc/lpfc_hbadisc.c | 47 +++++++++++++++++++++++------- drivers/scsi/lpfc/lpfc_init.c | 49 +++++++++++++++++--------------- drivers/scsi/lpfc/lpfc_sli.c | 3 +- drivers/scsi/lpfc/lpfc_vport.c | 3 +- 7 files changed, 79 insertions(+), 64 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_bsg.c b/drivers/scsi/lpfc/lpfc_bsg.c index 18157d2840a3b..a1686c2d863c5 100644 --- a/drivers/scsi/lpfc/lpfc_bsg.c +++ b/drivers/scsi/lpfc/lpfc_bsg.c @@ -2486,6 +2486,10 @@ static int lpfcdiag_loop_self_reg(struct lpfc_hba *phba, uint16_t *rpi) mbox, *rpi); else { *rpi = lpfc_sli4_alloc_rpi(phba); + if (*rpi == LPFC_RPI_ALLOC_ERROR) { + mempool_free(mbox, phba->mbox_mem_pool); + return -EBUSY; + } status = lpfc_reg_rpi(phba, phba->pport->vpi, phba->pport->fc_myDID, (uint8_t *)&phba->pport->fc_sparam, diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 54e6ac42fbcd4..0079c6fc31fca 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -99,7 +99,7 @@ void lpfc_issue_reg_vpi(struct lpfc_hba *, struct lpfc_vport *); int lpfc_check_sli_ndlp(struct lpfc_hba *, struct lpfc_sli_ring *, struct lpfc_iocbq *, struct lpfc_nodelist *); -void lpfc_nlp_init(struct lpfc_vport *, struct lpfc_nodelist *, uint32_t); +struct lpfc_nodelist *lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did); struct lpfc_nodelist *lpfc_nlp_get(struct lpfc_nodelist *); int lpfc_nlp_put(struct lpfc_nodelist *); int lpfc_nlp_not_used(struct lpfc_nodelist *ndlp); diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index d6e0f58717982..ffdcac04d7ff6 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -895,10 +895,9 @@ lpfc_cmpl_els_flogi_nport(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, * Cannot find existing Fabric ndlp, so allocate a * new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, PT2PT_RemoteID); if (!ndlp) goto fail; - lpfc_nlp_init(vport, ndlp, PT2PT_RemoteID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -1364,7 +1363,6 @@ lpfc_els_abort_flogi(struct lpfc_hba *phba) int lpfc_initial_flogi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; vport->port_state = LPFC_FLOGI; @@ -1374,10 +1372,9 @@ lpfc_initial_flogi(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ @@ -1418,17 +1415,15 @@ lpfc_initial_flogi(struct lpfc_vport *vport) int lpfc_initial_fdisc(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* First look for the Fabric ndlp */ ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Put ndlp onto node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { @@ -1564,14 +1559,13 @@ lpfc_plogi_confirm_nport(struct lpfc_hba *phba, uint32_t *prsp, phba->active_rrq_pool); return ndlp; } - new_ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_ATOMIC); + new_ndlp = lpfc_nlp_init(vport, ndlp->nlp_DID); if (!new_ndlp) { if (active_rrqs_xri_bitmap) mempool_free(active_rrqs_xri_bitmap, phba->active_rrq_pool); return ndlp; } - lpfc_nlp_init(vport, new_ndlp, ndlp->nlp_DID); } else if (!NLP_CHK_NODE_ACT(new_ndlp)) { rc = memcmp(&ndlp->nlp_portname, name, sizeof(struct lpfc_name)); @@ -2845,10 +2839,9 @@ lpfc_issue_els_scr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -2938,10 +2931,9 @@ lpfc_issue_els_farpr(struct lpfc_vport *vport, uint32_t nportid, uint8_t retry) ndlp = lpfc_findnode_did(vport, nportid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, nportid); if (!ndlp) return 1; - lpfc_nlp_init(vport, ndlp, nportid); lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); @@ -6133,7 +6125,6 @@ int lpfc_els_handle_rscn(struct lpfc_vport *vport) { struct lpfc_nodelist *ndlp; - struct lpfc_hba *phba = vport->phba; /* Ignore RSCN if the port is being torn down. */ if (vport->load_flag & FC_UNLOADING) { @@ -6182,12 +6173,11 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) } ndlp->nlp_prev_state = NLP_STE_UNUSED_NODE; } else { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { lpfc_els_flush_rscn(vport); return 0; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); ndlp->nlp_prev_state = ndlp->nlp_state; lpfc_nlp_set_state(vport, ndlp, NLP_STE_PLOGI_ISSUE); } @@ -7746,11 +7736,9 @@ lpfc_els_unsol_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) goto dropit; - - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); newnode = 1; if ((did & Fabric_DID_MASK) == Fabric_DID_MASK) @@ -8192,7 +8180,6 @@ lpfc_els_unsol_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, static void lpfc_start_fdmi(struct lpfc_vport *vport) { - struct lpfc_hba *phba = vport->phba; struct lpfc_nodelist *ndlp; /* If this is the first time, allocate an ndlp and initialize @@ -8201,9 +8188,8 @@ lpfc_start_fdmi(struct lpfc_vport *vport) */ ndlp = lpfc_findnode_did(vport, FDMI_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, FDMI_DID); if (ndlp) { - lpfc_nlp_init(vport, ndlp, FDMI_DID); ndlp->nlp_type |= NLP_FABRIC; } else { return; @@ -8256,7 +8242,7 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, NameServer_DID); if (!ndlp) { if (phba->fc_topology == LPFC_TOPOLOGY_LOOP) { lpfc_disc_start(vport); @@ -8267,7 +8253,6 @@ lpfc_do_scr_ns_plogi(struct lpfc_hba *phba, struct lpfc_vport *vport) "0251 NameServer login: no memory\n"); return; } - lpfc_nlp_init(vport, ndlp, NameServer_DID); } else if (!NLP_CHK_NODE_ACT(ndlp)) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_UNUSED_NODE); if (!ndlp) { diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 180b072beef6b..d313dde76963c 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4368,10 +4368,17 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, uint32_t did; unsigned long flags; unsigned long *active_rrqs_xri_bitmap = NULL; + int rpi = LPFC_RPI_ALLOC_ERROR; if (!ndlp) return NULL; + if (phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + spin_lock_irqsave(&phba->ndlp_lock, flags); /* The ndlp should not be in memory free mode */ if (NLP_CHK_FREE_REQ(ndlp)) { @@ -4381,7 +4388,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* The ndlp should not already be in active mode */ if (NLP_CHK_NODE_ACT(ndlp)) { @@ -4391,7 +4398,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "usgmap:x%x refcnt:%d\n", (void *)ndlp, ndlp->nlp_usg_map, kref_read(&ndlp->kref)); - return NULL; + goto free_rpi; } /* Keep the original DID */ @@ -4409,7 +4416,7 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, spin_unlock_irqrestore(&phba->ndlp_lock, flags); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0008 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -4426,6 +4433,11 @@ lpfc_enable_node(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node enable: did:x%x", ndlp->nlp_DID, 0, 0); return ndlp; + +free_rpi: + if (phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; } void @@ -5107,11 +5119,9 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) if ((vport->fc_flag & FC_RSCN_MODE) != 0 && lpfc_rscn_payload_check(vport, did) == 0) return NULL; - ndlp = (struct lpfc_nodelist *) - mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, did); if (!ndlp) return NULL; - lpfc_nlp_init(vport, ndlp, did); lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); if (vport->phba->nvmet_support) return ndlp; @@ -5887,16 +5897,31 @@ lpfc_find_vport_by_vpid(struct lpfc_hba *phba, uint16_t vpi) return NULL; } -void -lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, - uint32_t did) +struct lpfc_nodelist * +lpfc_nlp_init(struct lpfc_vport *vport, uint32_t did) { + struct lpfc_nodelist *ndlp; + int rpi = LPFC_RPI_ALLOC_ERROR; + + if (vport->phba->sli_rev == LPFC_SLI_REV4) { + rpi = lpfc_sli4_alloc_rpi(vport->phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) + return NULL; + } + + ndlp = mempool_alloc(vport->phba->nlp_mem_pool, GFP_KERNEL); + if (!ndlp) { + if (vport->phba->sli_rev == LPFC_SLI_REV4) + lpfc_sli4_free_rpi(vport->phba, rpi); + return NULL; + } + memset(ndlp, 0, sizeof (struct lpfc_nodelist)); lpfc_initialize_node(vport, ndlp, did); INIT_LIST_HEAD(&ndlp->nlp_listp); if (vport->phba->sli_rev == LPFC_SLI_REV4) { - ndlp->nlp_rpi = lpfc_sli4_alloc_rpi(vport->phba); + ndlp->nlp_rpi = rpi; lpfc_printf_vlog(vport, KERN_INFO, LOG_NODE, "0007 rpi:%x DID:%x flg:%x refcnt:%d " "map:%x %p\n", ndlp->nlp_rpi, ndlp->nlp_DID, @@ -5918,7 +5943,7 @@ lpfc_nlp_init(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, "node init: did:x%x", ndlp->nlp_DID, 0, 0); - return; + return ndlp; } /* This routine releases all resources associated with a specifc NPort's ndlp diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 3bd43f4a98d65..b1957f868eeea 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -2874,34 +2874,38 @@ lpfc_sli4_node_prep(struct lpfc_hba *phba) { struct lpfc_nodelist *ndlp, *next_ndlp; struct lpfc_vport **vports; - int i; + int i, rpi; + unsigned long flags; if (phba->sli_rev != LPFC_SLI_REV4) return; vports = lpfc_create_vport_work_array(phba); - if (vports != NULL) { - for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { - if (vports[i]->load_flag & FC_UNLOADING) - continue; + if (vports == NULL) + return; - list_for_each_entry_safe(ndlp, next_ndlp, - &vports[i]->fc_nodes, - nlp_listp) { - if (NLP_CHK_NODE_ACT(ndlp)) { - ndlp->nlp_rpi = - lpfc_sli4_alloc_rpi(phba); - lpfc_printf_vlog(ndlp->vport, KERN_INFO, - LOG_NODE, - "0009 rpi:%x DID:%x " - "flg:%x map:%x %p\n", - ndlp->nlp_rpi, - ndlp->nlp_DID, - ndlp->nlp_flag, - ndlp->nlp_usg_map, - ndlp); - } + for (i = 0; i <= phba->max_vports && vports[i] != NULL; i++) { + if (vports[i]->load_flag & FC_UNLOADING) + continue; + + list_for_each_entry_safe(ndlp, next_ndlp, + &vports[i]->fc_nodes, + nlp_listp) { + if (!NLP_CHK_NODE_ACT(ndlp)) + continue; + rpi = lpfc_sli4_alloc_rpi(phba); + if (rpi == LPFC_RPI_ALLOC_ERROR) { + spin_lock_irqsave(&phba->ndlp_lock, flags); + NLP_CLR_NODE_ACT(ndlp); + spin_unlock_irqrestore(&phba->ndlp_lock, flags); + continue; } + ndlp->nlp_rpi = rpi; + lpfc_printf_vlog(ndlp->vport, KERN_INFO, LOG_NODE, + "0009 rpi:%x DID:%x " + "flg:%x map:%x %p\n", ndlp->nlp_rpi, + ndlp->nlp_DID, ndlp->nlp_flag, + ndlp->nlp_usg_map, ndlp); } } lpfc_destroy_vport_work_array(phba, vports); @@ -4722,10 +4726,9 @@ lpfc_sli4_perform_vport_cvl(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, so allocate a new one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) return 0; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Set the node type */ ndlp->nlp_type |= NLP_FABRIC; /* Put ndlp onto node list */ diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 1519fdfc1aa3e..a6adc2b04ba3e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -16542,14 +16542,13 @@ lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, ndlp = lpfc_findnode_did(vport, sid); if (!ndlp) { - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, sid); if (!ndlp) { lpfc_printf_vlog(vport, KERN_WARNING, LOG_ELS, "1268 Failed to allocate ndlp for " "oxid:x%x SID:x%x\n", oxid, sid); return; } - lpfc_nlp_init(vport, ndlp, sid); /* Put ndlp onto pport node list */ lpfc_enqueue_node(vport, ndlp); } else if (!NLP_CHK_NODE_ACT(ndlp)) { diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index 9a0339dbc024b..c714482bf4c55 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -738,10 +738,9 @@ lpfc_vport_delete(struct fc_vport *fc_vport) ndlp = lpfc_findnode_did(vport, Fabric_DID); if (!ndlp) { /* Cannot find existing Fabric ndlp, allocate one */ - ndlp = mempool_alloc(phba->nlp_mem_pool, GFP_KERNEL); + ndlp = lpfc_nlp_init(vport, Fabric_DID); if (!ndlp) goto skip_logo; - lpfc_nlp_init(vport, ndlp, Fabric_DID); /* Indicate free memory when release */ NLP_SET_FREE_REQ(ndlp); } else { From 4d4c4a4ac756e7a0198259a49eeaf0d4a0e9a4fb Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:01 -0700 Subject: [PATCH 26/39] Fix max_sgl_segments settings for NVME / NVMET Cannot set NVME segment counts to a large number The existing module parameter lpfc_sg_seg_cnt is used for both SCSI and NVME. Limit the module parameter lpfc_sg_seg_cnt to 128 with the default being 64 for both NVME and NVMET, assuming NVME is enabled in the driver for that port. The driver will set max_sgl_segments in the NVME/NVMET template to lpfc_sg_seg_cnt + 1. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc.h | 3 ++- drivers/scsi/lpfc/lpfc_nvme.c | 18 ++++++++++++++---- drivers/scsi/lpfc/lpfc_nvmet.c | 19 +++++++++++++++---- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 257bbdd0f0b83..0fc145092e9bc 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -56,7 +56,7 @@ struct lpfc_sli2_slim; #define LPFC_MAX_SG_SEG_CNT 4096 /* sg element count per scsi cmnd */ #define LPFC_MAX_SGL_SEG_CNT 512 /* SGL element count per scsi cmnd */ #define LPFC_MAX_BPL_SEG_CNT 4096 /* BPL element count per scsi cmnd */ -#define LPFC_MIN_NVME_SEG_CNT 254 +#define LPFC_MAX_NVME_SEG_CNT 128 /* max SGL element cnt per NVME cmnd */ #define LPFC_MAX_SGE_SIZE 0x80000000 /* Maximum data allowed in a SGE */ #define LPFC_IOCB_LIST_CNT 2250 /* list of IOCBs for fast-path usage. */ @@ -781,6 +781,7 @@ struct lpfc_hba { uint32_t cfg_nvmet_fb_size; uint32_t cfg_total_seg_cnt; uint32_t cfg_sg_seg_cnt; + uint32_t cfg_nvme_seg_cnt; uint32_t cfg_sg_dma_buf_size; uint64_t cfg_soft_wwnn; uint64_t cfg_soft_wwpn; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index cb1e82b907195..278ae00dff83d 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -1114,12 +1114,12 @@ lpfc_nvme_prep_io_dma(struct lpfc_vport *vport, first_data_sgl = sgl; lpfc_ncmd->seg_cnt = nCmd->sg_cnt; - if (lpfc_ncmd->seg_cnt > phba->cfg_sg_seg_cnt) { + if (lpfc_ncmd->seg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6058 Too many sg segments from " "NVME Transport. Max %d, " "nvmeIO sg_cnt %d\n", - phba->cfg_sg_seg_cnt, + phba->cfg_nvme_seg_cnt, lpfc_ncmd->seg_cnt); lpfc_ncmd->seg_cnt = 0; return 1; @@ -2158,8 +2158,18 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) nfcp_info.node_name = wwn_to_u64(vport->fc_nodename.u.wwn); nfcp_info.port_name = wwn_to_u64(vport->fc_portname.u.wwn); - /* For now need + 1 to get around NVME transport logic */ - lpfc_nvme_template.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. + */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME | LOG_INIT, + "6300 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_nvme_template.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_nvme_template.max_hw_queues = phba->cfg_nvme_io_channel; /* localport is allocated from the stack, but the registration diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 8348bb53dcc72..d0dda428ecb47 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -704,8 +704,19 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) pinfo.port_name = wwn_to_u64(vport->fc_portname.u.wwn); pinfo.port_id = vport->fc_myDID; + /* Limit to LPFC_MAX_NVME_SEG_CNT. + * For now need + 1 to get around NVME transport logic. + */ + if (phba->cfg_sg_seg_cnt > LPFC_MAX_NVME_SEG_CNT) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_INIT, + "6400 Reducing sg segment cnt to %d\n", + LPFC_MAX_NVME_SEG_CNT); + phba->cfg_nvme_seg_cnt = LPFC_MAX_NVME_SEG_CNT; + } else { + phba->cfg_nvme_seg_cnt = phba->cfg_sg_seg_cnt; + } + lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; - lpfc_tgttemplate.max_sgl_segments = phba->cfg_sg_seg_cnt + 1; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_CMD_IN_ISR | @@ -1278,11 +1289,11 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, return NULL; } - if (rsp->sg_cnt > phba->cfg_sg_seg_cnt) { + if (rsp->sg_cnt > phba->cfg_nvme_seg_cnt) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6109 lpfc_nvmet_prep_fcp_wqe: seg cnt err: " - "NPORT x%x oxid:x%x\n", - ctxp->sid, ctxp->oxid); + "NPORT x%x oxid:x%x cnt %d\n", + ctxp->sid, ctxp->oxid, phba->cfg_nvme_seg_cnt); return NULL; } From aeb3c8170bf7f177ab4825b751a1773594636ebf Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:02 -0700 Subject: [PATCH 27/39] Add Fabric assigned WWN support. Adding support for Fabric assigned WWPN and WWNN. Firmware sends first FLOGI to fabric with vendor version changes. On link up driver gets updated service parameter with FAWWN assigned port name. Driver sends 2nd FLOGI with updated fawwpn and modifies the vport->fc_portname in driver. Note: Soft wwpn will not be allowed when fawwpn is enabled. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc.h | 2 ++ drivers/scsi/lpfc/lpfc_attr.c | 8 ++++++++ drivers/scsi/lpfc/lpfc_els.c | 8 +++++--- drivers/scsi/lpfc/lpfc_hbadisc.c | 24 ++++++++++++++++++------ drivers/scsi/lpfc/lpfc_hw.h | 3 +++ drivers/scsi/lpfc/lpfc_hw4.h | 1 + drivers/scsi/lpfc/lpfc_init.c | 31 ++++++++++++++++++++++++++++--- 7 files changed, 65 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 0fc145092e9bc..6d7840b096e6f 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -474,6 +474,8 @@ struct lpfc_vport { unsigned long rcv_buffer_time_stamp; uint32_t vport_flag; #define STATIC_VPORT 1 +#define FAWWPN_SET 2 +#define FAWWPN_PARAM_CHG 4 uint16_t fdmi_num_disc; uint32_t fdmi_hba_mask; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 4b32d021f7d98..513fd07715cdf 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -2292,6 +2292,8 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, struct lpfc_vport *vport = (struct lpfc_vport *) shost->hostdata; struct lpfc_hba *phba = vport->phba; unsigned int cnt = count; + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (uint32_t *)&vport->fc_sparam.un.vendorVersion[0]; /* * We're doing a simple sanity check for soft_wwpn setting. @@ -2305,6 +2307,12 @@ lpfc_soft_wwn_enable_store(struct device *dev, struct device_attribute *attr, * here. The intent is to protect against the random user or * application that is just writing attributes. */ + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) { + lpfc_printf_log(phba, KERN_ERR, LOG_INIT, + "0051 "LPFC_DRIVER_NAME" soft wwpn can not" + " be enabled: fawwpn is enabled\n"); + return -EINVAL; + } /* count may include a LF at end of string */ if (buf[cnt-1] == '\n') diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index ffdcac04d7ff6..35fc260f5f213 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -603,9 +603,11 @@ lpfc_check_clean_addr_bit(struct lpfc_vport *vport, memcmp(&vport->fabric_portname, &sp->portName, sizeof(struct lpfc_name)) || memcmp(&vport->fabric_nodename, &sp->nodeName, - sizeof(struct lpfc_name))) + sizeof(struct lpfc_name)) || + (vport->vport_flag & FAWWPN_PARAM_CHG)) { fabric_param_changed = 1; - + vport->vport_flag &= ~FAWWPN_PARAM_CHG; + } /* * Word 1 Bit 31 in common service parameter is overloaded. * Word 1 Bit 31 in FLOGI request is multiple NPort request @@ -8755,7 +8757,7 @@ lpfc_issue_els_fdisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, pcmd += sizeof(uint32_t); /* Node Name */ pcmd += sizeof(uint32_t); /* Node Name */ memcpy(pcmd, &vport->fc_nodename, 8); - + memset(sp->un.vendorVersion, 0, sizeof(sp->un.vendorVersion)); lpfc_set_disctmo(vport); phba->fc_stat.elsXmitFDISC++; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index d313dde76963c..90d361686d96d 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -3002,6 +3002,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) pmb->context1; struct lpfc_vport *vport = pmb->vport; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); struct serv_parm *sp = &vport->fc_sparam; uint32_t ed_tov; @@ -3031,6 +3032,7 @@ lpfc_mbx_cmpl_read_sparam(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } lpfc_update_vport_wwn(vport); + fc_host_port_name(shost) = wwn_to_u64(vport->fc_portname.u.wwn); if (vport->port_type == LPFC_PHYSICAL_PORT) { memcpy(&phba->wwnn, &vport->fc_nodename, sizeof(phba->wwnn)); memcpy(&phba->wwpn, &vport->fc_portname, sizeof(phba->wwnn)); @@ -3309,6 +3311,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_sli_ring *pring; MAILBOX_t *mb = &pmb->u.mb; struct lpfc_dmabuf *mp = (struct lpfc_dmabuf *) (pmb->context1); + uint8_t attn_type; /* Unblock ELS traffic */ pring = lpfc_phba_elsring(phba); @@ -3325,6 +3328,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } la = (struct lpfc_mbx_read_top *) &pmb->u.mb.un.varReadTop; + attn_type = bf_get(lpfc_mbx_read_top_att_type, la); memcpy(&phba->alpa_map[0], mp->virt, 128); @@ -3337,7 +3341,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) if (phba->fc_eventTag <= la->eventTag) { phba->fc_stat.LinkMultiEvent++; - if (bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) + if (attn_type == LPFC_ATT_LINK_UP) if (phba->fc_eventTag != 0) lpfc_linkdown(phba); } @@ -3353,7 +3357,7 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) } phba->link_events++; - if ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP) && + if ((attn_type == LPFC_ATT_LINK_UP) && !(phba->sli.sli_flag & LPFC_MENLO_MAINT)) { phba->fc_stat.LinkUp++; if (phba->link_flag & LS_LOOPBACK_MODE) { @@ -3379,8 +3383,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) phba->wait_4_mlo_maint_flg); } lpfc_mbx_process_link_up(phba, la); - } else if (bf_get(lpfc_mbx_read_top_att_type, la) == - LPFC_ATT_LINK_DOWN) { + } else if (attn_type == LPFC_ATT_LINK_DOWN || + attn_type == LPFC_ATT_UNEXP_WWPN) { phba->fc_stat.LinkDown++; if (phba->link_flag & LS_LOOPBACK_MODE) lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, @@ -3389,6 +3393,14 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) "Data: x%x x%x x%x\n", la->eventTag, phba->fc_eventTag, phba->pport->port_state, vport->fc_flag); + else if (attn_type == LPFC_ATT_UNEXP_WWPN) + lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, + "1313 Link Down UNEXP WWPN Event x%x received " + "Data: x%x x%x x%x x%x x%x\n", + la->eventTag, phba->fc_eventTag, + phba->pport->port_state, vport->fc_flag, + bf_get(lpfc_mbx_read_top_mm, la), + bf_get(lpfc_mbx_read_top_fa, la)); else lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, "1305 Link Down Event x%x received " @@ -3399,8 +3411,8 @@ lpfc_mbx_cmpl_read_topology(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) bf_get(lpfc_mbx_read_top_fa, la)); lpfc_mbx_issue_link_down(phba); } - if ((phba->sli.sli_flag & LPFC_MENLO_MAINT) && - ((bf_get(lpfc_mbx_read_top_att_type, la) == LPFC_ATT_LINK_UP))) { + if (phba->sli.sli_flag & LPFC_MENLO_MAINT && + attn_type == LPFC_ATT_LINK_UP) { if (phba->link_state != LPFC_LINK_DOWN) { phba->fc_stat.LinkDown++; lpfc_printf_log(phba, KERN_ERR, LOG_LINK_EVENT, diff --git a/drivers/scsi/lpfc/lpfc_hw.h b/drivers/scsi/lpfc/lpfc_hw.h index 15ca214841505..26a5647e057e6 100644 --- a/drivers/scsi/lpfc/lpfc_hw.h +++ b/drivers/scsi/lpfc/lpfc_hw.h @@ -509,6 +509,8 @@ struct class_parms { uint8_t word3Reserved2; /* Fc Word 3, bit 0: 7 */ }; +#define FAPWWN_KEY_VENDOR 0x42524344 /*valid vendor version fawwpn key*/ + struct serv_parm { /* Structure is in Big Endian format */ struct csp cmn; struct lpfc_name portName; @@ -2885,6 +2887,7 @@ struct lpfc_mbx_read_top { #define LPFC_ATT_RESERVED 0x00 /* Reserved - attType */ #define LPFC_ATT_LINK_UP 0x01 /* Link is up */ #define LPFC_ATT_LINK_DOWN 0x02 /* Link is down */ +#define LPFC_ATT_UNEXP_WWPN 0x06 /* Link is down Unexpected WWWPN */ uint32_t word3; #define lpfc_mbx_read_top_alpa_granted_SHIFT 24 #define lpfc_mbx_read_top_alpa_granted_MASK 0x000000FF diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 15277705cb6b8..90499f9e03883 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -3853,6 +3853,7 @@ struct lpfc_acqe_fc_la { #define LPFC_FC_LA_TYPE_NO_HARD_ALPA 0x3 #define LPFC_FC_LA_TYPE_MDS_LINK_DOWN 0x4 #define LPFC_FC_LA_TYPE_MDS_LOOPBACK 0x5 +#define LPFC_FC_LA_TYPE_UNEXP_WWPN 0x6 #define lpfc_acqe_fc_la_port_type_SHIFT 6 #define lpfc_acqe_fc_la_port_type_MASK 0x00000003 #define lpfc_acqe_fc_la_port_type_WORD word0 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index b1957f868eeea..4138d6155a4fd 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -335,6 +335,9 @@ lpfc_dump_wakeup_param_cmpl(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmboxq) void lpfc_update_vport_wwn(struct lpfc_vport *vport) { + uint8_t vvvl = vport->fc_sparam.cmn.valid_vendor_ver_level; + u32 *fawwpn_key = (u32 *)&vport->fc_sparam.un.vendorVersion[0]; + /* If the soft name exists then update it using the service params */ if (vport->phba->cfg_soft_wwnn) u64_to_wwn(vport->phba->cfg_soft_wwnn, @@ -354,9 +357,25 @@ lpfc_update_vport_wwn(struct lpfc_vport *vport) memcpy(&vport->fc_sparam.nodeName, &vport->fc_nodename, sizeof(struct lpfc_name)); - if (vport->fc_portname.u.wwn[0] == 0 || vport->phba->cfg_soft_wwpn) + /* + * If the port name has changed, then set the Param changes flag + * to unreg the login + */ + if (vport->fc_portname.u.wwn[0] != 0 && + memcmp(&vport->fc_portname, &vport->fc_sparam.portName, + sizeof(struct lpfc_name))) + vport->vport_flag |= FAWWPN_PARAM_CHG; + + if (vport->fc_portname.u.wwn[0] == 0 || + vport->phba->cfg_soft_wwpn || + (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) || + vport->vport_flag & FAWWPN_SET) { memcpy(&vport->fc_portname, &vport->fc_sparam.portName, sizeof(struct lpfc_name)); + vport->vport_flag &= ~FAWWPN_SET; + if (vvvl == 1 && cpu_to_be32(*fawwpn_key) == FAPWWN_KEY_VENDOR) + vport->vport_flag |= FAWWPN_SET; + } else memcpy(&vport->fc_sparam.portName, &vport->fc_portname, sizeof(struct lpfc_name)); @@ -4518,9 +4537,15 @@ lpfc_sli4_async_fc_evt(struct lpfc_hba *phba, struct lpfc_acqe_fc_la *acqe_fc) /* Parse and translate link attention fields */ la = (struct lpfc_mbx_read_top *)&pmb->u.mb.un.varReadTop; la->eventTag = acqe_fc->event_tag; - bf_set(lpfc_mbx_read_top_att_type, la, - LPFC_FC_LA_TYPE_LINK_DOWN); + if (phba->sli4_hba.link_state.status == + LPFC_FC_LA_TYPE_UNEXP_WWPN) { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_UNEXP_WWPN); + } else { + bf_set(lpfc_mbx_read_top_att_type, la, + LPFC_FC_LA_TYPE_LINK_DOWN); + } /* Invoke the mailbox command callback function */ lpfc_mbx_cmpl_read_topology(phba, pmb); From 1c5b12f76301b86d0e5828c7d11ec7c36ffd0195 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:03 -0700 Subject: [PATCH 28/39] Fix implicit logo and RSCN handling for NVMET NVMET didn't have any RSCN handling at all and would not execute implicit LOGO when receiving a PLOGI from an rport that NVMET had in state UNMAPPED. Clean up the logic in lpfc_nlp_state_cleanup for initiators (FCP and NVME). NVMET should not respond to RSCN including allocating new ndlps so this code was conditionalized when nvmet_support is true. The check for NLP_RCV_PLOGI in lpfc_setup_disc_node was moved below the check for nvmet_support to allow the NVMET to recover initiator nodes correctly. The implicit logo was introduced with lpfc_rcv_plogi when NVMET gets a PLOGI on an ndlp in UNMAPPED state. The RSCN handling was modified to not respond to an RSCN in NVMET. Instead NVMET sends a GID_FT and determines if an NVMEP_INITIATOR it has is UNMAPPED but no longer in the zone membership. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_ct.c | 68 +++++++++++++++++++++++------- drivers/scsi/lpfc/lpfc_disc.h | 1 + drivers/scsi/lpfc/lpfc_els.c | 23 +++++----- drivers/scsi/lpfc/lpfc_hbadisc.c | 62 +++++++++++++++++---------- drivers/scsi/lpfc/lpfc_nportdisc.c | 8 +++- 5 files changed, 110 insertions(+), 52 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index d3e9af983015c..1487406aea778 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -537,19 +537,53 @@ lpfc_prep_node_fc4type(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) } } +static void +lpfc_ns_rsp_audit_did(struct lpfc_vport *vport, uint32_t Did, uint8_t fc4_type) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_nodelist *ndlp = NULL; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + + /* + * To conserve rpi's, filter out addresses for other + * vports on the same physical HBAs. + */ + if (Did != vport->fc_myDID && + (!lpfc_find_vport_by_did(phba, Did) || + vport->cfg_peer_port_login)) { + if (!phba->nvmet_support) { + /* FCPI/NVMEI path. Process Did */ + lpfc_prep_node_fc4type(vport, Did, fc4_type); + return; + } + /* NVMET path. NVMET only cares about NVMEI nodes. */ + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (ndlp->nlp_type != NLP_NVME_INITIATOR || + ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) + continue; + spin_lock_irq(shost->host_lock); + if (ndlp->nlp_DID == Did) + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + else + ndlp->nlp_flag |= NLP_NVMET_RECOV; + spin_unlock_irq(shost->host_lock); + } + } +} + static int lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, uint32_t Size) { - struct lpfc_hba *phba = vport->phba; struct lpfc_sli_ct_request *Response = (struct lpfc_sli_ct_request *) mp->virt; - struct lpfc_nodelist *ndlp = NULL; struct lpfc_dmabuf *mlast, *next_mp; uint32_t *ctptr = (uint32_t *) & Response->un.gid.PortType; uint32_t Did, CTentry; int Cnt; struct list_head head; + struct Scsi_Host *shost = lpfc_shost_from_vport(vport); + struct lpfc_nodelist *ndlp = NULL; lpfc_set_disctmo(vport); vport->num_disc_nodes = 0; @@ -574,19 +608,7 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, /* Get next DID from NameServer List */ CTentry = *ctptr++; Did = ((be32_to_cpu(CTentry)) & Mask_DID); - - ndlp = NULL; - - /* - * Check for rscn processing or not - * To conserve rpi's, filter out addresses for other - * vports on the same physical HBAs. - */ - if ((Did != vport->fc_myDID) && - ((lpfc_find_vport_by_did(phba, Did) == NULL) || - vport->cfg_peer_port_login)) - lpfc_prep_node_fc4type(vport, Did, fc4_type); - + lpfc_ns_rsp_audit_did(vport, Did, fc4_type); if (CTentry & (cpu_to_be32(SLI_CT_LAST_ENTRY))) goto nsout1; @@ -596,6 +618,22 @@ lpfc_ns_rsp(struct lpfc_vport *vport, struct lpfc_dmabuf *mp, uint8_t fc4_type, } + /* All GID_FT entries processed. If the driver is running in + * in target mode, put impacted nodes into recovery and drop + * the RPI to flush outstanding IO. + */ + if (vport->phba->nvmet_support) { + list_for_each_entry(ndlp, &vport->fc_nodes, nlp_listp) { + if (!(ndlp->nlp_flag & NLP_NVMET_RECOV)) + continue; + lpfc_disc_state_machine(vport, ndlp, NULL, + NLP_EVT_DEVICE_RECOVERY); + spin_lock_irq(shost->host_lock); + ndlp->nlp_flag &= ~NLP_NVMET_RECOV; + spin_lock_irq(shost->host_lock); + } + } + nsout1: list_del(&head); return 0; diff --git a/drivers/scsi/lpfc/lpfc_disc.h b/drivers/scsi/lpfc/lpfc_disc.h index f4ff99d95db34..9d5a379f4b157 100644 --- a/drivers/scsi/lpfc/lpfc_disc.h +++ b/drivers/scsi/lpfc/lpfc_disc.h @@ -157,6 +157,7 @@ struct lpfc_node_rrq { #define NLP_LOGO_SND 0x00000100 /* sent LOGO request for this entry */ #define NLP_RNID_SND 0x00000400 /* sent RNID request for this entry */ #define NLP_ELS_SND_MASK 0x000007e0 /* sent ELS request for this entry */ +#define NLP_NVMET_RECOV 0x00001000 /* NVMET auditing node for recovery. */ #define NLP_DEFER_RM 0x00010000 /* Remove this ndlp if no longer used */ #define NLP_DELAY_TMO 0x00020000 /* delay timeout is running for node */ #define NLP_NPR_2B_DISC 0x00040000 /* node is included in num_disc_nodes */ diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c index 35fc260f5f213..8d1c6897ca623 100644 --- a/drivers/scsi/lpfc/lpfc_els.c +++ b/drivers/scsi/lpfc/lpfc_els.c @@ -5861,8 +5861,11 @@ lpfc_rscn_recovery_check(struct lpfc_vport *vport) (ndlp->nlp_state == NLP_STE_UNUSED_NODE) || !lpfc_rscn_payload_check(vport, ndlp->nlp_DID)) continue; + + /* NVME Target mode does not do RSCN Recovery. */ if (vport->phba->nvmet_support) continue; + lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RECOVERY); lpfc_cancel_retry_delay_tmo(vport, ndlp); @@ -6150,22 +6153,16 @@ lpfc_els_handle_rscn(struct lpfc_vport *vport) ndlp = lpfc_findnode_did(vport, NameServer_DID); if (ndlp && NLP_CHK_NODE_ACT(ndlp) && ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) { - /* Good ndlp, issue CT Request to NameServer */ + /* Good ndlp, issue CT Request to NameServer. Need to + * know how many gidfts were issued. If none, then just + * flush the RSCN. Otherwise, the outstanding requests + * need to complete. + */ vport->gidft_inp = 0; - if (lpfc_issue_gidft(vport) == 0) - /* Wait for NameServer query cmpl before we can - * continue - */ + if (lpfc_issue_gidft(vport) > 0) return 1; } else { - /* If login to NameServer does not exist, issue one */ - /* Good status, issue PLOGI to NameServer */ - ndlp = lpfc_findnode_did(vport, NameServer_DID); - if (ndlp && NLP_CHK_NODE_ACT(ndlp)) - /* Wait for NameServer login cmpl before we can - continue */ - return 1; - + /* Nameserver login in question. Revalidate. */ if (ndlp) { ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_PLOGI_ISSUE); diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 90d361686d96d..0482c55803310 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -4148,7 +4148,6 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, int old_state, int new_state) { struct Scsi_Host *shost = lpfc_shost_from_vport(vport); - struct lpfc_hba *phba = vport->phba; if (new_state == NLP_STE_UNMAPPED_NODE) { ndlp->nlp_flag &= ~NLP_NODEV_REMOVE; @@ -4167,14 +4166,14 @@ lpfc_nlp_state_cleanup(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_unregister_remote_port(ndlp); } - /* Notify the NVME transport of this rport's loss */ - if (((phba->cfg_enable_fc4_type == LPFC_ENABLE_BOTH) || - (phba->cfg_enable_fc4_type == LPFC_ENABLE_NVME)) && - (vport->phba->nvmet_support == 0) && - ((ndlp->nlp_fc4_type & NLP_FC4_NVME) || - (ndlp->nlp_DID == Fabric_DID))) { + /* Notify the NVME transport of this rport's loss on the + * Initiator. For NVME Target, should upcall transport + * in the else clause when API available. + */ + if (ndlp->nlp_fc4_type & NLP_FC4_NVME) { vport->phba->nport_event_cnt++; - lpfc_nvme_unregister_port(vport, ndlp); + if (vport->phba->nvmet_support == 0) + lpfc_nvme_unregister_port(vport, ndlp); } } @@ -5128,6 +5127,8 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) ndlp = lpfc_findnode_did(vport, did); if (!ndlp) { + if (vport->phba->nvmet_support) + return NULL; if ((vport->fc_flag & FC_RSCN_MODE) != 0 && lpfc_rscn_payload_check(vport, did) == 0) return NULL; @@ -5135,56 +5136,73 @@ lpfc_setup_disc_node(struct lpfc_vport *vport, uint32_t did) if (!ndlp) return NULL; lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } else if (!NLP_CHK_NODE_ACT(ndlp)) { + if (vport->phba->nvmet_support) + return NULL; ndlp = lpfc_enable_node(vport, ndlp, NLP_STE_NPR_NODE); if (!ndlp) return NULL; - if (vport->phba->nvmet_support) - return ndlp; spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); return ndlp; } + /* The NVME Target does not want to actively manage an rport. + * The goal is to allow the target to reset its state and clear + * pending IO in preparation for the initiator to recover. + */ if ((vport->fc_flag & FC_RSCN_MODE) && !(vport->fc_flag & FC_NDISC_ACTIVE)) { if (lpfc_rscn_payload_check(vport, did)) { - /* If we've already received a PLOGI from this NPort - * we don't need to try to discover it again. - */ - if (ndlp->nlp_flag & NLP_RCV_PLOGI) - return NULL; /* Since this node is marked for discovery, * delay timeout is not needed. */ lpfc_cancel_retry_delay_tmo(vport, ndlp); + + /* NVME Target mode waits until rport is known to be + * impacted by the RSCN before it transitions. No + * active management - just go to NPR provided the + * node had a valid login. + */ if (vport->phba->nvmet_support) return ndlp; + + /* If we've already received a PLOGI from this NPort + * we don't need to try to discover it again. + */ + if (ndlp->nlp_flag & NLP_RCV_PLOGI) + return NULL; + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); } else ndlp = NULL; } else { - /* If we've already received a PLOGI from this NPort, - * or we are already in the process of discovery on it, - * we don't need to try to discover it again. + /* If the initiator received a PLOGI from this NPort or if the + * initiator is already in the process of discovery on it, + * there's no need to try to discover it again. */ if (ndlp->nlp_state == NLP_STE_ADISC_ISSUE || ndlp->nlp_state == NLP_STE_PLOGI_ISSUE || - ndlp->nlp_flag & NLP_RCV_PLOGI) + (!vport->phba->nvmet_support && + ndlp->nlp_flag & NLP_RCV_PLOGI)) return NULL; - lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + if (vport->phba->nvmet_support) return ndlp; + + /* Moving to NPR state clears unsolicited flags and + * allows for rediscovery + */ + lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); + spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_2B_DISC; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index 061626bdf7010..8777c2d5f50d3 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -361,8 +361,12 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, case NLP_STE_PRLI_ISSUE: case NLP_STE_UNMAPPED_NODE: case NLP_STE_MAPPED_NODE: - /* lpfc_plogi_confirm_nport skips fabric did, handle it here */ - if (!(ndlp->nlp_type & NLP_FABRIC)) { + /* For initiators, lpfc_plogi_confirm_nport skips fabric did. + * For target mode, execute implicit logo. + * Fabric nodes go into NPR. + */ + if (!(ndlp->nlp_type & NLP_FABRIC) && + !(phba->nvmet_support)) { lpfc_els_rsp_acc(vport, ELS_CMD_PLOGI, cmdiocb, ndlp, NULL); return 1; From 86c6737963e1c6019168512743908c8ee4e80f06 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:04 -0700 Subject: [PATCH 29/39] Update ABORT processing for NVMET. The driver with nvme had this routine stubbed. Right now XRI_ABORTED_CQE is not handled and the FC NVMET Transport has a new API for the driver. Missing code path, new NVME abort API Update ABORT processing for NVMET There are 3 new FC NVMET Transport API/ template routines for NVMET: lpfc_nvmet_xmt_fcp_release This NVMET template callback routine called to release context associated with an IO This routine is ALWAYS called last, even if the IO was aborted or completed in error. lpfc_nvmet_xmt_fcp_abort This NVMET template callback routine called to abort an exchange that has an IO in progress nvmet_fc_rcv_fcp_req When the lpfc driver receives an ABTS, this NVME FC transport layer callback routine is called. For this case there are 2 paths thru the driver: the driver either has an outstanding exchange / context for the XRI to be aborted or not. If not, a BA_RJT is issued otherwise a BA_ACC NVMET Driver abort paths: There are 2 paths for aborting an IO. The first one is we receive an IO and decide not to process it because of lack of resources. An unsolicated ABTS is immediately sent back to the initiator as a response. lpfc_nvmet_unsol_fcp_buffer lpfc_nvmet_unsol_issue_abort (XMIT_SEQUENCE_WQE) The second one is we sent the IO up to the NVMET transport layer to process, and for some reason the NVME Transport layer decided to abort the IO before it completes all its phases. For this case there are 2 paths thru the driver: the driver either has an outstanding TSEND/TRECEIVE/TRSP WQE or no outstanding WQEs are present for the exchange / context. lpfc_nvmet_xmt_fcp_abort if (LPFC_NVMET_IO_INP) lpfc_nvmet_sol_fcp_issue_abort (ABORT_WQE) lpfc_nvmet_sol_fcp_abort_cmp else lpfc_nvmet_unsol_fcp_issue_abort lpfc_nvmet_unsol_issue_abort (XMIT_SEQUENCE_WQE) lpfc_nvmet_unsol_fcp_abort_cmp Context flags: LPFC_NVMET_IOP - his flag signifies an IO is in progress on the exchange. LPFC_NVMET_XBUSY - this flag indicates the IO completed but the firmware is still busy with the corresponding exchange. The exchange should not be reused until after a XRI_ABORTED_CQE is received for that exchange. LPFC_NVMET_ABORT_OP - this flag signifies an ABORT_WQE was issued on the exchange. LPFC_NVMET_CTX_RLS - this flag signifies a context free was requested, but we are deferring it due to an XBUSY or ABORT in progress. A ctxlock is added to the context structure that is used whenever these flags are set/read within the context of an IO. The LPFC_NVMET_CTX_RLS flag is only set in the defer_relase routine when the transport has resolved all IO associated with the buffer. The flag is cleared when the CTX is associated with a new IO. An exchange can has both an LPFC_NVMET_XBUSY and a LPFC_NVMET_ABORT_OP condition active simultaneously. Both conditions must complete before the exchange is freed. When the abort callback (lpfc_nvmet_xmt_fcp_abort) is envoked: If there is an outstanding IO, the driver will issue an ABORT_WQE. This should result in 3 completions for the exchange: 1) IO cmpl with XB bit set 2) Abort WQE cmpl 3) XRI_ABORTED_CQE cmpl For this scenerio, after completion #1, the NVMET Transport IO rsp callback is called. After completion #2, no action is taken with respect to the exchange / context. After completion #3, the exchange context is free for re-use on another IO. If there is no outstanding activity on the exchange, the driver will send a ABTS to the Initiator. Upon completion of this WQE, the exchange / context is freed for re-use on another IO. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_crtn.h | 7 + drivers/scsi/lpfc/lpfc_debugfs.c | 53 +++-- drivers/scsi/lpfc/lpfc_hw4.h | 3 + drivers/scsi/lpfc/lpfc_init.c | 52 +++-- drivers/scsi/lpfc/lpfc_mbox.c | 7 +- drivers/scsi/lpfc/lpfc_nvme.c | 45 ++-- drivers/scsi/lpfc/lpfc_nvmet.c | 348 ++++++++++++++++++++++++------- drivers/scsi/lpfc/lpfc_nvmet.h | 10 +- drivers/scsi/lpfc/lpfc_sli.c | 7 +- drivers/scsi/lpfc/lpfc_sli4.h | 2 +- 10 files changed, 409 insertions(+), 125 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 0079c6fc31fca..944b32ca49314 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -24,6 +24,7 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; struct fc_frame_header; +struct lpfc_nvmet_rcv_ctx; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -245,6 +246,10 @@ struct hbq_dmabuf *lpfc_sli4_rb_alloc(struct lpfc_hba *); void lpfc_sli4_rb_free(struct lpfc_hba *, struct hbq_dmabuf *); struct rqb_dmabuf *lpfc_sli4_nvmet_alloc(struct lpfc_hba *phba); void lpfc_sli4_nvmet_free(struct lpfc_hba *phba, struct rqb_dmabuf *dmab); +void lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_dmabuf *mp); +int lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr); void lpfc_sli4_build_dflt_fcf_record(struct lpfc_hba *, struct fcf_record *, uint16_t); int lpfc_sli4_rq_put(struct lpfc_queue *hq, struct lpfc_queue *dq, @@ -302,6 +307,8 @@ int lpfc_sli_check_eratt(struct lpfc_hba *); void lpfc_sli_handle_slow_ring_event(struct lpfc_hba *, struct lpfc_sli_ring *, uint32_t); void lpfc_sli4_handle_received_buffer(struct lpfc_hba *, struct hbq_dmabuf *); +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr, bool aborted); void lpfc_sli_def_mbox_cmpl(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli4_unreg_rpi_cmpl_clr(struct lpfc_hba *, LPFC_MBOXQ_t *); int lpfc_sli_issue_iocb(struct lpfc_hba *, uint32_t, diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 55a8d8ffcfd4d..fce549a91911c 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -745,73 +745,102 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; int len = 0; + int cnt; if (phba->nvmet_support) { if (!phba->targetport) return len; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "\nNVME Targetport Statistics\n"); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Rcv %08x Drop %08x Abort %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_drop), atomic_read(&tgtp->xmt_ls_abort)); if (atomic_read(&tgtp->rcv_ls_req_in) != atomic_read(&tgtp->rcv_ls_req_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv LS: in %08x != out %08x\n", atomic_read(&tgtp->rcv_ls_req_in), atomic_read(&tgtp->rcv_ls_req_out)); } - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "LS: Xmt %08x Drop %08x Cmpl %08x Err %08x\n", atomic_read(&tgtp->xmt_ls_rsp), atomic_read(&tgtp->xmt_ls_drop), atomic_read(&tgtp->xmt_ls_rsp_cmpl), atomic_read(&tgtp->xmt_ls_rsp_error)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP: Rcv %08x Drop %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_drop)); if (atomic_read(&tgtp->rcv_fcp_cmd_in) != atomic_read(&tgtp->rcv_fcp_cmd_out)) { - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "Rcv FCP: in %08x != out %08x\n", atomic_read(&tgtp->rcv_fcp_cmd_in), atomic_read(&tgtp->rcv_fcp_cmd_out)); } - len += snprintf(buf+len, size-len, - "FCP Rsp: read %08x readrsp %08x write %08x rsp %08x\n", + len += snprintf(buf + len, size - len, + "FCP Rsp: read %08x readrsp %08x " + "write %08x rsp %08x\n", atomic_read(&tgtp->xmt_fcp_read), atomic_read(&tgtp->xmt_fcp_read_rsp), atomic_read(&tgtp->xmt_fcp_write), atomic_read(&tgtp->xmt_fcp_rsp)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp: abort %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_abort), atomic_read(&tgtp->xmt_fcp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "FCP Rsp Cmpl: %08x err %08x drop %08x\n", atomic_read(&tgtp->xmt_fcp_rsp_cmpl), atomic_read(&tgtp->xmt_fcp_rsp_error), atomic_read(&tgtp->xmt_fcp_rsp_drop)); - len += snprintf(buf+len, size-len, + len += snprintf(buf + len, size - len, "ABORT: Xmt %08x Err %08x Cmpl %08x", atomic_read(&tgtp->xmt_abort_rsp), atomic_read(&tgtp->xmt_abort_rsp_error), atomic_read(&tgtp->xmt_abort_cmpl)); - len += snprintf(buf+len, size-len, "\n"); + len += snprintf(buf + len, size - len, "\n"); + + cnt = 0; + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + cnt++; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + if (cnt) { + len += snprintf(buf + len, size - len, + "ABORT: %d ctx entries\n", cnt); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (len >= (size - LPFC_DEBUG_OUT_LINE_SZ)) + break; + len += snprintf(buf + len, size - len, + "Entry: oxid %x state %x " + "flag %x\n", + ctxp->oxid, ctxp->state, + ctxp->flag); + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + } } else { if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) return len; diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h index 90499f9e03883..1d12f2be36bcc 100644 --- a/drivers/scsi/lpfc/lpfc_hw4.h +++ b/drivers/scsi/lpfc/lpfc_hw4.h @@ -2720,6 +2720,9 @@ struct lpfc_mbx_request_features { #define lpfc_mbx_rq_ftr_rq_ifip_SHIFT 7 #define lpfc_mbx_rq_ftr_rq_ifip_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_ifip_WORD word2 +#define lpfc_mbx_rq_ftr_rq_iaar_SHIFT 9 +#define lpfc_mbx_rq_ftr_rq_iaar_MASK 0x00000001 +#define lpfc_mbx_rq_ftr_rq_iaar_WORD word2 #define lpfc_mbx_rq_ftr_rq_perfh_SHIFT 11 #define lpfc_mbx_rq_ftr_rq_perfh_MASK 0x00000001 #define lpfc_mbx_rq_ftr_rq_perfh_WORD word2 diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4138d6155a4fd..74cec2232a864 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -42,6 +42,10 @@ #include #include #include +#include +#include + +#include #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -52,6 +56,7 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" +#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -1022,8 +1027,10 @@ static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_scsi_buf *psb, *psb_next; + struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); + LIST_HEAD(nvmet_aborts); unsigned long iflag = 0; struct lpfc_sglq *sglq_entry = NULL; @@ -1046,16 +1053,10 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_for_each_entry(sglq_entry, &phba->sli4_hba.lpfc_abts_els_sgl_list, list) sglq_entry->state = SGL_FREED; - list_for_each_entry(sglq_entry, - &phba->sli4_hba.lpfc_abts_nvmet_sgl_list, list) - sglq_entry->state = SGL_FREED; list_splice_init(&phba->sli4_hba.lpfc_abts_els_sgl_list, &phba->sli4_hba.lpfc_els_sgl_list); - if (phba->sli4_hba.nvme_wq) - list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list, - &phba->sli4_hba.lpfc_nvmet_sgl_list); spin_unlock(&phba->sli4_hba.sgl_list_lock); /* abts_scsi_buf_list_lock required because worker thread uses this @@ -1072,6 +1073,8 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); list_splice_init(&phba->sli4_hba.lpfc_abts_nvme_buf_list, &nvme_aborts); + list_splice_init(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + &nvmet_aborts); spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); } @@ -1085,13 +1088,20 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) list_splice(&aborts, &phba->lpfc_scsi_buf_list_put); spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, iflag); - list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { - psb->pCmd = NULL; - psb->status = IOSTAT_SUCCESS; + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { + list_for_each_entry_safe(psb, psb_next, &nvme_aborts, list) { + psb->pCmd = NULL; + psb->status = IOSTAT_SUCCESS; + } + spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); + list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); + spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); + + list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { + ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + } } - spin_lock_irqsave(&phba->nvme_buf_list_put_lock, iflag); - list_splice(&nvme_aborts, &phba->lpfc_nvme_buf_list_put); - spin_unlock_irqrestore(&phba->nvme_buf_list_put_lock, iflag); lpfc_sli4_free_sp_events(phba); return 0; @@ -5812,6 +5822,7 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba) /* Initialize the Abort nvme buffer list used by driver */ spin_lock_init(&phba->sli4_hba.abts_nvme_buf_list_lock); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* Fast-path XRI aborted CQ Event work queue list */ INIT_LIST_HEAD(&phba->sli4_hba.sp_nvme_xri_aborted_work_queue); } @@ -6439,7 +6450,7 @@ lpfc_init_sgl_list(struct lpfc_hba *phba) INIT_LIST_HEAD(&phba->sli4_hba.lpfc_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_els_sgl_list); INIT_LIST_HEAD(&phba->sli4_hba.lpfc_nvmet_sgl_list); - INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); + INIT_LIST_HEAD(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); /* els xri-sgl book keeping */ phba->sli4_hba.els_xri_cnt = 0; @@ -9966,17 +9977,19 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) { int wait_time = 0; int nvme_xri_cmpl = 1; + int nvmet_xri_cmpl = 1; int fcp_xri_cmpl = 1; int els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - int nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_scsi_buf_list); - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = + list_empty(&phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } while (!fcp_xri_cmpl || !els_xri_cmpl || !nvme_xri_cmpl || !nvmet_xri_cmpl) { @@ -10002,9 +10015,12 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) msleep(LPFC_XRI_EXCH_BUSY_WAIT_T1); wait_time += LPFC_XRI_EXCH_BUSY_WAIT_T1; } - if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) + if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { nvme_xri_cmpl = list_empty( &phba->sli4_hba.lpfc_abts_nvme_buf_list); + nvmet_xri_cmpl = list_empty( + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + } if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) fcp_xri_cmpl = list_empty( @@ -10013,8 +10029,6 @@ lpfc_sli4_xri_exchange_busy_wait(struct lpfc_hba *phba) els_xri_cmpl = list_empty(&phba->sli4_hba.lpfc_abts_els_sgl_list); - nvmet_xri_cmpl = - list_empty(&phba->sli4_hba.lpfc_abts_nvmet_sgl_list); } } diff --git a/drivers/scsi/lpfc/lpfc_mbox.c b/drivers/scsi/lpfc/lpfc_mbox.c index a928f5187fa46..ce25a18367b59 100644 --- a/drivers/scsi/lpfc/lpfc_mbox.c +++ b/drivers/scsi/lpfc/lpfc_mbox.c @@ -2083,9 +2083,12 @@ lpfc_request_features(struct lpfc_hba *phba, struct lpfcMboxq *mboxq) if (phba->max_vpi && phba->cfg_enable_npiv) bf_set(lpfc_mbx_rq_ftr_rq_npiv, &mboxq->u.mqe.un.req_ftrs, 1); - if (phba->nvmet_support) + if (phba->nvmet_support) { bf_set(lpfc_mbx_rq_ftr_rq_mrqp, &mboxq->u.mqe.un.req_ftrs, 1); - + /* iaab/iaar NOT set for now */ + bf_set(lpfc_mbx_rq_ftr_rq_iaab, &mboxq->u.mqe.un.req_ftrs, 0); + bf_set(lpfc_mbx_rq_ftr_rq_iaar, &mboxq->u.mqe.un.req_ftrs, 0); + } return; } diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index 278ae00dff83d..f98cbc24d862d 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -868,15 +868,18 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, break; lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, "6081 NVME Completion Protocol Error: " - "status x%x result x%x placed x%x\n", + "xri %x status x%x result x%x " + "placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); break; default: out_err: lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_IOERR, - "6072 NVME Completion Error: " + "6072 NVME Completion Error: xri %x " "status x%x result x%x placed x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, lpfc_ncmd->status, lpfc_ncmd->result, wcqe->total_data_placed); nCmd->transferred_length = 0; @@ -1429,7 +1432,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, phba = vport->phba; /* Announce entry to new IO submit field. */ - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6002 Abort Request to rport DID x%06x " "for nvme_fc_req %p\n", pnvme_rport->port_id, @@ -1459,7 +1462,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* The remote node has to be ready to send an abort. */ if ((ndlp->nlp_state != NLP_STE_MAPPED_NODE) && !(ndlp->nlp_type & NLP_NVME_TARGET)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_ABTS, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6048 rport %p, DID x%06x not ready for " "IO. State x%x, Type x%x\n", rport, pnvme_rport->port_id, @@ -1474,7 +1477,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* driver queued commands are in process of being flushed */ if (phba->hba_flag & HBA_NVME_IOQ_FLUSH) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6139 Driver in reset cleanup - flushing " "NVME Req now. hba_flag x%x\n", phba->hba_flag); @@ -1484,13 +1487,13 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; if (!lpfc_nbuf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6140 NVME IO req has no matching lpfc nvme " "io buffer. Skipping abort req.\n"); return; } else if (!lpfc_nbuf->nvmeCmd) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6141 lpfc NVME IO req has no nvme_fcreq " "io buffer. Skipping abort req.\n"); return; @@ -1506,7 +1509,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, */ if (lpfc_nbuf->nvmeCmd != pnvme_fcreq) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6143 NVME req mismatch: " "lpfc_nbuf %p nvmeCmd %p, " "pnvme_fcreq %p. Skipping Abort xri x%x\n", @@ -1518,7 +1521,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Don't abort IOs no longer on the pending queue. */ if (!(nvmereq_wqe->iocb_flag & LPFC_IO_ON_TXCMPLQ)) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6142 NVME IO req %p not queued - skipping " "abort req xri x%x\n", pnvme_fcreq, nvmereq_wqe->sli4_xritag); @@ -1532,7 +1535,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, /* Outstanding abort is in progress */ if (nvmereq_wqe->iocb_flag & LPFC_DRIVER_ABORTED) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6144 Outstanding NVME I/O Abort Request " "still pending on nvme_fcreq %p, " "lpfc_ncmd %p xri x%x\n", @@ -1544,7 +1547,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, abts_buf = __lpfc_sli_get_iocbq(phba); if (!abts_buf) { spin_unlock_irqrestore(&phba->hbalock, flags); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6136 No available abort wqes. Skipping " "Abts req for nvme_fcreq %p xri x%x\n", pnvme_fcreq, nvmereq_wqe->sli4_xritag); @@ -1596,7 +1599,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, ret_val = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_buf); spin_unlock_irqrestore(&phba->hbalock, flags); if (ret_val == IOCB_ERROR) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, "6137 Failed abts issue_wqe with status x%x " "for nvme_fcreq %p.\n", ret_val, pnvme_fcreq); @@ -1604,7 +1607,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, "6138 Transport Abort NVME Request Issued for " "ox_id x%x on reqtag x%x\n", nvmereq_wqe->sli4_xritag, @@ -2108,6 +2111,12 @@ lpfc_release_nvme_buf(struct lpfc_hba *phba, struct lpfc_nvme_buf *lpfc_ncmd) lpfc_ncmd->nonsg_phys = 0; if (lpfc_ncmd->flags & LPFC_SBUF_XBUSY) { + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6310 XB release deferred for " + "ox_id x%x on reqtag x%x\n", + lpfc_ncmd->cur_iocbq.sli4_xritag, + lpfc_ncmd->cur_iocbq.iotag); + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); lpfc_ncmd->nvmeCmd = NULL; @@ -2549,6 +2558,12 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, rxid, 1); lpfc_sli4_abts_err_handler(phba, ndlp, axri); } + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6311 XRI Aborted xri x%x tag x%x " + "released\n", + xri, lpfc_ncmd->cur_iocbq.iotag); + lpfc_release_nvme_buf(phba, lpfc_ncmd); if (rrq_empty) lpfc_worker_wake_up(phba); @@ -2557,4 +2572,8 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, } spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6312 XRI Aborted xri x%x not found\n", xri); + } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index d0dda428ecb47..f9aafe2a21e25 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -71,6 +71,26 @@ static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, struct lpfc_nvmet_rcv_ctx *, uint32_t, uint16_t); +void +lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) +{ + unsigned long iflag; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6313 NVMET Defer ctx release xri x%x flg x%x\n", + ctxp->oxid, ctxp->flag); + + spin_lock_irqsave(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); + if (ctxp->flag & LPFC_NVMET_CTX_RLS) { + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, + iflag); + return; + } + ctxp->flag |= LPFC_NVMET_CTX_RLS; + list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); + spin_unlock_irqrestore(&phba->sli4_hba.abts_nvme_buf_list_lock, iflag); +} + /** * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response * @phba: Pointer to HBA context object. @@ -139,6 +159,11 @@ lpfc_nvmet_rq_post(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp, struct lpfc_dmabuf *mp) { if (ctxp) { + if (ctxp->flag) + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6314 rq_post ctx xri x%x flag x%x\n", + ctxp->oxid, ctxp->flag); + if (ctxp->txrdy) { pci_pool_free(phba->txrdy_payload_pool, ctxp->txrdy, ctxp->txrdy_phys); @@ -337,39 +362,55 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif ctxp = cmdwqe->context2; + ctxp->flag &= ~LPFC_NVMET_IO_INP; + rsp = &ctxp->ctx.fcp_req; op = rsp->op; - ctxp->flag &= ~LPFC_NVMET_IO_INP; status = bf_get(lpfc_wcqe_c_status, wcqe); result = wcqe->parameter; - if (!phba->targetport) - goto out; + if (phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + else + tgtp = NULL; lpfc_nvmeio_data(phba, "NVMET FCP CMPL: xri x%x op x%x status x%x\n", ctxp->oxid, op, status); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (status) { rsp->fcp_error = NVME_SC_DATA_XFER_ERROR; rsp->transferred_length = 0; - atomic_inc(&tgtp->xmt_fcp_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_error); + + /* pick up SLI4 exhange busy condition */ + if (bf_get(lpfc_wcqe_c_xb, wcqe)) { + ctxp->flag |= LPFC_NVMET_XBUSY; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6315 IO Cmpl XBUSY: xri x%x: %x/%x\n", + ctxp->oxid, status, result); + } else { + ctxp->flag &= ~LPFC_NVMET_XBUSY; + } + } else { rsp->fcp_error = NVME_SC_SUCCESS; if (op == NVMET_FCOP_RSP) rsp->transferred_length = rsp->rsplen; else rsp->transferred_length = rsp->transfer_length; - atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); + if (tgtp) + atomic_inc(&tgtp->xmt_fcp_rsp_cmpl); } -out: if ((op == NVMET_FCOP_READDATA_RSP) || (op == NVMET_FCOP_RSP)) { /* Sanity check */ ctxp->state = LPFC_NVMET_STE_DONE; ctxp->entry_cnt++; + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (phba->ktime_on) { if (rsp->op == NVMET_FCOP_READDATA_RSP) { @@ -542,10 +583,11 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, #endif /* Sanity check */ - if (ctxp->state == LPFC_NVMET_STE_ABORT) { + if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || + (ctxp->state == LPFC_NVMET_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6102 Bad state IO x%x aborted\n", + "6102 IO xri x%x aborted\n", ctxp->oxid); rc = -ENXIO; goto aerr; @@ -615,16 +657,27 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, struct lpfc_nvmet_rcv_ctx *ctxp = container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; + unsigned long flags; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6103 Abort op: oxri x%x %d cnt %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); + "6103 Abort op: oxri x%x flg x%x cnt %d\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); - lpfc_nvmeio_data(phba, "NVMET FCP ABRT: xri x%x state x%x cnt x%x\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); + lpfc_nvmeio_data(phba, "NVMET FCP ABRT: " + "xri x%x flg x%x cnt x%x\n", + ctxp->oxid, ctxp->flag, ctxp->entry_cnt); atomic_inc(&lpfc_nvmep->xmt_fcp_abort); ctxp->entry_cnt++; + spin_lock_irqsave(&ctxp->ctxlock, flags); + + /* Since iaab/iaar are NOT set, we need to check + * if the firmware is in process of aborting IO + */ + if (ctxp->flag & LPFC_NVMET_XBUSY) { + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + return; + } ctxp->flag |= LPFC_NVMET_ABORT_OP; if (ctxp->flag & LPFC_NVMET_IO_INP) lpfc_nvmet_sol_fcp_issue_abort(phba, ctxp, ctxp->sid, @@ -632,13 +685,13 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, else lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); + spin_unlock_irqrestore(&ctxp->ctxlock, flags); } static void lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { - struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; struct lpfc_nvmet_rcv_ctx *ctxp = container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; @@ -646,27 +699,20 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, bool aborting = false; spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_ABORT_OP) { + if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || + (ctxp->flag & LPFC_NVMET_XBUSY)) { aborting = true; - ctxp->flag |= LPFC_NVMET_CTX_RLS; - } - spin_unlock_irqrestore(&ctxp->ctxlock, flags); - - if (aborting) /* let the abort path do the real release */ - return; - - /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_DONE) { - atomic_inc(&lpfc_nvmep->xmt_fcp_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6117 Bad state IO x%x aborted\n", - ctxp->oxid); + lpfc_nvmet_defer_release(phba, ctxp); } + spin_unlock_irqrestore(&ctxp->ctxlock, flags); lpfc_nvmeio_data(phba, "NVMET FCP FREE: xri x%x ste %d\n", ctxp->oxid, ctxp->state, 0); + if (aborting) + return; + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); } @@ -801,7 +847,120 @@ void lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, struct sli4_wcqe_xri_aborted *axri) { - /* TODO: work in progress */ + uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); + uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_nodelist *ndlp; + unsigned long iflag = 0; + int rrq_empty = 0; + bool released = false; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6317 XB aborted xri x%x rxid x%x\n", xri, rxid); + + if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) + return; + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + if (ctxp->flag & LPFC_NVMET_CTX_RLS && + !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_XBUSY; + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + + rrq_empty = list_empty(&phba->active_rrq_list); + spin_unlock_irqrestore(&phba->hbalock, iflag); + ndlp = lpfc_findnode_did(phba->pport, ctxp->sid); + if (ndlp && NLP_CHK_NODE_ACT(ndlp) && + (ndlp->nlp_state == NLP_STE_UNMAPPED_NODE || + ndlp->nlp_state == NLP_STE_MAPPED_NODE)) { + lpfc_set_rrq_active(phba, ndlp, + ctxp->rqb_buffer->sglq->sli4_lxritag, + rxid, 1); + lpfc_sli4_abts_err_handler(phba, ndlp, axri); + } + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6318 XB aborted %x flg x%x (%x)\n", + ctxp->oxid, ctxp->flag, released); + if (released) + lpfc_nvmet_rq_post(phba, ctxp, + &ctxp->rqb_buffer->hbuf); + if (rrq_empty) + lpfc_worker_wake_up(phba); + return; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); +} + +int +lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, + struct fc_frame_header *fc_hdr) + +{ +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + struct lpfc_hba *phba = vport->phba; + struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct nvmefc_tgt_fcp_req *rsp; + uint16_t xri; + unsigned long iflag = 0; + + xri = be16_to_cpu(fc_hdr->fh_ox_id); + + spin_lock_irqsave(&phba->hbalock, iflag); + spin_lock(&phba->sli4_hba.abts_nvme_buf_list_lock); + list_for_each_entry_safe(ctxp, next_ctxp, + &phba->sli4_hba.lpfc_abts_nvmet_ctx_list, + list) { + if (ctxp->rqb_buffer->sglq->sli4_xritag != xri) + continue; + + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + spin_lock_irqsave(&ctxp->ctxlock, iflag); + ctxp->flag |= LPFC_NVMET_ABTS_RCV; + spin_unlock_irqrestore(&ctxp->ctxlock, iflag); + + lpfc_nvmeio_data(phba, + "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 0); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); + + rsp = &ctxp->ctx.fcp_req; + nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); + + /* Respond with BA_ACC accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 1); + return 0; + } + spin_unlock(&phba->sli4_hba.abts_nvme_buf_list_lock); + spin_unlock_irqrestore(&phba->hbalock, iflag); + + lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", + xri, smp_processor_id(), 1); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6320 NVMET Rcv ABTS:rjt xri x%x\n", xri); + + /* Respond with BA_RJT accordingly */ + lpfc_sli4_seq_abort_rsp(vport, fc_hdr, 0); + return 0; +#endif } void @@ -1655,18 +1814,27 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6165 Abort cmpl: xri x%x WCQE: %08x %08x %08x %08x\n", - ctxp->oxid, wcqe->word0, wcqe->total_data_placed, - result, wcqe->word3); - ctxp->state = LPFC_NVMET_STE_DONE; + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); released = true; + } ctxp->flag &= ~LPFC_NVMET_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6165 ABORT cmpl: xri x%x flg x%x (%d) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + /* * if transport has released ctx, then can reuse it. Otherwise, * will be recycled by transport release call. @@ -1677,10 +1845,15 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, cmdwqe->context2 = NULL; cmdwqe->context3 = NULL; lpfc_sli_release_iocbq(phba, cmdwqe); + + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. + */ } /** - * lpfc_nvmet_xmt_fcp_abort_cmp - Completion handler for ABTS + * lpfc_nvmet_unsol_fcp_abort_cmp - Completion handler for ABTS * @phba: Pointer to HBA context object. * @cmdwqe: Pointer to driver command WQE object. * @wcqe: Pointer to driver response CQE object. @@ -1690,8 +1863,8 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, * The function frees memory resources used for the NVME commands. **/ static void -lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - struct lpfc_wcqe_complete *wcqe) +lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_rcv_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; @@ -1706,35 +1879,55 @@ lpfc_nvmet_xmt_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; atomic_inc(&tgtp->xmt_abort_cmpl); + if (!ctxp) { + /* if context is clear, related io alrady complete */ + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6070 ABTS cmpl: WCQE: %08x %08x %08x %08x\n", + wcqe->word0, wcqe->total_data_placed, + result, wcqe->word3); + return; + } + + /* Sanity check */ + if (ctxp->state != LPFC_NVMET_STE_ABORT) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6112 ABTS Wrong state:%d oxid x%x\n", + ctxp->state, ctxp->oxid); + } + + /* Check if we already received a free context call + * and we have completed processing an abort situation. + */ + ctxp->state = LPFC_NVMET_STE_DONE; + spin_lock_irqsave(&ctxp->ctxlock, flags); + if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && + !(ctxp->flag & LPFC_NVMET_XBUSY)) { + list_del(&ctxp->list); + released = true; + } + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + spin_unlock_irqrestore(&ctxp->ctxlock, flags); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6070 Abort cmpl: ctx %p WCQE: %08x %08x %08x %08x\n", - ctxp, wcqe->word0, wcqe->total_data_placed, + "6316 ABTS cmpl xri x%x flg x%x (%x) " + "WCQE: %08x %08x %08x %08x\n", + ctxp->oxid, ctxp->flag, released, + wcqe->word0, wcqe->total_data_placed, result, wcqe->word3); + /* + * if transport has released ctx, then can reuse it. Otherwise, + * will be recycled by transport release call. + */ + if (released) + lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); - if (ctxp) { - /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_ABORT) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6112 ABORT Wrong state:%d oxid x%x\n", - ctxp->state, ctxp->oxid); - } - ctxp->state = LPFC_NVMET_STE_DONE; - spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) - released = true; - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; - spin_unlock_irqrestore(&ctxp->ctxlock, flags); - - /* - * if transport has released ctx, then can reuse it. Otherwise, - * will be recycled by transport release call. - */ - if (released) - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; - } + /* Since iaab/iaar are NOT set, there is no work left. + * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * should have been called already. + */ } /** @@ -1787,10 +1980,14 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp; lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, - "6067 Abort: sid %x xri x%x/x%x\n", + "6067 ABTS: sid %x xri x%x/x%x\n", sid, xri, ctxp->wqeq->sli4_xritag); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { + ctxp->wqeq = ctxp->rqb_buffer->iocbq; + ctxp->wqeq->hba_wqidx = 0; + } ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || @@ -1896,10 +2093,11 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6160 Drop ABTS - wrong NDLP state x%x.\n", + "6160 Drop ABORT - wrong NDLP state x%x.\n", (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1907,9 +2105,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->abort_wqeq = lpfc_sli_get_iocbq(phba); if (!ctxp->abort_wqeq) { lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, - "6161 Abort failed: No wqeqs: " + "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); /* No failure to an ABTS request. */ + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } abts_wqeq = ctxp->abort_wqeq; @@ -1917,8 +2116,8 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->state = LPFC_NVMET_STE_ABORT; /* Announce entry to new IO submit field. */ - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, - "6162 Abort Request to rport DID x%06x " + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, + "6162 ABORT Request to rport DID x%06x " "for xri x%x x%x\n", ctxp->sid, ctxp->oxid, ctxp->wqeq->sli4_xritag); @@ -1934,6 +2133,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "NVME Req now. hba_flag x%x oxid x%x\n", phba->hba_flag, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1945,6 +2145,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "still pending on oxid x%x\n", ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; return 0; } @@ -1992,9 +2193,10 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, if (rc == WQE_SUCCESS) return 0; + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; lpfc_sli_release_iocbq(phba, abts_wqeq); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME, - "6166 Failed abts issue_wqe with status x%x " + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, + "6166 Failed ABORT issue_wqe with status x%x " "for oxid x%x.\n", rc, ctxp->oxid); return 1; @@ -2023,8 +2225,8 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, spin_lock_irqsave(&phba->hbalock, flags); abts_wqeq = ctxp->wqeq; - abts_wqeq->wqe_cmpl = lpfc_nvmet_xmt_fcp_abort_cmp; - abts_wqeq->iocb_cmpl = 0; + abts_wqeq->wqe_cmpl = lpfc_nvmet_unsol_fcp_abort_cmp; + abts_wqeq->iocb_cmpl = NULL; abts_wqeq->iocb_flag |= LPFC_IO_NVMET; rc = lpfc_sli4_issue_wqe(phba, LPFC_FCP_RING, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); @@ -2034,7 +2236,7 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, } aerr: - lpfc_nvmet_rq_post(phba, ctxp, &ctxp->rqb_buffer->hbuf); + ctxp->flag &= ~LPFC_NVMET_ABORT_OP; atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_WARNING, LOG_NVME_ABTS, "6135 Failed to Issue ABTS for oxid x%x. Status x%x\n", diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h index d8bac4c61541b..128759fe66505 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ b/drivers/scsi/lpfc/lpfc_nvmet.h @@ -75,6 +75,7 @@ struct lpfc_nvmet_rcv_ctx { struct nvmefc_tgt_ls_req ls_req; struct nvmefc_tgt_fcp_req fcp_req; } ctx; + struct list_head list; struct lpfc_hba *phba; struct lpfc_iocbq *wqeq; struct lpfc_iocbq *abort_wqeq; @@ -96,10 +97,11 @@ struct lpfc_nvmet_rcv_ctx { #define LPFC_NVMET_STE_RSP 4 #define LPFC_NVMET_STE_DONE 5 uint16_t flag; -#define LPFC_NVMET_IO_INP 0x1 -#define LPFC_NVMET_ABORT_OP 0x2 -#define LPFC_NVMET_CTX_RLS 0x4 - +#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ struct rqb_dmabuf *rqb_buffer; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a6adc2b04ba3e..cf19f4976f5fb 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -16521,7 +16521,7 @@ lpfc_sli4_xri_inrange(struct lpfc_hba *phba, * This function sends a basic response to a previous unsol sequence abort * event after aborting the sequence handling. **/ -static void +void lpfc_sli4_seq_abort_rsp(struct lpfc_vport *vport, struct fc_frame_header *fc_hdr, bool aborted) { @@ -16697,6 +16697,11 @@ lpfc_sli4_handle_unsol_abort(struct lpfc_vport *vport, } lpfc_in_buf_free(phba, &dmabuf->dbuf); + if (phba->nvmet_support) { + lpfc_nvmet_rcv_unsol_abort(vport, &fc_hdr); + return; + } + /* Respond with BA_ACC or BA_RJT accordingly */ lpfc_sli4_seq_abort_rsp(vport, &fc_hdr, aborted); } diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h index 710458cf11d62..da46471337c8a 100644 --- a/drivers/scsi/lpfc/lpfc_sli4.h +++ b/drivers/scsi/lpfc/lpfc_sli4.h @@ -620,7 +620,7 @@ struct lpfc_sli4_hba { struct list_head lpfc_els_sgl_list; struct list_head lpfc_abts_els_sgl_list; struct list_head lpfc_nvmet_sgl_list; - struct list_head lpfc_abts_nvmet_sgl_list; + struct list_head lpfc_abts_nvmet_ctx_list; struct list_head lpfc_abts_scsi_buf_list; struct list_head lpfc_abts_nvme_buf_list; struct lpfc_sglq **lpfc_sglq_active_list; From 7e04e21afa82ef024416f5413b5bdb66e0505bcd Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:05 -0700 Subject: [PATCH 30/39] Fix Express lane queue creation. The older sli4 adapters only supported the 64 byte WQE entry size. The new adapter (fw) support both 64 and 128 byte WQE entry sizies. The Express lane WQ was not being created with the 128 byte WQE sizes when it was supported. Not having the right WQE size created for the express lane work queue caused the the firmware to overwrite the lun indentifier in the FCP header. This patch correctly creates the express lane work queue with the supported size. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 74cec2232a864..90ae354a9c458 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -12040,6 +12040,7 @@ int lpfc_fof_queue_create(struct lpfc_hba *phba) { struct lpfc_queue *qdesc; + uint32_t wqesize; /* Create FOF EQ */ qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.eq_esize, @@ -12060,8 +12061,11 @@ lpfc_fof_queue_create(struct lpfc_hba *phba) phba->sli4_hba.oas_cq = qdesc; /* Create OAS WQ */ - qdesc = lpfc_sli4_queue_alloc(phba, phba->sli4_hba.wq_esize, + wqesize = (phba->fcp_embed_io) ? + LPFC_WQE128_SIZE : phba->sli4_hba.wq_esize; + qdesc = lpfc_sli4_queue_alloc(phba, wqesize, phba->sli4_hba.wq_ecount); + if (!qdesc) goto out_error; From 51f397642c890e6c54e2532110c84519f1d12e4c Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 16:05:06 -0700 Subject: [PATCH 31/39] lpfc revison 11.2.0.12 Update lpfc version to reflect this set of changes. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn --- drivers/scsi/lpfc/lpfc_version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h index d4e95e28f4e3d..1c26dc67151b6 100644 --- a/drivers/scsi/lpfc/lpfc_version.h +++ b/drivers/scsi/lpfc/lpfc_version.h @@ -20,7 +20,7 @@ * included with this package. * *******************************************************************/ -#define LPFC_DRIVER_VERSION "11.2.0.10" +#define LPFC_DRIVER_VERSION "11.2.0.12" #define LPFC_DRIVER_NAME "lpfc" /* Used for SLI 2/3 */ From 76e4ad09a30cfb589eb436ad067f4abecb82cc04 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 21 Apr 2017 16:19:22 -0700 Subject: [PATCH 32/39] nvme: Fix APST comment There was a typo in the description of the timeout heuristic. Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index bf6729b1d8bf7..30446e29dbd9f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1325,7 +1325,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) * heuristic: we are willing to spend at most 2% of the time * transitioning between power states. Therefore, when running * in any given state, we will enter the next lower-power - * non-operational state after waiting 100 * (enlat + exlat) + * non-operational state after waiting 50 * (enlat + exlat) * microseconds, as long as that state's total latency is under * the requested maximum latency. * From fb0dc3993b537e12ce63511d535ff86efff13c8f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 21 Apr 2017 16:19:23 -0700 Subject: [PATCH 33/39] nvme: Display raw APST configuration via DYNAMIC_DEBUG Debugging APST is currently a bit of a pain. This gives optional simple log messages that describe the APST state. The easiest way to use this is probably with the nvme_core.dyndbg=+p module parameter. Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 30446e29dbd9f..3c9547b94541f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1336,6 +1336,8 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) unsigned apste; struct nvme_feat_auto_pst *table; + u64 max_lat_us = 0; + int max_ps = -1; int ret; /* @@ -1357,6 +1359,7 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) if (ctrl->ps_max_latency_us == 0) { /* Turn off APST. */ apste = 0; + dev_dbg(ctrl->device, "APST disabled\n"); } else { __le64 target = cpu_to_le64(0); int state; @@ -1406,9 +1409,22 @@ static void nvme_configure_apst(struct nvme_ctrl *ctrl) target = cpu_to_le64((state << 3) | (transition_ms << 8)); + + if (max_ps == -1) + max_ps = state; + + if (total_latency_us > max_lat_us) + max_lat_us = total_latency_us; } apste = 1; + + if (max_ps == -1) { + dev_dbg(ctrl->device, "APST enabled but no non-operational states are available\n"); + } else { + dev_dbg(ctrl->device, "APST enabled: max PS = %d, max round-trip latency = %lluus, table = %*phN\n", + max_ps, max_lat_us, (int)sizeof(*table), table); + } } ret = nvme_set_features(ctrl, NVME_FEAT_AUTO_PST, apste, From c35e30b4727b390ce7a6dd7ead31335320c2b83e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Fri, 21 Apr 2017 16:19:24 -0700 Subject: [PATCH 34/39] nvme: Add nvme_core.force_apst to ignore the NO_APST quirk We're probably going to be stuck quirking APST off on an over-broad range of devices for 4.11. Let's make it easy to override the quirk for testing. Signed-off-by: Andy Lutomirski Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3c9547b94541f..d5e0906262ead 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -61,6 +61,10 @@ module_param(default_ps_max_latency_us, ulong, 0644); MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device"); +static bool force_apst; +module_param(force_apst, bool, 0644); +MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off"); + static LIST_HEAD(nvme_ctrl_list); static DEFINE_SPINLOCK(dev_list_lock); @@ -1562,6 +1566,11 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) } } + if (force_apst && (ctrl->quirks & NVME_QUIRK_NO_DEEPEST_PS)) { + dev_warn(ctrl->dev, "forcibly allowing all power states due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->quirks &= ~NVME_QUIRK_NO_DEEPEST_PS; + } + ctrl->oacs = le16_to_cpu(id->oacs); ctrl->vid = le16_to_cpu(id->vid); ctrl->oncs = le16_to_cpup(&id->oncs); @@ -1584,7 +1593,16 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->npss = id->npss; prev_apsta = ctrl->apsta; - ctrl->apsta = (ctrl->quirks & NVME_QUIRK_NO_APST) ? 0 : id->apsta; + if (ctrl->quirks & NVME_QUIRK_NO_APST) { + if (force_apst && id->apsta) { + dev_warn(ctrl->dev, "forcibly allowing APST due to nvme_core.force_apst -- use at your own risk\n"); + ctrl->apsta = 1; + } else { + ctrl->apsta = 0; + } + } else { + ctrl->apsta = id->apsta; + } memcpy(ctrl->psd, id->psd, sizeof(ctrl->psd)); if (ctrl->ops->is_fabrics) { From bbe3012b73b592dfe5f4340a14373840f51887b6 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 21 Apr 2017 17:49:08 -0700 Subject: [PATCH 35/39] lpfc: Fix memory corruption of the lpfc_ncmd->list pointers lpfc was changing the private pointer that is set/maintained by the nvme_fc transport. This caused two issues: a) the transport, on teardown may erroneous attempt to free whatever address was set; and b) lfpc uses any value set in lpfc_nvme_fcp_abort() and assumes its a valid io request. Correct issue by properly defining a context structure for lpfc. Lpfc also updated to clear the private context structure on io completion. Since this bug caused scrutiny of the way lpfc moves local request structures between lists, also cleaned up list_del()'s to list_del_inits()'s. This is a nvme-specific bug. The patch was cut against the linux-block tree, for-4.12/block tree. It should be pulled in through that tree. Signed-off-by: Dick Kennedy Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/scsi/lpfc/lpfc_nvme.c | 17 +++++++++++------ drivers/scsi/lpfc/lpfc_nvme.h | 4 ++++ 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index f98cbc24d862d..8008c8205fb63 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -761,6 +761,7 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, struct nvme_fc_cmd_iu *cp; struct lpfc_nvme_rport *rport; struct lpfc_nodelist *ndlp; + struct lpfc_nvme_fcpreq_priv *freqpriv; unsigned long flags; uint32_t code; uint16_t cid, sqhd, data; @@ -918,6 +919,8 @@ lpfc_nvme_io_cmd_wqe_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *pwqeIn, phba->cpucheck_cmpl_io[lpfc_ncmd->cpu]++; } #endif + freqpriv = nCmd->private; + freqpriv->nvme_buf = NULL; nCmd->done(nCmd); spin_lock_irqsave(&phba->hbalock, flags); @@ -1214,6 +1217,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_ncmd; struct lpfc_nvme_rport *rport; struct lpfc_nvme_qhandle *lpfc_queue_info; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS uint64_t start = 0; #endif @@ -1292,7 +1296,7 @@ lpfc_nvme_fcp_io_submit(struct nvme_fc_local_port *pnvme_lport, * Do not let the IO hang out forever. There is no midlayer issuing * an abort so inform the FW of the maximum IO pending time. */ - pnvme_fcreq->private = (void *)lpfc_ncmd; + freqpriv->nvme_buf = lpfc_ncmd; lpfc_ncmd->nvmeCmd = pnvme_fcreq; lpfc_ncmd->nrport = rport; lpfc_ncmd->ndlp = ndlp; @@ -1422,6 +1426,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, struct lpfc_nvme_buf *lpfc_nbuf; struct lpfc_iocbq *abts_buf; struct lpfc_iocbq *nvmereq_wqe; + struct lpfc_nvme_fcpreq_priv *freqpriv = pnvme_fcreq->private; union lpfc_wqe *abts_wqe; unsigned long flags; int ret_val; @@ -1484,7 +1489,7 @@ lpfc_nvme_fcp_abort(struct nvme_fc_local_port *pnvme_lport, return; } - lpfc_nbuf = (struct lpfc_nvme_buf *)pnvme_fcreq->private; + lpfc_nbuf = freqpriv->nvme_buf; if (!lpfc_nbuf) { spin_unlock_irqrestore(&phba->hbalock, flags); lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, @@ -1637,7 +1642,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = { .local_priv_sz = sizeof(struct lpfc_nvme_lport), .remote_priv_sz = sizeof(struct lpfc_nvme_rport), .lsrqst_priv_sz = 0, - .fcprqst_priv_sz = 0, + .fcprqst_priv_sz = sizeof(struct lpfc_nvme_fcpreq_priv), }; /** @@ -2068,7 +2073,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active(phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2083,7 +2088,7 @@ lpfc_get_nvme_buf(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) if (lpfc_test_rrq_active( phba, ndlp, lpfc_ncmd->cur_iocbq.sli4_lxritag)) continue; - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); found = 1; break; } @@ -2542,7 +2547,7 @@ lpfc_sli4_nvme_xri_aborted(struct lpfc_hba *phba, &phba->sli4_hba.lpfc_abts_nvme_buf_list, list) { if (lpfc_ncmd->cur_iocbq.sli4_xritag == xri) { - list_del(&lpfc_ncmd->list); + list_del_init(&lpfc_ncmd->list); lpfc_ncmd->flags &= ~LPFC_SBUF_XBUSY; lpfc_ncmd->status = IOSTAT_SUCCESS; spin_unlock( diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 2582f46edf05c..ec32f45daa667 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -97,3 +97,7 @@ struct lpfc_nvme_buf { uint64_t ts_data_nvme; #endif }; + +struct lpfc_nvme_fcpreq_priv { + struct lpfc_nvme_buf *nvme_buf; +}; From de41447aac034c4acc8d9d1ddbdcb7ce4e8a3f6f Mon Sep 17 00:00:00 2001 From: "Ewan D. Milne" Date: Mon, 24 Apr 2017 13:24:16 -0400 Subject: [PATCH 36/39] nvme-fc: avoid memory corruption caused by calling nvmf_free_options() twice Do not call nvmf_free_options() from the nvme_fc_ctlr destructor if nvme_fc_create_ctrl() returns an error, because nvmf_create_ctrl() frees the options when an error is returned. Signed-off-by: Ewan D. Milne Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e73862ebb8b46..4976db56e3519 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1716,7 +1716,8 @@ nvme_fc_ctrl_free(struct kref *ref) nvme_fc_rport_put(ctrl->rport); ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); - nvmf_free_options(ctrl->ctrl.opts); + if (ctrl->ctrl.opts) + nvmf_free_options(ctrl->ctrl.opts); kfree(ctrl); } @@ -2807,6 +2808,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = nvme_fc_create_association(ctrl); if (ret) { + ctrl->ctrl.opts = NULL; /* initiate nvme ctrl ref counting teardown */ nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); From 7fad1fd46ccf3ee283052e948cf91edd0cd9b1c7 Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Mon, 24 Apr 2017 18:02:43 -0600 Subject: [PATCH 37/39] nvme-scsi: Consider LBA format in IO splitting calculation The current command submission code uses a sector-based value when considering the maximum number of blocks per command. With a 4k-formatted namespace and a command exceeding max hardware limits, this calculation doesn't split IOs which should be split and fails in the nvme layer. This patch fixes that calculation and enables IO splitting in these circumstances. Signed-off-by: Jon Derrick Reviewed-by: Jens Axboe Signed-off-by: Christoph Hellwig --- drivers/nvme/host/scsi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index f49ae2758bb70..988da610d6aaf 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -1609,7 +1609,7 @@ static int nvme_trans_do_nvme_io(struct nvme_ns *ns, struct sg_io_hdr *hdr, struct nvme_command c; u8 opcode = (is_write ? nvme_cmd_write : nvme_cmd_read); u16 control; - u32 max_blocks = queue_max_hw_sectors(ns->queue); + u32 max_blocks = queue_max_hw_sectors(ns->queue) >> (ns->lba_shift - 9); num_cmds = nvme_trans_io_get_num_cmds(hdr, cdb_info, max_blocks); From 25d9baa47505ead1bcae7334991363c3bbfa1831 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 21 Apr 2017 08:26:57 +0200 Subject: [PATCH 38/39] nvme-lightnvm: add missing endianess conversion in nvme_nvm_end_io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found by sparse. Signed-off-by: Christoph Hellwig Reviewed-by: Matias Bjørling --- drivers/nvme/host/lightnvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index de61a4a03d782..e4e4e60b1224f 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -483,7 +483,7 @@ static void nvme_nvm_end_io(struct request *rq, int error) { struct nvm_rq *rqd = rq->end_io_data; - rqd->ppa_status = nvme_req(rq)->result.u64; + rqd->ppa_status = le64_to_cpu(nvme_req(rq)->result.u64); rqd->error = nvme_req(rq)->status; nvm_end_io(rqd); From 7569b90a228ed7dfdc1f92f2c98d7a1b041f22eb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Apr 2017 18:56:44 +0200 Subject: [PATCH 39/39] nvme-scsi: remove nvme_trans_security_protocol This function just returns the same error code and sense data as the default statement in the switch in the caller. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/scsi.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/nvme/host/scsi.c b/drivers/nvme/host/scsi.c index 988da610d6aaf..1f7671e631dd0 100644 --- a/drivers/nvme/host/scsi.c +++ b/drivers/nvme/host/scsi.c @@ -2138,15 +2138,6 @@ static int nvme_trans_request_sense(struct nvme_ns *ns, struct sg_io_hdr *hdr, return res; } -static int nvme_trans_security_protocol(struct nvme_ns *ns, - struct sg_io_hdr *hdr, - u8 *cmd) -{ - return nvme_trans_completion(hdr, SAM_STAT_CHECK_CONDITION, - ILLEGAL_REQUEST, SCSI_ASC_ILLEGAL_COMMAND, - SCSI_ASCQ_CAUSE_NOT_REPORTABLE); -} - static int nvme_trans_synchronize_cache(struct nvme_ns *ns, struct sg_io_hdr *hdr) { @@ -2414,10 +2405,6 @@ static int nvme_scsi_translate(struct nvme_ns *ns, struct sg_io_hdr *hdr) case REQUEST_SENSE: retcode = nvme_trans_request_sense(ns, hdr, cmd); break; - case SECURITY_PROTOCOL_IN: - case SECURITY_PROTOCOL_OUT: - retcode = nvme_trans_security_protocol(ns, hdr, cmd); - break; case SYNCHRONIZE_CACHE: retcode = nvme_trans_synchronize_cache(ns, hdr); break;