Skip to content

Commit

Permalink
Merge branch 'nvme-4.20' of git://git.infradead.org/nvme into for-4.2…
Browse files Browse the repository at this point in the history
…0/block

Pull NVMe updates from Christoph:

"A relatively boring merge window:

 - better AEN tracing (Chaitanya)
 - NUMA aware PCIe multipathing (me)
 - RDMA workqueue fixes (Sagi)
 - better bio usage in the target (Sagi)
 - FC rework for target removal (James)
 - better multipath handling of ->queue_rq failures (James)
 - various cleanups (Milan)"

* 'nvme-4.20' of git://git.infradead.org/nvme:
  nvmet-rdma: use a private workqueue for delete
  nvme: take node locality into account when selecting a path
  nvmet: don't split large I/Os unconditionally
  nvme: call nvme_complete_rq when nvmf_check_ready fails for mpath I/O
  nvme-core: add async event trace helper
  nvme_fc: add 'nvme_discovery' sysfs attribute to fc transport device
  nvmet_fc: support target port removal with nvmet layer
  nvme-fc: fix for a minor typos
  nvmet: remove redundant module prefix
  nvme: fix typo in nvme_identify_ns_descs
  • Loading branch information
Jens Axboe committed Oct 5, 2018
2 parents 9305455 + 2acf70a commit 4f5735f
Show file tree
Hide file tree
Showing 12 changed files with 347 additions and 60 deletions.
20 changes: 16 additions & 4 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -971,7 +971,7 @@ static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid,
uuid_copy(&ids->uuid, data + pos + sizeof(*cur));
break;
default:
/* Skip unnkown types */
/* Skip unknown types */
len = cur->nidl;
break;
}
Expand Down Expand Up @@ -2908,9 +2908,14 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
unsigned nsid, struct nvme_id_ns *id)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
int ret = -ENOMEM;

head = kzalloc(sizeof(*head), GFP_KERNEL);
#ifdef CONFIG_NVME_MULTIPATH
size += num_possible_nodes() * sizeof(struct nvme_ns *);
#endif

head = kzalloc(size, GFP_KERNEL);
if (!head)
goto out;
ret = ida_simple_get(&ctrl->subsys->ns_ida, 1, 0, GFP_KERNEL);
Expand Down Expand Up @@ -3408,16 +3413,21 @@ static void nvme_fw_act_work(struct work_struct *work)

static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
{
switch ((result & 0xff00) >> 8) {
u32 aer_notice_type = (result & 0xff00) >> 8;

switch (aer_notice_type) {
case NVME_AER_NOTICE_NS_CHANGED:
trace_nvme_async_event(ctrl, aer_notice_type);
set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
nvme_queue_scan(ctrl);
break;
case NVME_AER_NOTICE_FW_ACT_STARTING:
trace_nvme_async_event(ctrl, aer_notice_type);
queue_work(nvme_wq, &ctrl->fw_act_work);
break;
#ifdef CONFIG_NVME_MULTIPATH
case NVME_AER_NOTICE_ANA:
trace_nvme_async_event(ctrl, aer_notice_type);
if (!ctrl->ana_log_buf)
break;
queue_work(nvme_wq, &ctrl->ana_work);
Expand All @@ -3432,18 +3442,20 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
volatile union nvme_result *res)
{
u32 result = le32_to_cpu(res->u32);
u32 aer_type = result & 0x07;

if (le16_to_cpu(status) >> 1 != NVME_SC_SUCCESS)
return;

switch (result & 0x7) {
switch (aer_type) {
case NVME_AER_NOTICE:
nvme_handle_aen_notice(ctrl, result);
break;
case NVME_AER_ERROR:
case NVME_AER_SMART:
case NVME_AER_CSS:
case NVME_AER_VS:
trace_nvme_async_event(ctrl, aer_type);
ctrl->aen_result = result;
break;
default:
Expand Down
7 changes: 5 additions & 2 deletions drivers/nvme/host/fabrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -552,8 +552,11 @@ blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
ctrl->state != NVME_CTRL_DEAD &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
return BLK_STS_RESOURCE;
nvme_req(rq)->status = NVME_SC_ABORT_REQ;
return BLK_STS_IOERR;

nvme_req(rq)->status = NVME_SC_HOST_PATH_ERROR;
blk_mq_start_request(rq);
nvme_complete_rq(rq);
return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);

Expand Down
108 changes: 97 additions & 11 deletions drivers/nvme/host/fc.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ struct nvme_fc_rport {
struct list_head endp_list; /* for lport->endp_list */
struct list_head ctrl_list;
struct list_head ls_req_list;
struct list_head disc_list;
struct device *dev; /* physical device for dma */
struct nvme_fc_lport *lport;
spinlock_t lock;
Expand Down Expand Up @@ -210,7 +211,6 @@ static DEFINE_IDA(nvme_fc_ctrl_cnt);
* These items are short-term. They will eventually be moved into
* a generic FC class. See comments in module init.
*/
static struct class *fc_class;
static struct device *fc_udev_device;


Expand Down Expand Up @@ -507,6 +507,7 @@ nvme_fc_free_rport(struct kref *ref)
list_del(&rport->endp_list);
spin_unlock_irqrestore(&nvme_fc_lock, flags);

WARN_ON(!list_empty(&rport->disc_list));
ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);

kfree(rport);
Expand Down Expand Up @@ -694,6 +695,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
INIT_LIST_HEAD(&newrec->endp_list);
INIT_LIST_HEAD(&newrec->ctrl_list);
INIT_LIST_HEAD(&newrec->ls_req_list);
INIT_LIST_HEAD(&newrec->disc_list);
kref_init(&newrec->ref);
atomic_set(&newrec->act_ctrl_cnt, 0);
spin_lock_init(&newrec->lock);
Expand Down Expand Up @@ -1385,7 +1387,7 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)

__nvme_fc_finish_ls_req(lsop);

/* fc-nvme iniator doesn't care about success or failure of cmd */
/* fc-nvme initiator doesn't care about success or failure of cmd */

kfree(lsop);
}
Expand Down Expand Up @@ -3159,7 +3161,7 @@ nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
substring_t wwn = { name, &name[sizeof(name)-1] };
int nnoffset, pnoffset;

/* validate it string one of the 2 allowed formats */
/* validate if string is one of the 2 allowed formats */
if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
Expand Down Expand Up @@ -3254,6 +3256,90 @@ static struct nvmf_transport_ops nvme_fc_transport = {
.create_ctrl = nvme_fc_create_ctrl,
};

/* Arbitrary successive failures max. With lots of subsystems could be high */
#define DISCOVERY_MAX_FAIL 20

/*
 * Store handler for the write-only "nvme_discovery" sysfs attribute.
 *
 * Any write triggers the handler; the written data (@buf) is not examined.
 * For every remote port found on every local port in nvme_fc_lport_list,
 * nvme_fc_signal_discovery_scan() is called once.
 *
 * The work is done in two phases:
 *  1. With nvme_fc_lock held, take one lport reference and one rport
 *     reference per remote port and queue the rport on a private list
 *     through rport->disc_list.
 *  2. Drain the private list, dropping nvme_fc_lock around the discovery
 *     signal and the matching reference puts.
 *
 * @dev:   device the attribute was written on (unused)
 * @attr:  attribute descriptor (unused)
 * @buf:   data written by userspace (ignored)
 * @count: number of bytes written
 *
 * Returns @count unconditionally, so the write always appears fully
 * consumed, even when the scan was cut short by reference failures.
 */
static ssize_t nvme_fc_nvme_discovery_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	unsigned long flags;
	LIST_HEAD(local_disc_list);
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;
	int failcnt = 0;

	spin_lock_irqsave(&nvme_fc_lock, flags);
restart:
	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			/*
			 * Skip rports that are already queued (seen on an
			 * earlier pass before a restart, or queued by a
			 * concurrent writer) BEFORE taking any reference.
			 * The drain loop below drops exactly one lport and
			 * one rport reference per queued entry, so taking
			 * references for an entry we do not queue would
			 * leak them.
			 */
			if (!list_empty(&rport->disc_list))
				continue;

			if (!nvme_fc_lport_get(lport))
				continue;
			if (!nvme_fc_rport_get(rport)) {
				/*
				 * This is a temporary condition. Upon restart
				 * this rport will be gone from the list.
				 *
				 * Revert the lport put and retry. Anything
				 * added to the list already will be skipped (as
				 * they are no longer list_empty). Loops should
				 * resume at rports that were not yet seen.
				 *
				 * NOTE(review): nvme_fc_lock is not dropped
				 * across the restart, so it is unclear how the
				 * failing rport could leave endp_list between
				 * passes - confirm whether the retries can
				 * ever make progress.
				 */
				nvme_fc_lport_put(lport);

				if (failcnt++ < DISCOVERY_MAX_FAIL)
					goto restart;

				pr_err("nvme_discovery: too many reference "
				       "failures\n");
				goto process_local_list;
			}
			list_add_tail(&rport->disc_list, &local_disc_list);
		}
	}

process_local_list:
	while (!list_empty(&local_disc_list)) {
		rport = list_first_entry(&local_disc_list,
					 struct nvme_fc_rport, disc_list);
		/* re-init so the entry reads as "not queued" again */
		list_del_init(&rport->disc_list);
		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		/* fetch lport before the rport reference is dropped */
		lport = rport->lport;
		/* signal discovery. Won't hurt if it repeats */
		nvme_fc_signal_discovery_scan(lport, rport);
		nvme_fc_rport_put(rport);
		nvme_fc_lport_put(lport);

		spin_lock_irqsave(&nvme_fc_lock, flags);
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return count;
}
/*
 * "nvme_discovery" attribute: mode 0200 with a NULL show handler, i.e.
 * write-only. Writes are handled by nvme_fc_nvme_discovery_store().
 */
static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store);

/* NULL-terminated list of the attributes placed in nvme_fc_attr_group. */
static struct attribute *nvme_fc_attrs[] = {
&dev_attr_nvme_discovery.attr,
NULL
};

/* Attribute group wrapping nvme_fc_attrs; no group name is set. */
static struct attribute_group nvme_fc_attr_group = {
.attrs = nvme_fc_attrs,
};

/* NULL-terminated list of groups, referenced by fc_class.dev_groups. */
static const struct attribute_group *nvme_fc_attr_groups[] = {
&nvme_fc_attr_group,
NULL
};

/*
 * Statically defined "fc" class. Its dev_groups points at
 * nvme_fc_attr_groups, which carries the nvme_discovery attribute.
 */
static struct class fc_class = {
.name = "fc",
.dev_groups = nvme_fc_attr_groups,
.owner = THIS_MODULE,
};

static int __init nvme_fc_init_module(void)
{
int ret;
Expand All @@ -3272,16 +3358,16 @@ static int __init nvme_fc_init_module(void)
* put in place, this code will move to a more generic
* location for the class.
*/
fc_class = class_create(THIS_MODULE, "fc");
if (IS_ERR(fc_class)) {
ret = class_register(&fc_class);
if (ret) {
pr_err("couldn't register class fc\n");
return PTR_ERR(fc_class);
return ret;
}

/*
* Create a device for the FC-centric udev events
*/
fc_udev_device = device_create(fc_class, NULL, MKDEV(0, 0), NULL,
fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL,
"fc_udev_device");
if (IS_ERR(fc_udev_device)) {
pr_err("couldn't create fc_udev device!\n");
Expand All @@ -3296,9 +3382,9 @@ static int __init nvme_fc_init_module(void)
return 0;

out_destroy_device:
device_destroy(fc_class, MKDEV(0, 0));
device_destroy(&fc_class, MKDEV(0, 0));
out_destroy_class:
class_destroy(fc_class);
class_unregister(&fc_class);
return ret;
}

Expand All @@ -3313,8 +3399,8 @@ static void __exit nvme_fc_exit_module(void)
ida_destroy(&nvme_fc_local_port_cnt);
ida_destroy(&nvme_fc_ctrl_cnt);

device_destroy(fc_class, MKDEV(0, 0));
class_destroy(fc_class);
device_destroy(&fc_class, MKDEV(0, 0));
class_unregister(&fc_class);
}

module_init(nvme_fc_init_module);
Expand Down
57 changes: 46 additions & 11 deletions drivers/nvme/host/multipath.c
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,13 @@ void nvme_failover_req(struct request *req)
queue_work(nvme_wq, &ns->ctrl->ana_work);
}
break;
case NVME_SC_HOST_PATH_ERROR:
/*
* Temporary transport disruption in talking to the controller.
* Try to send on a new path.
*/
nvme_mpath_clear_current_path(ns);
break;
default:
/*
* Reset the controller for any non-ANA error as we don't know
Expand Down Expand Up @@ -110,29 +117,55 @@ static const char *nvme_ana_state_names[] = {
[NVME_ANA_CHANGE] = "change",
};

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
void nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns *ns, *fallback = NULL;
struct nvme_ns_head *head = ns->head;
int node;

if (!head)
return;

for_each_node(node) {
if (ns == rcu_access_pointer(head->current_path[node]))
rcu_assign_pointer(head->current_path[node], NULL);
}
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
struct nvme_ns *found = NULL, *fallback = NULL, *ns;

list_for_each_entry_rcu(ns, &head->list, siblings) {
if (ns->ctrl->state != NVME_CTRL_LIVE ||
test_bit(NVME_NS_ANA_PENDING, &ns->flags))
continue;

distance = node_distance(node, dev_to_node(ns->ctrl->dev));

switch (ns->ana_state) {
case NVME_ANA_OPTIMIZED:
rcu_assign_pointer(head->current_path, ns);
return ns;
if (distance < found_distance) {
found_distance = distance;
found = ns;
}
break;
case NVME_ANA_NONOPTIMIZED:
fallback = ns;
if (distance < fallback_distance) {
fallback_distance = distance;
fallback = ns;
}
break;
default:
break;
}
}

if (fallback)
rcu_assign_pointer(head->current_path, fallback);
return fallback;
if (!found)
found = fallback;
if (found)
rcu_assign_pointer(head->current_path[node], found);
return found;
}

static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
Expand All @@ -143,10 +176,12 @@ static inline bool nvme_path_is_optimized(struct nvme_ns *ns)

inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);
int node = numa_node_id();
struct nvme_ns *ns;

ns = srcu_dereference(head->current_path[node], &head->srcu);
if (unlikely(!ns || !nvme_path_is_optimized(ns)))
ns = __nvme_find_path(head);
ns = __nvme_find_path(head, node);
return ns;
}

Expand Down Expand Up @@ -193,7 +228,7 @@ static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
int srcu_idx;

srcu_idx = srcu_read_lock(&head->srcu);
ns = srcu_dereference(head->current_path, &head->srcu);
ns = srcu_dereference(head->current_path[numa_node_id()], &head->srcu);
if (likely(ns && nvme_path_is_optimized(ns)))
found = ns->queue->poll_fn(q, qc);
srcu_read_unlock(&head->srcu, srcu_idx);
Expand Down
Loading

0 comments on commit 4f5735f

Please sign in to comment.