Skip to content

Commit

Permalink
Merge branch 'nvme-5.9-rc' of git://git.infradead.org/nvme into block…
Browse files Browse the repository at this point in the history
…-5.9

Pull NVMe fixes from Sagi:

"- instance leak and io boundary fixes from Keith
 - fc locking fix from Christophe
 - various tcp/rdma reset during traffic fixes from Me
 - pci use-after-free fix from Tong
 - tcp target null deref fix from Ziye"

* 'nvme-5.9-rc' of git://git.infradead.org/nvme:
  nvme-pci: cancel nvme device request before disabling
  nvme: only use power of two io boundaries
  nvme: fix controller instance leak
  nvmet-fc: Fix a missed _irqsave version of spin_lock in 'nvmet_fc_fod_op_done()'
  nvme: Fix NULL dereference for pci nvme controllers
  nvme-rdma: fix reset hang if controller died in the middle of a reset
  nvme-rdma: fix timeout handler
  nvme-rdma: serialize controller teardown sequences
  nvme-tcp: fix reset hang if controller died in the middle of a reset
  nvme-tcp: fix timeout handler
  nvme-tcp: serialize controller teardown sequences
  nvme: have nvme_wait_freeze_timeout return if it timed out
  nvme-fabrics: don't check state NVME_CTRL_NEW for request acceptance
  nvmet-tcp: Fix NULL dereference when a connect data comes in h2cdata pdu
  • Loading branch information
Jens Axboe committed Aug 29, 2020
2 parents a433d72 + 7ad92f6 commit 5d220bc
Show file tree
Hide file tree
Showing 8 changed files with 167 additions and 58 deletions.
56 changes: 45 additions & 11 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -2026,13 +2026,49 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}

static inline bool nvme_first_scan(struct gendisk *disk)
{
/* nvme_alloc_ns() scans the disk prior to adding it */
return !(disk->flags & GENHD_FL_UP);
}

static void nvme_set_chunk_sectors(struct nvme_ns *ns, struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
u32 iob;

if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

if (!iob)
return;

if (!is_power_of_2(iob)) {
if (nvme_first_scan(ns->disk))
pr_warn("%s: ignoring unaligned IO boundary:%u\n",
ns->disk->disk_name, iob);
return;
}

if (blk_queue_is_zoned(ns->disk->queue)) {
if (nvme_first_scan(ns->disk))
pr_warn("%s: ignoring zoned namespace IO boundary\n",
ns->disk->disk_name);
return;
}

blk_queue_chunk_sectors(ns->queue, iob);
}

static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
unsigned lbaf = id->flbas & NVME_NS_FLBAS_LBA_MASK;
struct nvme_ns *ns = disk->private_data;
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
u32 iob;

/*
* If identify namespace failed, use default 512 byte block size so
Expand Down Expand Up @@ -2060,12 +2096,6 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
return -ENODEV;
}

if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
is_power_of_2(ctrl->max_hw_sectors))
iob = ctrl->max_hw_sectors;
else
iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));

ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[lbaf].ms);
/* the PI implementation requires metadata equal t10 pi tuple size */
Expand Down Expand Up @@ -2097,8 +2127,7 @@ static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
}
}

if (iob && !blk_queue_is_zoned(ns->queue))
blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_set_chunk_sectors(ns, id);
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {
Expand Down Expand Up @@ -3676,6 +3705,10 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
return 0;
if (a == &dev_attr_hostid.attr && !ctrl->opts)
return 0;
if (a == &dev_attr_ctrl_loss_tmo.attr && !ctrl->opts)
return 0;
if (a == &dev_attr_reconnect_delay.attr && !ctrl->opts)
return 0;

return a->mode;
}
Expand Down Expand Up @@ -4390,7 +4423,7 @@ static void nvme_free_ctrl(struct device *dev)
struct nvme_subsystem *subsys = ctrl->subsys;
struct nvme_cel *cel, *next;

if (subsys && ctrl->instance != subsys->instance)
if (!subsys || ctrl->instance != subsys->instance)
ida_simple_remove(&nvme_instance_ida, ctrl->instance);

list_for_each_entry_safe(cel, next, &ctrl->cels, entry) {
Expand Down Expand Up @@ -4534,7 +4567,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_unfreeze);

void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
{
struct nvme_ns *ns;

Expand All @@ -4545,6 +4578,7 @@ void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)
break;
}
up_read(&ctrl->namespaces_rwsem);
return timeout;
}
EXPORT_SYMBOL_GPL(nvme_wait_freeze_timeout);

Expand Down
1 change: 0 additions & 1 deletion drivers/nvme/host/fabrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,6 @@ bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
* which is require to set the queue live in the appropinquate states.
*/
switch (ctrl->state) {
case NVME_CTRL_NEW:
case NVME_CTRL_CONNECTING:
if (nvme_is_fabrics(req->cmd) &&
req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
Expand Down
2 changes: 1 addition & 1 deletion drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -605,7 +605,7 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);

#define NVME_QID_ANY -1
Expand Down
4 changes: 2 additions & 2 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -1249,8 +1249,8 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn_ratelimited(dev->ctrl.device,
"I/O %d QID %d timeout, disable controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, true);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, true);
return BLK_EH_DONE;
case NVME_CTRL_RESETTING:
return BLK_EH_RESET_TIMER;
Expand All @@ -1267,10 +1267,10 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
dev_warn(dev->ctrl.device,
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_req(req)->flags |= NVME_REQ_CANCELLED;
nvme_dev_disable(dev, false);
nvme_reset_ctrl(&dev->ctrl);

nvme_req(req)->flags |= NVME_REQ_CANCELLED;
return BLK_EH_DONE;
}

Expand Down
68 changes: 51 additions & 17 deletions drivers/nvme/host/rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ struct nvme_rdma_ctrl {
struct sockaddr_storage src_addr;

struct nvme_ctrl ctrl;
struct mutex teardown_lock;
bool use_inline_data;
u32 io_queues[HCTX_MAX_TYPES];
};
Expand Down Expand Up @@ -975,14 +976,25 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)

if (!new) {
nvme_start_queues(&ctrl->ctrl);
nvme_wait_freeze(&ctrl->ctrl);
if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
/*
* If we timed out waiting for freeze we are likely to
* be stuck. Fail the controller initialization just
* to be safe.
*/
ret = -ENODEV;
goto out_wait_freeze_timed_out;
}
blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
ctrl->ctrl.queue_count - 1);
nvme_unfreeze(&ctrl->ctrl);
}

return 0;

out_wait_freeze_timed_out:
nvme_stop_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
out_cleanup_connect_q:
if (new)
blk_cleanup_queue(ctrl->ctrl.connect_q);
Expand All @@ -997,6 +1009,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
mutex_lock(&ctrl->teardown_lock);
blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_stop_queue(&ctrl->queues[0]);
if (ctrl->ctrl.admin_tagset) {
Expand All @@ -1007,11 +1020,13 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
if (remove)
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
nvme_rdma_destroy_admin_queue(ctrl, remove);
mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
mutex_lock(&ctrl->teardown_lock);
if (ctrl->ctrl.queue_count > 1) {
nvme_start_freeze(&ctrl->ctrl);
nvme_stop_queues(&ctrl->ctrl);
Expand All @@ -1025,6 +1040,7 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
nvme_start_queues(&ctrl->ctrl);
nvme_rdma_destroy_io_queues(ctrl, remove);
}
mutex_unlock(&ctrl->teardown_lock);
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
Expand Down Expand Up @@ -1180,6 +1196,7 @@ static void nvme_rdma_error_recovery(struct nvme_rdma_ctrl *ctrl)
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
return;

dev_warn(ctrl->ctrl.device, "starting error recovery\n");
queue_work(nvme_reset_wq, &ctrl->err_work);
}

Expand Down Expand Up @@ -1946,6 +1963,22 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id,
return 0;
}

static void nvme_rdma_complete_timed_out(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct nvme_rdma_ctrl *ctrl = queue->ctrl;

/* fence other contexts that may complete the command */
mutex_lock(&ctrl->teardown_lock);
nvme_rdma_stop_queue(queue);
if (!blk_mq_request_completed(rq)) {
nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD;
blk_mq_complete_request(rq);
}
mutex_unlock(&ctrl->teardown_lock);
}

static enum blk_eh_timer_return
nvme_rdma_timeout(struct request *rq, bool reserved)
{
Expand All @@ -1956,29 +1989,29 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
dev_warn(ctrl->ctrl.device, "I/O %d QID %d timeout\n",
rq->tag, nvme_rdma_queue_idx(queue));

/*
* Restart the timer if a controller reset is already scheduled. Any
* timed out commands would be handled before entering the connecting
* state.
*/
if (ctrl->ctrl.state == NVME_CTRL_RESETTING)
return BLK_EH_RESET_TIMER;

if (ctrl->ctrl.state != NVME_CTRL_LIVE) {
/*
* Teardown immediately if controller times out while starting
* or we are already started error recovery. all outstanding
* requests are completed on shutdown, so we return BLK_EH_DONE.
* If we are resetting, connecting or deleting we should
* complete immediately because we may block controller
* teardown or setup sequence
* - ctrl disable/shutdown fabrics requests
* - connect requests
* - initialization admin requests
* - I/O requests that entered after unquiescing and
* the controller stopped responding
*
* All other requests should be cancelled by the error
* recovery work, so it's fine that we fail it here.
*/
flush_work(&ctrl->err_work);
nvme_rdma_teardown_io_queues(ctrl, false);
nvme_rdma_teardown_admin_queue(ctrl, false);
nvme_rdma_complete_timed_out(rq);
return BLK_EH_DONE;
}

dev_warn(ctrl->ctrl.device, "starting error recovery\n");
/*
* LIVE state should trigger the normal error recovery which will
* handle completing this request.
*/
nvme_rdma_error_recovery(ctrl);

return BLK_EH_RESET_TIMER;
}

Expand Down Expand Up @@ -2278,6 +2311,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
return ERR_PTR(-ENOMEM);
ctrl->ctrl.opts = opts;
INIT_LIST_HEAD(&ctrl->list);
mutex_init(&ctrl->teardown_lock);

if (!(opts->mask & NVMF_OPT_TRSVCID)) {
opts->trsvcid =
Expand Down
Loading

0 comments on commit 5d220bc

Please sign in to comment.