Merge tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:

 - NVMe pull request from Keith that addresses deadlocks, double resets,
   memory leaks, and other regressions.

 - Fixup elv_support_iosched() for bio based devices (Damien)

 - Fixup for the ahci PCS quirk (Dan)

 - Socket O_NONBLOCK handling fix for io_uring (me)

 - Timeout sequence io_uring fixes (yangerkun)

 - MD warning fix for parameter default_layout (Song)

 - blkcg activation fixes (Tejun)

 - blk-rq-qos node deletion fix (Tejun)

* tag 'for-linus-2019-10-18' of git://git.kernel.dk/linux-block:
  nvme-pci: Set the prp2 correctly when using more than 4k page
  io_uring: fix logic error in io_timeout
  io_uring: fix up O_NONBLOCK handling for sockets
  md/raid0: fix warning message for parameter default_layout
  libata/ahci: Fix PCS quirk application
  blk-rq-qos: fix first node deletion of rq_qos_del()
  blkcg: Fix multiple bugs in blkcg_activate_policy()
  io_uring: consider the overflow of sequence for timeout req
  nvme-tcp: fix possible leakage during error flow
  nvmet-loop: fix possible leakage during error flow
  block: Fix elv_support_iosched()
  nvme-tcp: Initialize sk->sk_ll_usec only with NET_RX_BUSY_POLL
  nvme: Wait for reset state when required
  nvme: Prevent resets during paused controller state
  nvme: Restart request timers in resetting state
  nvme: Remove ADMIN_ONLY state
  nvme-pci: Free tagset if no IO queues
  nvme: retain split access workaround for capability reads
  nvme: fix possible deadlock when nvme_update_formats fails
Linus Torvalds committed Oct 19, 2019
2 parents dfdcff3 + b55f009 commit d418d07
Showing 13 changed files with 266 additions and 117 deletions.
69 changes: 51 additions & 18 deletions block/blk-cgroup.c

@@ -1362,57 +1362,90 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	struct blkg_policy_data *pd_prealloc = NULL;
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg, *pinned_blkg = NULL;
 	int ret;
 
 	if (blkcg_policy_enabled(q, pol))
 		return 0;
 
 	if (queue_is_mq(q))
 		blk_mq_freeze_queue(q);
-pd_prealloc:
-	if (!pd_prealloc) {
-		pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q, &blkcg_root);
-		if (!pd_prealloc) {
-			ret = -ENOMEM;
-			goto out_bypass_end;
-		}
-	}
-
+retry:
 	spin_lock_irq(&q->queue_lock);
 
-	/* blkg_list is pushed at the head, reverse walk to init parents first */
+	/* blkg_list is pushed at the head, reverse walk to allocate parents first */
 	list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
 		struct blkg_policy_data *pd;
 
 		if (blkg->pd[pol->plid])
 			continue;
 
-		pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q, &blkcg_root);
-		if (!pd)
-			swap(pd, pd_prealloc);
+		/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
+		if (blkg == pinned_blkg) {
+			pd = pd_prealloc;
+			pd_prealloc = NULL;
+		} else {
+			pd = pol->pd_alloc_fn(GFP_NOWAIT | __GFP_NOWARN, q,
+					      blkg->blkcg);
+		}
+
 		if (!pd) {
+			/*
+			 * GFP_NOWAIT failed. Free the existing one and
+			 * prealloc for @blkg w/ GFP_KERNEL.
+			 */
+			if (pinned_blkg)
+				blkg_put(pinned_blkg);
+			blkg_get(blkg);
+			pinned_blkg = blkg;
+
 			spin_unlock_irq(&q->queue_lock);
-			goto pd_prealloc;
+
+			if (pd_prealloc)
+				pol->pd_free_fn(pd_prealloc);
+			pd_prealloc = pol->pd_alloc_fn(GFP_KERNEL, q,
+						       blkg->blkcg);
+			if (pd_prealloc)
+				goto retry;
+			else
+				goto enomem;
 		}
 
 		blkg->pd[pol->plid] = pd;
 		pd->blkg = blkg;
 		pd->plid = pol->plid;
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(pd);
 	}
 
+	/* all allocated, init in the same order */
+	if (pol->pd_init_fn)
+		list_for_each_entry_reverse(blkg, &q->blkg_list, q_node)
+			pol->pd_init_fn(blkg->pd[pol->plid]);
+
 	__set_bit(pol->plid, q->blkcg_pols);
 	ret = 0;
 
 	spin_unlock_irq(&q->queue_lock);
-out_bypass_end:
+out:
 	if (queue_is_mq(q))
 		blk_mq_unfreeze_queue(q);
+
+	if (pinned_blkg)
+		blkg_put(pinned_blkg);
 	if (pd_prealloc)
 		pol->pd_free_fn(pd_prealloc);
 	return ret;
+
+enomem:
+	/* alloc failed, nothing's initialized yet, free everything */
+	spin_lock_irq(&q->queue_lock);
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		if (blkg->pd[pol->plid]) {
+			pol->pd_free_fn(blkg->pd[pol->plid]);
+			blkg->pd[pol->plid] = NULL;
+		}
+	}
+	spin_unlock_irq(&q->queue_lock);
+	ret = -ENOMEM;
+	goto out;
 }
 EXPORT_SYMBOL_GPL(blkcg_activate_policy);
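The shape of this fix is a common kernel pattern: under a spinlock only non-blocking allocation is allowed, so on failure the code pins the node that needs memory, drops the lock, preallocates with a blocking allocator for exactly that node, and retries the whole walk. A minimal userspace sketch of that shape, with hypothetical toy names and malloc standing in for the GFP allocators (not the blkcg API):

/* toy_activate.c -- prealloc-and-retry sketch. Assumes alloc_nowait() may
 * fail and alloc_blocking() must not be called while the lock is held.
 * Build: cc -std=c11 -pthread toy_activate.c */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	struct node *next;
	void *pd;			/* per-node policy data */
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for GFP_NOWAIT | __GFP_NOWARN: allowed to fail. */
static void *alloc_nowait(void)
{
	return (rand() % 2) ? malloc(64) : NULL;
}

/* Stand-in for GFP_KERNEL: may sleep, assumed to succeed here. */
static void *alloc_blocking(void)
{
	return malloc(64);
}

static int activate(struct node *head)
{
	void *pd_prealloc = NULL;
	struct node *pinned = NULL, *n;

retry:
	pthread_mutex_lock(&lock);
	for (n = head; n; n = n->next) {
		void *pd;

		if (n->pd)
			continue;	/* handled on an earlier pass */

		if (n == pinned) {	/* consume the buffer made for it */
			pd = pd_prealloc;
			pd_prealloc = NULL;
		} else {
			pd = alloc_nowait();
		}
		if (!pd) {
			/* can't sleep under the lock: drop it, prealloc
			 * with the blocking allocator, retry the walk */
			pinned = n;
			pthread_mutex_unlock(&lock);
			free(pd_prealloc);
			pd_prealloc = alloc_blocking();
			if (!pd_prealloc)
				return -1;
			goto retry;
		}
		n->pd = pd;
	}
	pthread_mutex_unlock(&lock);
	free(pd_prealloc);
	return 0;
}

int main(void)
{
	struct node c = { NULL, NULL }, b = { &c, NULL }, a = { &b, NULL };

	printf("activate: %d\n", activate(&a));	/* expect 0 */
	return 0;
}

The pinning step matters in the kernel because dropping the lock lets the list change under you; keying the preallocated buffer to one pinned node guarantees a successful retry consumes it exactly once.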
13 changes: 5 additions & 8 deletions block/blk-rq-qos.h

@@ -108,16 +108,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
 
 static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
 {
-	struct rq_qos *cur, *prev = NULL;
-	for (cur = q->rq_qos; cur; cur = cur->next) {
-		if (cur == rqos) {
-			if (prev)
-				prev->next = rqos->next;
-			else
-				q->rq_qos = cur;
+	struct rq_qos **cur;
+
+	for (cur = &q->rq_qos; *cur; cur = &(*cur)->next) {
+		if (*cur == rqos) {
+			*cur = rqos->next;
 			break;
 		}
-		prev = cur;
 	}
 
 	blk_mq_debugfs_unregister_rqos(rqos);
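The bug here was the classic first-node case of singly-linked-list removal: with prev == NULL, the old code assigned q->rq_qos = cur, leaving the head pointing at the very node being removed. Iterating with a pointer-to-pointer makes the head link and the interior links the same case. A self-contained toy version of the idiom (illustrative names, not the kernel types):

/* toy_list_del.c -- indirect-pointer deletion idiom.
 * Build: cc -std=c11 toy_list_del.c */
#include <assert.h>
#include <stddef.h>

struct item { struct item *next; };

/* Unlink it from the list headed at *head, first node or not. */
static void list_del_item(struct item **head, struct item *it)
{
	struct item **cur;

	for (cur = head; *cur; cur = &(*cur)->next) {
		if (*cur == it) {
			*cur = it->next;  /* also correct when it == *head */
			break;
		}
	}
}

int main(void)
{
	struct item c = { NULL }, b = { &c }, a = { &b };
	struct item *head = &a;

	list_del_item(&head, &a);	/* delete the FIRST node: the buggy case */
	assert(head == &b);
	list_del_item(&head, &c);	/* delete an interior/last node */
	assert(head == &b && b.next == NULL);
	return 0;
}

Because cur holds the address of whichever link points at the candidate node, rewriting *cur unlinks it no matter where it sits, so no prev bookkeeping and no head special case are needed.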
3 changes: 2 additions & 1 deletion block/elevator.c

@@ -616,7 +616,8 @@ int elevator_switch_mq(struct request_queue *q,
 
 static inline bool elv_support_iosched(struct request_queue *q)
 {
-	if (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED))
+	if (!q->mq_ops ||
+	    (q->tag_set && (q->tag_set->flags & BLK_MQ_F_NO_SCHED)))
 		return false;
 	return true;
 }
4 changes: 3 additions & 1 deletion drivers/ata/ahci.c

@@ -1600,7 +1600,9 @@ static void ahci_intel_pcs_quirk(struct pci_dev *pdev, struct ahci_host_priv *hp
 	 */
 	if (!id || id->vendor != PCI_VENDOR_ID_INTEL)
 		return;
-	if (((enum board_ids) id->driver_data) < board_ahci_pcs7)
+
+	/* Skip applying the quirk on Denverton and beyond */
+	if (((enum board_ids) id->driver_data) >= board_ahci_pcs7)
 		return;
 
 	/*
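The quirk is intended only for boards older than Denverton (board_ahci_pcs7), but the old guard returned early for exactly those boards, inverting the intent. A toy reduction of the guard, using a hypothetical two-entry enum rather than the driver's real board table:

/* toy_pcs_guard.c -- the inverted range check, reduced.
 * Build: cc -std=c11 toy_pcs_guard.c */
#include <assert.h>

enum board_ids { board_ahci, board_ahci_pcs7 };

static int quirk_applies(enum board_ids id)
{
	if (id >= board_ahci_pcs7)	/* was "< board_ahci_pcs7": inverted */
		return 0;
	return 1;
}

int main(void)
{
	assert(quirk_applies(board_ahci));	  /* pre-Denverton: quirk on */
	assert(!quirk_applies(board_ahci_pcs7));  /* Denverton+: skipped */
	return 0;
}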
2 changes: 1 addition & 1 deletion drivers/md/raid0.c

@@ -154,7 +154,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	} else {
 		pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
 		       mdname(mddev));
-		pr_err("md/raid0: please set raid.default_layout to 1 or 2\n");
+		pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
 		err = -ENOTSUPP;
 		goto abort;
 	}
94 changes: 68 additions & 26 deletions drivers/nvme/host/core.c

@@ -116,10 +116,26 @@ static void nvme_queue_scan(struct nvme_ctrl *ctrl)
 	/*
 	 * Only new queue scan work when admin and IO queues are both alive
 	 */
-	if (ctrl->state == NVME_CTRL_LIVE)
+	if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
 		queue_work(nvme_wq, &ctrl->scan_work);
 }
 
+/*
+ * Use this function to proceed with scheduling reset_work for a controller
+ * that had previously been set to the resetting state. This is intended for
+ * code paths that can't be interrupted by other reset attempts. A hot removal
+ * may prevent this from succeeding.
+ */
+int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
+{
+	if (ctrl->state != NVME_CTRL_RESETTING)
+		return -EBUSY;
+	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
+		return -EBUSY;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
+
 int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
 {
 	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
@@ -137,8 +153,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
 	ret = nvme_reset_ctrl(ctrl);
 	if (!ret) {
 		flush_work(&ctrl->reset_work);
-		if (ctrl->state != NVME_CTRL_LIVE &&
-		    ctrl->state != NVME_CTRL_ADMIN_ONLY)
+		if (ctrl->state != NVME_CTRL_LIVE)
 			ret = -ENETRESET;
 	}
 
@@ -315,15 +330,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
 	old_state = ctrl->state;
 	switch (new_state) {
-	case NVME_CTRL_ADMIN_ONLY:
-		switch (old_state) {
-		case NVME_CTRL_CONNECTING:
-			changed = true;
-			/* FALLTHRU */
-		default:
-			break;
-		}
-		break;
 	case NVME_CTRL_LIVE:
 		switch (old_state) {
 		case NVME_CTRL_NEW:
@@ -339,7 +345,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		switch (old_state) {
 		case NVME_CTRL_NEW:
 		case NVME_CTRL_LIVE:
-		case NVME_CTRL_ADMIN_ONLY:
 			changed = true;
 			/* FALLTHRU */
 		default:
@@ -359,7 +364,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 	case NVME_CTRL_DELETING:
 		switch (old_state) {
 		case NVME_CTRL_LIVE:
-		case NVME_CTRL_ADMIN_ONLY:
 		case NVME_CTRL_RESETTING:
 		case NVME_CTRL_CONNECTING:
 			changed = true;
@@ -381,8 +385,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 		break;
 	}
 
-	if (changed)
+	if (changed) {
 		ctrl->state = new_state;
+		wake_up_all(&ctrl->state_wq);
+	}
 
 	spin_unlock_irqrestore(&ctrl->lock, flags);
 	if (changed && ctrl->state == NVME_CTRL_LIVE)
@@ -391,6 +397,39 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 }
 EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
 
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+	switch (ctrl->state) {
+	case NVME_CTRL_NEW:
+	case NVME_CTRL_LIVE:
+	case NVME_CTRL_RESETTING:
+	case NVME_CTRL_CONNECTING:
+		return false;
+	case NVME_CTRL_DELETING:
+	case NVME_CTRL_DEAD:
+		return true;
+	default:
+		WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+		return true;
+	}
+}
+
+/*
+ * Waits for the controller state to be resetting, or returns false if it is
+ * not possible to ever transition to that state.
+ */
+bool nvme_wait_reset(struct nvme_ctrl *ctrl)
+{
+	wait_event(ctrl->state_wq,
+		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
+		   nvme_state_terminal(ctrl));
+	return ctrl->state == NVME_CTRL_RESETTING;
+}
+EXPORT_SYMBOL_GPL(nvme_wait_reset);
+
 static void nvme_free_ns_head(struct kref *ref)
 {
 	struct nvme_ns_head *head =
@@ -1306,8 +1345,6 @@ static void nvme_update_formats(struct nvme_ctrl *ctrl)
 		if (ns->disk && nvme_revalidate_disk(ns->disk))
 			nvme_set_queue_dying(ns);
 	up_read(&ctrl->namespaces_rwsem);
-
-	nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
 }
 
 static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
@@ -1323,6 +1360,7 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
 		nvme_unfreeze(ctrl);
 		nvme_mpath_unfreeze(ctrl->subsys);
 		mutex_unlock(&ctrl->subsys->lock);
+		nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
 		mutex_unlock(&ctrl->scan_lock);
 	}
 	if (effects & NVME_CMD_EFFECTS_CCC)
@@ -2874,7 +2912,6 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
 
 	switch (ctrl->state) {
 	case NVME_CTRL_LIVE:
-	case NVME_CTRL_ADMIN_ONLY:
 		break;
 	default:
 		return -EWOULDBLOCK;
@@ -3168,7 +3205,6 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
 	static const char *const state_name[] = {
 		[NVME_CTRL_NEW]		= "new",
 		[NVME_CTRL_LIVE]	= "live",
-		[NVME_CTRL_ADMIN_ONLY]	= "only-admin",
 		[NVME_CTRL_RESETTING]	= "resetting",
 		[NVME_CTRL_CONNECTING]	= "connecting",
 		[NVME_CTRL_DELETING]	= "deleting",
@@ -3679,11 +3715,10 @@ static void nvme_scan_work(struct work_struct *work)
 	struct nvme_id_ctrl *id;
 	unsigned nn;
 
-	if (ctrl->state != NVME_CTRL_LIVE)
+	/* No tagset on a live ctrl means IO queues could not created */
+	if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
 		return;
 
-	WARN_ON_ONCE(!ctrl->tagset);
-
 	if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
 		dev_info(ctrl->device, "rescanning namespaces.\n");
 		nvme_clear_changed_ns_log(ctrl);
@@ -3844,13 +3879,13 @@ static void nvme_fw_act_work(struct work_struct *work)
 		if (time_after(jiffies, fw_act_timeout)) {
 			dev_warn(ctrl->device,
 				"Fw activation timeout, reset controller\n");
-			nvme_reset_ctrl(ctrl);
-			break;
+			nvme_try_sched_reset(ctrl);
+			return;
 		}
 		msleep(100);
 	}
 
-	if (ctrl->state != NVME_CTRL_LIVE)
+	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
 		return;
 
 	nvme_start_queues(ctrl);
@@ -3870,7 +3905,13 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
 		nvme_queue_scan(ctrl);
 		break;
 	case NVME_AER_NOTICE_FW_ACT_STARTING:
-		queue_work(nvme_wq, &ctrl->fw_act_work);
+		/*
+		 * We are (ab)using the RESETTING state to prevent subsequent
+		 * recovery actions from interfering with the controller's
+		 * firmware activation.
+		 */
+		if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
+			queue_work(nvme_wq, &ctrl->fw_act_work);
		break;
 #ifdef CONFIG_NVME_MULTIPATH
 	case NVME_AER_NOTICE_ANA:
@@ -3993,6 +4034,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 	INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
 	INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
 	INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
+	init_waitqueue_head(&ctrl->state_wq);
 
 	INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
 	memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
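The new state_wq plumbing is what makes nvme_wait_reset() possible: every successful state change calls wake_up_all(), and waiters sleep in wait_event() until they either win the transition to RESETTING themselves or observe a terminal state. A userspace analog of that pattern using a condition variable; toy names and simplified transition rules, not the driver's actual state table:

/* toy_state_wait.c -- userspace sketch of the ctrl->state_wq pattern.
 * Build: cc -std=c11 -pthread toy_state_wait.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

enum state { ST_NEW, ST_LIVE, ST_RESETTING, ST_DELETING, ST_DEAD };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t state_wq = PTHREAD_COND_INITIALIZER; /* ~ ctrl->state_wq */
static enum state state = ST_NEW;

/* With the lock held: attempt the NEW/LIVE -> RESETTING transition. */
static bool try_set_resetting_locked(void)
{
	if (state == ST_NEW || state == ST_LIVE) {
		state = ST_RESETTING;
		pthread_cond_broadcast(&state_wq);	/* ~ wake_up_all() */
		return true;
	}
	return false;
}

/* With the lock held: states that can never become LIVE again. */
static bool state_terminal_locked(void)
{
	return state == ST_DELETING || state == ST_DEAD;
}

/* ~ nvme_wait_reset(): sleep until we win the RESETTING transition or
 * learn that it can never happen. */
static bool wait_reset(void)
{
	bool won;

	pthread_mutex_lock(&lock);
	while (!(won = try_set_resetting_locked()) && !state_terminal_locked())
		pthread_cond_wait(&state_wq, &lock);
	pthread_mutex_unlock(&lock);
	return won;
}

/* Any other transition also broadcasts, so sleeping waiters re-check. */
static void set_state(enum state s)
{
	pthread_mutex_lock(&lock);
	state = s;
	pthread_cond_broadcast(&state_wq);
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	set_state(ST_LIVE);
	printf("wait_reset -> %d (expect 1)\n", wait_reset());
	set_state(ST_DEAD);
	printf("wait_reset -> %d (expect 0)\n", wait_reset());
	return 0;
}

This is also why the firmware-activation path above can safely park the controller in RESETTING: any competing reset first has to win the same state transition, and nvme_try_sched_reset() only queues work for a controller already in that state.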
3 changes: 1 addition & 2 deletions drivers/nvme/host/fabrics.h

@@ -182,8 +182,7 @@ bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
 static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
 		bool queue_live)
 {
-	if (likely(ctrl->state == NVME_CTRL_LIVE ||
-		   ctrl->state == NVME_CTRL_ADMIN_ONLY))
+	if (likely(ctrl->state == NVME_CTRL_LIVE))
 		return true;
 	return __nvmf_check_ready(ctrl, rq, queue_live);
 }