Skip to content

Commit

Permalink
Merge tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block
Browse files Browse the repository at this point in the history
Pull block fixes from Jens Axboe:

 - Mandate timespec64 for the io_uring timeout ABI (Arnd)

 - Set of NVMe changes via Sagi:
     - controller removal race fix from Balbir
     - quirk additions from Gabriel and Jian-Hong
     - nvme-pci power state save fix from Mario
     - Add 64bit user commands (for 64bit registers) from Marta
     - nvme-rdma/nvme-tcp fixes from Max, Mark and Me
     - Minor cleanups and nits from James, Dan and John

 - Two s390 dasd fixes (Jan, Stefan)

 - Have loop change block size in DIO mode (Martijn)

 - paride pg header ifdef guard (Masahiro)

 - Two blk-mq queue scheduler tweaks, fixing an ordering issue on zoned
   devices and suboptimal performance on others (Ming)

* tag 'for-linus-2019-10-03' of git://git.kernel.dk/linux-block: (22 commits)
  block: sed-opal: fix sparse warning: convert __be64 data
  block: sed-opal: fix sparse warning: obsolete array init.
  block: pg: add header include guard
  Revert "s390/dasd: Add discard support for ESE volumes"
  s390/dasd: Fix error handling during online processing
  io_uring: use __kernel_timespec in timeout ABI
  loop: change queue block size to match when using DIO
  blk-mq: apply normal plugging for HDD
  blk-mq: honor IO scheduler for multiqueue devices
  nvme-rdma: fix possible use-after-free in connect timeout
  nvme: Move ctrl sqsize to generic space
  nvme: Add ctrl attributes for queue_count and sqsize
  nvme: allow 64-bit results in passthru commands
  nvme: Add quirk for Kingston NVME SSD running FW E8FK11.T
  nvmet-tcp: remove superflous check on request sgl
  Added QUIRKs for ADATA XPG SX8200 Pro 512GB
  nvme-rdma: Fix max_hw_sectors calculation
  nvme: fix an error code in nvme_init_subsystem()
  nvme-pci: Save PCI state before putting drive into deepest state
  nvme-tcp: fix wrong stop condition in io_work
  ...
  • Loading branch information
Linus Torvalds committed Oct 4, 2019
2 parents cc3a7bf + a9eb49c commit c4bd70e
Show file tree
Hide file tree
Showing 14 changed files with 218 additions and 132 deletions.
12 changes: 9 additions & 3 deletions block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -1992,10 +1992,14 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
/* bypass scheduler for flush rq */
blk_insert_flush(rq);
blk_mq_run_hw_queue(data.hctx, true);
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs)) {
} else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
!blk_queue_nonrot(q))) {
/*
* Use plugging if we have a ->commit_rqs() hook as well, as
* we know the driver uses bd->last in a smart fashion.
*
* Use normal plugging if this disk is slow HDD, as sequential
* IO may benefit a lot from plug merging.
*/
unsigned int request_count = plug->rq_count;
struct request *last = NULL;
Expand All @@ -2012,6 +2016,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
}

blk_add_rq_to_plug(plug, rq);
} else if (q->elevator) {
blk_mq_sched_insert_request(rq, false, true, true);
} else if (plug && !blk_queue_nomerges(q)) {
/*
* We do limited plugging. If the bio can be merged, do that.
Expand All @@ -2035,8 +2041,8 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
&cookie);
}
} else if ((q->nr_hw_queues > 1 && is_sync) || (!q->elevator &&
!data.hctx->dispatch_busy)) {
} else if ((q->nr_hw_queues > 1 && is_sync) ||
!data.hctx->dispatch_busy) {
blk_mq_try_issue_directly(data.hctx, rq, &cookie);
} else {
blk_mq_sched_insert_request(rq, false, true, true);
Expand Down
6 changes: 3 additions & 3 deletions block/sed-opal.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = {
{ 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x84, 0x01 },

/* tables */
[OPAL_TABLE_TABLE]
[OPAL_TABLE_TABLE] =
{ 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01 },
[OPAL_LOCKINGRANGE_GLOBAL] =
{ 0x00, 0x00, 0x08, 0x02, 0x00, 0x00, 0x00, 0x01 },
Expand Down Expand Up @@ -372,8 +372,8 @@ static void check_geometry(struct opal_dev *dev, const void *data)
{
const struct d0_geometry_features *geo = data;

dev->align = geo->alignment_granularity;
dev->lowest_lba = geo->lowest_aligned_lba;
dev->align = be64_to_cpu(geo->alignment_granularity);
dev->lowest_lba = be64_to_cpu(geo->lowest_aligned_lba);
}

static int execute_step(struct opal_dev *dev,
Expand Down
10 changes: 10 additions & 0 deletions drivers/block/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -994,6 +994,16 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);

if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
/* In case of direct I/O, match underlying block size */
unsigned short bsize = bdev_logical_block_size(
inode->i_sb->s_bdev);

blk_queue_logical_block_size(lo->lo_queue, bsize);
blk_queue_physical_block_size(lo->lo_queue, bsize);
blk_queue_io_min(lo->lo_queue, bsize);
}

loop_update_rotational(lo);
loop_update_dio(lo);
set_capacity(lo->lo_disk, size);
Expand Down
132 changes: 113 additions & 19 deletions drivers/nvme/host/core.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
*/
if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
return;
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
/* Forcibly unquiesce queues to avoid blocking dispatch */
blk_mq_unquiesce_queue(ns->queue);
/*
* Revalidate after unblocking dispatchers that may be holding bd_butex
*/
revalidate_disk(ns->disk);
}

static void nvme_queue_scan(struct nvme_ctrl *ctrl)
Expand Down Expand Up @@ -847,7 +850,7 @@ static void *nvme_add_user_metadata(struct bio *bio, void __user *ubuf,
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
u32 meta_seed, u32 *result, unsigned timeout)
u32 meta_seed, u64 *result, unsigned timeout)
{
bool write = nvme_is_write(cmd);
struct nvme_ns *ns = q->queuedata;
Expand Down Expand Up @@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
else
ret = nvme_req(req)->status;
if (result)
*result = le32_to_cpu(nvme_req(req)->result.u32);
*result = le64_to_cpu(nvme_req(req)->result.u64);
if (meta && !ret && !write) {
if (copy_to_user(meta_buffer, meta, meta_len))
ret = -EFAULT;
Expand Down Expand Up @@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_command c;
unsigned timeout = 0;
u32 effects;
u64 result;
int status;

if (!capable(CAP_SYS_ADMIN))
return -EACCES;
if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
return -EFAULT;
if (cmd.flags)
return -EINVAL;

memset(&c, 0, sizeof(c));
c.common.opcode = cmd.opcode;
c.common.flags = cmd.flags;
c.common.nsid = cpu_to_le32(cmd.nsid);
c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
c.common.cdw10 = cpu_to_le32(cmd.cdw10);
c.common.cdw11 = cpu_to_le32(cmd.cdw11);
c.common.cdw12 = cpu_to_le32(cmd.cdw12);
c.common.cdw13 = cpu_to_le32(cmd.cdw13);
c.common.cdw14 = cpu_to_le32(cmd.cdw14);
c.common.cdw15 = cpu_to_le32(cmd.cdw15);

if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);

effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
(void __user *)(uintptr_t)cmd.metadata,
cmd.metadata_len, 0, &result, timeout);
nvme_passthru_end(ctrl, effects);

if (status >= 0) {
if (put_user(result, &ucmd->result))
return -EFAULT;
}

return status;
}

static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd64 __user *ucmd)
{
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
u32 effects;
int status;

if (!capable(CAP_SYS_ADMIN))
Expand Down Expand Up @@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
srcu_read_unlock(&head->srcu, idx);
}

static bool is_ctrl_ioctl(unsigned int cmd)
{
if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
return true;
if (is_sed_ioctl(cmd))
return true;
return false;
}

static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
void __user *argp,
struct nvme_ns_head *head,
int srcu_idx)
{
struct nvme_ctrl *ctrl = ns->ctrl;
int ret;

nvme_get_ctrl(ns->ctrl);
nvme_put_ns_from_disk(head, srcu_idx);

switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
ret = nvme_user_cmd(ctrl, NULL, argp);
break;
case NVME_IOCTL_ADMIN64_CMD:
ret = nvme_user_cmd64(ctrl, NULL, argp);
break;
default:
ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
break;
}
nvme_put_ctrl(ctrl);
return ret;
}

static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
Expand All @@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
* seperately and drop the ns SRCU reference early. This avoids a
* deadlock when deleting namespaces using the passthrough interface.
*/
if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
struct nvme_ctrl *ctrl = ns->ctrl;

nvme_get_ctrl(ns->ctrl);
nvme_put_ns_from_disk(head, srcu_idx);

if (cmd == NVME_IOCTL_ADMIN_CMD)
ret = nvme_user_cmd(ctrl, NULL, argp);
else
ret = sed_ioctl(ctrl->opal_dev, cmd, argp);

nvme_put_ctrl(ctrl);
return ret;
}
if (is_ctrl_ioctl(cmd))
return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);

switch (cmd) {
case NVME_IOCTL_ID:
Expand All @@ -1448,6 +1522,9 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
case NVME_IOCTL_SUBMIT_IO:
ret = nvme_submit_io(ns, argp);
break;
case NVME_IOCTL_IO64_CMD:
ret = nvme_user_cmd64(ns->ctrl, ns, argp);
break;
default:
if (ns->ndev)
ret = nvme_nvm_ioctl(ns, cmd, arg);
Expand Down Expand Up @@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
.vid = 0x14a4,
.fr = "22301111",
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
},
{
/*
* This Kingston E8FK11.T firmware version has no interrupt
* after resume with actions related to suspend to idle
* https://bugzilla.kernel.org/show_bug.cgi?id=204887
*/
.vid = 0x2646,
.fr = "E8FK11.T",
.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
}
};

Expand Down Expand Up @@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
list_add_tail(&subsys->entry, &nvme_subsystems);
}

if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
dev_name(ctrl->device))) {
ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
dev_name(ctrl->device));
if (ret) {
dev_err(ctrl->device,
"failed to create sysfs link from subsystem.\n");
goto out_put_subsystem;
Expand Down Expand Up @@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
switch (cmd) {
case NVME_IOCTL_ADMIN_CMD:
return nvme_user_cmd(ctrl, NULL, argp);
case NVME_IOCTL_ADMIN64_CMD:
return nvme_user_cmd64(ctrl, NULL, argp);
case NVME_IOCTL_IO_CMD:
return nvme_dev_user_cmd(ctrl, argp);
case NVME_IOCTL_RESET:
Expand Down Expand Up @@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);

nvme_show_int_function(cntlid);
nvme_show_int_function(numa_node);
nvme_show_int_function(queue_count);
nvme_show_int_function(sqsize);

static ssize_t nvme_sysfs_delete(struct device *dev,
struct device_attribute *attr, const char *buf,
Expand Down Expand Up @@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = {
&dev_attr_address.attr,
&dev_attr_state.attr,
&dev_attr_numa_node.attr,
&dev_attr_queue_count.attr,
&dev_attr_sqsize.attr,
NULL
};

Expand Down
2 changes: 1 addition & 1 deletion drivers/nvme/host/nvme.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ struct nvme_ctrl {
u16 oacs;
u16 nssa;
u16 nr_streams;
u16 sqsize;
u32 max_namespaces;
atomic_t abort_limit;
u8 vwc;
Expand Down Expand Up @@ -269,7 +270,6 @@ struct nvme_ctrl {
u16 hmmaxd;

/* Fabrics only */
u16 sqsize;
u32 ioccsz;
u32 iorcsz;
u16 icdoff;
Expand Down
20 changes: 13 additions & 7 deletions drivers/nvme/host/pci.c
Original file line number Diff line number Diff line change
Expand Up @@ -2946,26 +2946,29 @@ static int nvme_suspend(struct device *dev)
if (ret < 0)
goto unfreeze;

/*
* A saved state prevents pci pm from generically controlling the
* device's power. If we're using protocol specific settings, we don't
* want pci interfering.
*/
pci_save_state(pdev);

ret = nvme_set_power_state(ctrl, ctrl->npss);
if (ret < 0)
goto unfreeze;

if (ret) {
/* discard the saved state */
pci_load_saved_state(pdev, NULL);

/*
* Clearing npss forces a controller reset on resume. The
* correct value will be resdicovered then.
*/
nvme_dev_disable(ndev, true);
ctrl->npss = 0;
ret = 0;
goto unfreeze;
}
/*
* A saved state prevents pci pm from generically controlling the
* device's power. If we're using protocol specific settings, we don't
* want pci interfering.
*/
pci_save_state(pdev);
unfreeze:
nvme_unfreeze(ctrl);
return ret;
Expand Down Expand Up @@ -3090,6 +3093,9 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
Expand Down
Loading

0 comments on commit c4bd70e

Please sign in to comment.