From 4e893ca8117022de68ce1b61c0309e3d17bb8a25 Mon Sep 17 00:00:00 2001
From: Stuart Hayes
Date: Wed, 17 Jul 2024 13:55:50 -0500
Subject: [PATCH 01/13] nvme_core: scan namespaces asynchronously

Use async function calls to make namespace scanning happen in parallel.

Without the patch, NVMe namespaces are scanned serially, so it can take
a long time for all of a controller's namespaces to become available,
especially with a slower (TCP) interface and a large number of
namespaces.

It is not uncommon to have large numbers (hundreds or thousands) of
namespaces on nvme-of storage servers.

The time it took for all namespaces to show up after connecting (via
TCP) to a controller with 1002 namespaces was measured on one system:

  network latency   without patch   with patch
        0                 6s            1s
       50ms             210s           10s
      100ms             417s           18s

Measurements taken on another system show the effect of the patch on
the time nvme_scan_work() took to complete, when connecting to a linux
nvme-of target with varying numbers of namespaces, on a network with
400us latency:

  namespaces   without patch   with patch
        1           16ms          14ms
        2           24ms          16ms
        4           49ms          22ms
        8          101ms          33ms
       16          207ms          56ms
      100           1.4s          0.6s
     1000          12.9s          2.0s

On the same system, connecting to a local PCIe NVMe drive (a Samsung
PM1733) instead of a network target:

  namespaces   without patch   with patch
        1           13ms          12ms
        2           41ms          13ms

Signed-off-by: Stuart Hayes
Reviewed-by: Sagi Grimberg
---
 drivers/nvme/host/core.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 053d5b4909cda..b132887429c0b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2011-2014, Intel Corporation.
  */
 
+#include <linux/async.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/compat.h>
@@ -4040,6 +4041,35 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	}
 }
 
+/**
+ * struct async_scan_info - keeps track of controller & NSIDs to scan
+ * @ctrl:	Controller on which namespaces are being scanned
+ * @next_nsid:	Index of next NSID to scan in ns_list
+ * @ns_list:	Pointer to list of NSIDs to scan
+ *
+ * Note: There is a single async_scan_info structure shared by all instances
+ * of nvme_scan_ns_async() scanning a given controller, so the atomic
+ * operations on next_nsid are critical to ensure each instance scans a unique
+ * NSID.
+ */
+struct async_scan_info {
+	struct nvme_ctrl *ctrl;
+	atomic_t next_nsid;
+	__le32 *ns_list;
+};
+
+static void nvme_scan_ns_async(void *data, async_cookie_t cookie)
+{
+	struct async_scan_info *scan_info = data;
+	int idx;
+	u32 nsid;
+
+	idx = (u32)atomic_fetch_inc(&scan_info->next_nsid);
+	nsid = le32_to_cpu(scan_info->ns_list[idx]);
+
+	nvme_scan_ns(scan_info->ctrl, nsid);
+}
+
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					unsigned nsid)
 {
@@ -4066,11 +4096,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 	__le32 *ns_list;
 	u32 prev = 0;
 	int ret = 0, i;
+	ASYNC_DOMAIN(domain);
+	struct async_scan_info scan_info;
 
 	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
 	if (!ns_list)
 		return -ENOMEM;
 
+	scan_info.ctrl = ctrl;
+	scan_info.ns_list = ns_list;
 	for (;;) {
 		struct nvme_command cmd = {
 			.identify.opcode	= nvme_admin_identify,
@@ -4086,19 +4120,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 			goto free;
 		}
 
+		atomic_set(&scan_info.next_nsid, 0);
 		for (i = 0; i < nr_entries; i++) {
 			u32 nsid = le32_to_cpu(ns_list[i]);
 
 			if (!nsid)	/* end of the list? */
 				goto out;
-			nvme_scan_ns(ctrl, nsid);
+			async_schedule_domain(nvme_scan_ns_async, &scan_info,
+						&domain);
 			while (++prev < nsid)
 				nvme_ns_remove_by_nsid(ctrl, prev);
 		}
+		async_synchronize_full_domain(&domain);
 	}
  out:
 	nvme_remove_invalid_namespaces(ctrl, prev);
  free:
+	async_synchronize_full_domain(&domain);
 	kfree(ns_list);
 	return ret;
 }
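Note (illustration, not part of the patch): the scheduling shape used above
comes from the kernel's async infrastructure. The stand-alone sketch below
shows the same schedule/synchronize pattern in isolation; the demo_* names
and the printed message are invented for the example.

    #include <linux/async.h>
    #include <linux/atomic.h>
    #include <linux/printk.h>

    /* Hypothetical shared state, analogous to struct async_scan_info above. */
    struct demo_batch {
            atomic_t next;          /* index handed to the next async worker */
            int nr_items;
    };

    static void demo_work(void *data, async_cookie_t cookie)
    {
            struct demo_batch *batch = data;
            int idx = atomic_fetch_inc(&batch->next);

            pr_info("processing item %d of %d\n", idx, batch->nr_items);
    }

    static void demo_run(struct demo_batch *batch)
    {
            ASYNC_DOMAIN(domain);   /* private domain, separate from boot-time async work */
            int i;

            atomic_set(&batch->next, 0);
            for (i = 0; i < batch->nr_items; i++)
                    async_schedule_domain(demo_work, batch, &domain);

            /* Wait for every job scheduled on this domain before returning. */
            async_synchronize_full_domain(&domain);
    }

As in the patch, each worker claims its own index with an atomic
fetch-and-increment, so the scheduler never has to pass per-item arguments.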
From 79559c75332458985ab8a21f11b08bf7c9b833b0 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:18 +0200
Subject: [PATCH 02/13] nvme-keyring: restrict match length for version '1'
 identifiers

TP8018 introduced a new TLS PSK identifier version (version 1), which
appended a PSK hash value to the existing identifier (cf. NVMe TCP
specification v1.1, section 3.6.1.3 'TLS PSK and PSK Identity
Derivation').

An original (version 0) identifier has the form:

  NVMe0<R|G><hh> <hostnqn> <subsysnqn>

and a version 1 identifier has the same form with the PSK digest
appended:

  NVMe1<R|G><hh> <hostnqn> <subsysnqn> <PSK digest>

This patch modifies the lookup algorithm to compare only the first part
of the identifier (excluding the hash value) so that both version 0 and
version 1 identifiers are handled. And as the spec declares 'version 0'
identifiers obsolete, the lookup algorithm is modified to prefer
version 1 identifiers.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/common/keyring.c | 36 ++++++++++++++++++++++++----------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index 6f7e7a8fa5ae4..05e89307c8aa3 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -36,14 +36,12 @@ static bool nvme_tls_psk_match(const struct key *key,
 		pr_debug("%s: no key description\n", __func__);
 		return false;
 	}
-	match_len = strlen(key->description);
-	pr_debug("%s: id %s len %zd\n", __func__, key->description, match_len);
-
 	if (!match_data->raw_data) {
 		pr_debug("%s: no match data\n", __func__);
 		return false;
 	}
 	match_id = match_data->raw_data;
+	match_len = strlen(match_id);
 	pr_debug("%s: match '%s' '%s' len %zd\n", __func__,
 		 match_id, key->description, match_len);
 	return !memcmp(key->description, match_id, match_len);
@@ -71,7 +69,7 @@ static struct key_type nvme_tls_psk_key_type = {
 
 static struct key *nvme_tls_psk_lookup(struct key *keyring,
 		const char *hostnqn, const char *subnqn,
-		int hmac, bool generated)
+		u8 hmac, u8 psk_ver, bool generated)
 {
 	char *identity;
 	size_t identity_len = (NVMF_NQN_SIZE) * 2 + 11;
@@ -82,8 +80,8 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
 	if (!identity)
 		return ERR_PTR(-ENOMEM);
 
-	snprintf(identity, identity_len, "NVMe0%c%02d %s %s",
-		 generated ? 'G' : 'R', hmac, hostnqn, subnqn);
+	snprintf(identity, identity_len, "NVMe%u%c%02u %s %s",
+		 psk_ver, generated ? 'G' : 'R', hmac, hostnqn, subnqn);
 
 	if (!keyring)
 		keyring = nvme_keyring;
@@ -107,21 +105,38 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring,
 /*
  * NVMe PSK priority list
  *
- * 'Retained' PSKs (ie 'generated == false')
- * should be preferred to 'generated' PSKs,
- * and SHA-384 should be preferred to SHA-256.
+ * 'Retained' PSKs (ie 'generated == false') should be preferred to 'generated'
+ * PSKs, PSKs with hash (psk_ver 1) should be preferred to PSKs without hash
+ * (psk_ver 0), and SHA-384 should be preferred to SHA-256.
  */
 static struct nvme_tls_psk_priority_list {
 	bool generated;
+	u8 psk_ver;
 	enum nvme_tcp_tls_cipher cipher;
 } nvme_tls_psk_prio[] = {
 	{ .generated = false,
+	  .psk_ver = 1,
+	  .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
+	{ .generated = false,
+	  .psk_ver = 1,
+	  .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
+	{ .generated = false,
+	  .psk_ver = 0,
 	  .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
 	{ .generated = false,
+	  .psk_ver = 0,
+	  .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
+	{ .generated = true,
+	  .psk_ver = 1,
+	  .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
+	{ .generated = true,
+	  .psk_ver = 1,
 	  .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
 	{ .generated = true,
+	  .psk_ver = 0,
 	  .cipher = NVME_TCP_TLS_CIPHER_SHA384, },
 	{ .generated = true,
+	  .psk_ver = 0,
 	  .cipher = NVME_TCP_TLS_CIPHER_SHA256, },
 };
@@ -137,10 +152,11 @@ key_serial_t nvme_tls_psk_default(struct key *keyring,
 
 	for (prio = 0; prio < ARRAY_SIZE(nvme_tls_psk_prio); prio++) {
 		bool generated = nvme_tls_psk_prio[prio].generated;
+		u8 ver = nvme_tls_psk_prio[prio].psk_ver;
 		enum nvme_tcp_tls_cipher cipher = nvme_tls_psk_prio[prio].cipher;
 
 		tls_key = nvme_tls_psk_lookup(keyring, hostnqn, subnqn,
					      cipher, generated);
+					      cipher, ver, generated);
 		if (!IS_ERR(tls_key)) {
 			tls_key_id = tls_key->serial;
 			key_put(tls_key);
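Note (illustration, not part of the patch): the effect of taking match_len
from the requested identity rather than from the stored key description can
be reproduced in plain user space. The NQNs and the appended digest below are
made-up placeholders.

    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
            char wanted[256];

            /* What nvme_tls_psk_lookup() asks for: no digest appended. */
            snprintf(wanted, sizeof(wanted), "NVMe%u%c%02u %s %s",
                     1, 'R', 1, "nqn.host", "nqn.subsys");

            /* What a version 1 key description looks like: digest appended. */
            const char *stored = "NVMe1R01 nqn.host nqn.subsys DEADBEEF=";

            /* Compare only over the length of the requested identity. */
            size_t match_len = strlen(wanted);

            printf("prefix match: %s\n",
                   memcmp(stored, wanted, match_len) == 0 ? "yes" : "no");
            return 0;
    }

With the old code, the comparison length was taken from the stored
description and therefore included the digest, which the requested identity
does not contain, so a version 1 key could never match.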
From 363895767fbfa05891b0b4d9e06ebde7a10c6a07 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:19 +0200
Subject: [PATCH 03/13] nvme-tcp: sanitize TLS key handling

There is a difference between TLS configured (ie the user has
provisioned/requested a key) and TLS enabled (ie the connection is
encrypted with TLS). This becomes important for secure concatenation,
where the initial authentication is run on an unencrypted connection
(ie with TLS configured, but not enabled), and then the queue is reset
to run over TLS (ie TLS configured _and_ enabled).

So to differentiate between those two states, store the generated key
in opts->tls_key (as we're using the same TLS key for all queues), the
key serial of the resulting TLS handshake in ctrl->tls_pskid (to signal
that TLS on the admin queue is enabled), and a simple flag for the
queues to indicate that TLS has been enabled.
Signed-off-by: Hannes Reinecke
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/host/core.c  |  1 -
 drivers/nvme/host/nvme.h  |  2 +-
 drivers/nvme/host/sysfs.c |  4 +--
 drivers/nvme/host/tcp.c   | 53 +++++++++++++++++++++++++++++----------
 4 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b132887429c0b..93aa4c10adb73 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4715,7 +4715,6 @@ static void nvme_free_ctrl(struct device *dev)
 
 	if (!subsys || ctrl->instance != subsys->instance)
 		ida_free(&nvme_instance_ida, ctrl->instance);
-	key_put(ctrl->tls_key);
 	nvme_free_cels(ctrl);
 	nvme_mpath_uninit(ctrl);
 	cleanup_srcu_struct(&ctrl->srcu);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f900e44243aef..af22368800e77 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -373,7 +373,7 @@ struct nvme_ctrl {
 	struct nvme_dhchap_key *ctrl_key;
 	u16 transaction;
 #endif
-	struct key *tls_key;
+	key_serial_t tls_pskid;
 
 	/* Power saving configuration */
 	u64 ps_max_latency_us;
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index ba05faaac562d..72675b59a7a73 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -670,9 +670,9 @@ static ssize_t tls_key_show(struct device *dev,
 {
 	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
 
-	if (!ctrl->tls_key)
+	if (!ctrl->tls_pskid)
 		return 0;
-	return sysfs_emit(buf, "%08x", key_serial(ctrl->tls_key));
+	return sysfs_emit(buf, "%08x", ctrl->tls_pskid);
 }
 static DEVICE_ATTR_RO(tls_key);
 #endif
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 9ea6be0b0392d..b40e95bee8495 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -165,6 +165,7 @@ struct nvme_tcp_queue {
 
 	bool			hdr_digest;
 	bool			data_digest;
+	bool			tls_enabled;
 	struct ahash_request	*rcv_hash;
 	struct ahash_request	*snd_hash;
 	__le32			exp_ddgst;
@@ -213,7 +214,21 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
 	return queue - queue->ctrl->queues;
 }
 
-static inline bool nvme_tcp_tls(struct nvme_ctrl *ctrl)
+/*
+ * Check if the queue is TLS encrypted
+ */
+static inline bool nvme_tcp_queue_tls(struct nvme_tcp_queue *queue)
+{
+	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
+		return 0;
+
+	return queue->tls_enabled;
+}
+
+/*
+ * Check if TLS is configured for the controller.
+ */
+static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl)
 {
 	if (!IS_ENABLED(CONFIG_NVME_TCP_TLS))
 		return 0;
@@ -368,7 +383,7 @@ static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
 
 static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
 {
-	return !nvme_tcp_tls(&queue->ctrl->ctrl) &&
+	return !nvme_tcp_queue_tls(queue) &&
 		nvme_tcp_queue_has_pending(queue);
 }
 
@@ -1427,7 +1442,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 	memset(&msg, 0, sizeof(msg));
 	iov.iov_base = icresp;
 	iov.iov_len = sizeof(*icresp);
-	if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
+	if (nvme_tcp_queue_tls(queue)) {
 		msg.msg_control = cbuf;
 		msg.msg_controllen = sizeof(cbuf);
 	}
@@ -1439,7 +1454,7 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
 		goto free_icresp;
 	}
 	ret = -ENOTCONN;
-	if (nvme_tcp_tls(&queue->ctrl->ctrl)) {
+	if (nvme_tcp_queue_tls(queue)) {
 		ctype = tls_get_record_type(queue->sock->sk,
 					    (struct cmsghdr *)cbuf);
 		if (ctype != TLS_RECORD_TYPE_DATA) {
@@ -1587,7 +1602,10 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
 			qid, pskid);
 		queue->tls_err = -ENOKEY;
 	} else {
-		ctrl->ctrl.tls_key = tls_key;
+		queue->tls_enabled = true;
+		if (qid == 0)
+			ctrl->ctrl.tls_pskid = key_serial(tls_key);
+		key_put(tls_key);
 		queue->tls_err = 0;
 	}
 
@@ -1768,7 +1786,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 	}
 
 	/* If PSKs are configured try to start TLS */
-	if (IS_ENABLED(CONFIG_NVME_TCP_TLS) && pskid) {
+	if (nvme_tcp_tls_configured(nctrl) && pskid) {
 		ret = nvme_tcp_start_tls(nctrl, queue, pskid);
 		if (ret)
 			goto err_init_connect;
@@ -1829,6 +1847,8 @@ static void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
 	mutex_lock(&queue->queue_lock);
 	if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
 		__nvme_tcp_stop_queue(queue);
+	/* Stopping the queue will disable TLS */
+	queue->tls_enabled = false;
 	mutex_unlock(&queue->queue_lock);
 }
 
@@ -1925,16 +1945,17 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
 	int ret;
 	key_serial_t pskid = 0;
 
-	if (nvme_tcp_tls(ctrl)) {
+	if (nvme_tcp_tls_configured(ctrl)) {
 		if (ctrl->opts->tls_key)
 			pskid = key_serial(ctrl->opts->tls_key);
-		else
+		else {
 			pskid = nvme_tls_psk_default(ctrl->opts->keyring,
						      ctrl->opts->host->nqn,
						      ctrl->opts->subsysnqn);
-		if (!pskid) {
-			dev_err(ctrl->device, "no valid PSK found\n");
-			return -ENOKEY;
+			if (!pskid) {
+				dev_err(ctrl->device, "no valid PSK found\n");
+				return -ENOKEY;
+			}
 		}
 	}
 
@@ -1957,13 +1978,14 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	int i, ret;
 
-	if (nvme_tcp_tls(ctrl) && !ctrl->tls_key) {
+	if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) {
 		dev_err(ctrl->device, "no PSK negotiated\n");
 		return -ENOKEY;
 	}
+
 	for (i = 1; i < ctrl->queue_count; i++) {
 		ret = nvme_tcp_alloc_queue(ctrl, i,
-				key_serial(ctrl->tls_key));
+				ctrl->tls_pskid);
 		if (ret)
 			goto out_free_queues;
 	}
@@ -2144,6 +2166,11 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 	if (remove)
 		nvme_unquiesce_admin_queue(ctrl);
 	nvme_tcp_destroy_admin_queue(ctrl, remove);
+	if (ctrl->tls_pskid) {
+		dev_dbg(ctrl->device, "Wipe negotiated TLS_PSK %08x\n",
+			ctrl->tls_pskid);
+		ctrl->tls_pskid = 0;
+	}
 }
 
 static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
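Note (summary sketch, not part of the patch): the split between "configured"
and "enabled" now lives in three places; the helper below is invented purely
for illustration.

    /*
     * ctrl->opts->tls_key   - key supplied by the user; "TLS configured",
     *                         shared by every queue of the controller
     * ctrl->tls_pskid       - serial of the PSK actually negotiated on the
     *                         admin queue; non-zero means TLS is enabled
     *                         controller-wide
     * queue->tls_enabled    - per-queue flag set in nvme_tcp_tls_done() and
     *                         cleared again when the queue is stopped
     */
    static bool demo_tls_ready_for_io_queues(struct nvme_ctrl *ctrl)
    {
            /*
             * "Configured" alone is not enough for I/O queues; secure
             * concatenation first authenticates on a plain connection, and
             * only after the admin queue handshake recorded a PSK is TLS
             * actually "enabled".
             */
            return ctrl->tls_pskid != 0;
    }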
From c5f2ca52d00de5d0348a758a28b51ddf5e685a89 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:20 +0200
Subject: [PATCH 04/13] nvme-tcp: check for invalidated or revoked key

key_lookup() will always return a key, even if that key is revoked or
invalidated. So check for invalid keys before continuing.
Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/common/keyring.c | 22 ++++++++++++++++++++++
 drivers/nvme/host/Kconfig     |  1 +
 drivers/nvme/host/fabrics.c   |  2 +-
 drivers/nvme/host/tcp.c       |  2 +-
 include/linux/nvme-keyring.h  |  6 +++++-
 5 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c
index 05e89307c8aa3..ed5167f942d89 100644
--- a/drivers/nvme/common/keyring.c
+++ b/drivers/nvme/common/keyring.c
@@ -20,6 +20,28 @@ key_serial_t nvme_keyring_id(void)
 }
 EXPORT_SYMBOL_GPL(nvme_keyring_id);
 
+static bool nvme_tls_psk_revoked(struct key *psk)
+{
+	return test_bit(KEY_FLAG_REVOKED, &psk->flags) ||
+	       test_bit(KEY_FLAG_INVALIDATED, &psk->flags);
+}
+
+struct key *nvme_tls_key_lookup(key_serial_t key_id)
+{
+	struct key *key = key_lookup(key_id);
+
+	if (IS_ERR(key)) {
+		pr_err("key id %08x not found\n", key_id);
+		return key;
+	}
+	if (nvme_tls_psk_revoked(key)) {
+		pr_err("key id %08x revoked\n", key_id);
+		return ERR_PTR(-EKEYREVOKED);
+	}
+	return key;
+}
+EXPORT_SYMBOL_GPL(nvme_tls_key_lookup);
+
 static void nvme_tls_psk_describe(const struct key *key, struct seq_file *m)
 {
 	seq_puts(m, key->description);
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index a3caef75aa0a8..883aaab2d83e3 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -109,6 +109,7 @@ config NVME_HOST_AUTH
 	bool "NVMe over Fabrics In-Band Authentication in host side"
 	depends on NVME_CORE
 	select NVME_AUTH
+	select NVME_KEYRING if NVME_TCP_TLS
 	help
 	  This provides support for NVMe over Fabrics In-Band Authentication
 	  in host side.
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index f5f545fa01035..432efcbf9e2f5 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -665,7 +665,7 @@ static struct key *nvmf_parse_key(int key_id)
 		return ERR_PTR(-EINVAL);
 	}
 
-	key = key_lookup(key_id);
+	key = nvme_tls_key_lookup(key_id);
 	if (IS_ERR(key))
 		pr_err("key id %08x not found\n", key_id);
 	else
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b40e95bee8495..89c44413c5939 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1596,7 +1596,7 @@ static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
 		goto out_complete;
 	}
 
-	tls_key = key_lookup(pskid);
+	tls_key = nvme_tls_key_lookup(pskid);
 	if (IS_ERR(tls_key)) {
 		dev_warn(ctrl->ctrl.device, "queue %d: Invalid key %x\n",
 			 qid, pskid);
diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h
index e10333d78dbbe..19d2b256180fd 100644
--- a/include/linux/nvme-keyring.h
+++ b/include/linux/nvme-keyring.h
@@ -12,7 +12,7 @@ key_serial_t nvme_tls_psk_default(struct key *keyring,
 		const char *hostnqn, const char *subnqn);
 
 key_serial_t nvme_keyring_id(void);
-
+struct key *nvme_tls_key_lookup(key_serial_t key_id);
 #else
 
 static inline key_serial_t nvme_tls_psk_default(struct key *keyring,
@@ -24,5 +24,9 @@ static inline key_serial_t nvme_keyring_id(void)
 {
 	return 0;
 }
+static inline struct key *nvme_tls_key_lookup(key_serial_t key_id)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
 #endif /* !CONFIG_NVME_KEYRING */
 #endif /* _NVME_KEYRING_H */

From 5bc46b49c828a6dfaab80b71ecb63fe76a1096d2 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:21 +0200
Subject: [PATCH 05/13] nvme: add a newline to the 'tls_key' sysfs attribute

Print a newline for easier userspace handling.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/host/sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 72675b59a7a73..c391ad6c5a88f 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -672,7 +672,7 @@ static ssize_t tls_key_show(struct device *dev,
 
 	if (!ctrl->tls_pskid)
 		return 0;
-	return sysfs_emit(buf, "%08x", ctrl->tls_pskid);
+	return sysfs_emit(buf, "%08x\n", ctrl->tls_pskid);
 }
 static DEVICE_ATTR_RO(tls_key);
 #endif

From 1e48b34c9bc79aa36700fccbfdf87e61e4431d2b Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:22 +0200
Subject: [PATCH 06/13] nvme: split off TLS sysfs attributes into a separate
 group

Split off TLS sysfs attributes into a separate group to improve
readability and to keep all TLS related handling in one section.
Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Signed-off-by: Keith Busch
---
 drivers/nvme/host/sysfs.c | 62 ++++++++++++++++++++++++++-------------
 1 file changed, 41 insertions(+), 21 deletions(-)

diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index c391ad6c5a88f..dc7ceb53147fc 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -664,19 +664,6 @@ static DEVICE_ATTR(dhchap_ctrl_secret, S_IRUGO | S_IWUSR,
 	nvme_ctrl_dhchap_ctrl_secret_show, nvme_ctrl_dhchap_ctrl_secret_store);
 #endif
 
-#ifdef CONFIG_NVME_TCP_TLS
-static ssize_t tls_key_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
-{
-	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
-
-	if (!ctrl->tls_pskid)
-		return 0;
-	return sysfs_emit(buf, "%08x\n", ctrl->tls_pskid);
-}
-static DEVICE_ATTR_RO(tls_key);
-#endif
-
 static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_reset_controller.attr,
 	&dev_attr_rescan_controller.attr,
@@ -703,9 +690,6 @@ static struct attribute *nvme_dev_attrs[] = {
 #ifdef CONFIG_NVME_HOST_AUTH
 	&dev_attr_dhchap_secret.attr,
 	&dev_attr_dhchap_ctrl_secret.attr,
-#endif
-#ifdef CONFIG_NVME_TCP_TLS
-	&dev_attr_tls_key.attr,
 #endif
 	&dev_attr_adm_passthru_err_log_enabled.attr,
 	NULL
@@ -737,11 +721,6 @@ static umode_t nvme_dev_attrs_are_visible(struct kobject *kobj,
 	if (a == &dev_attr_dhchap_ctrl_secret.attr && !ctrl->opts)
 		return 0;
 #endif
-#ifdef CONFIG_NVME_TCP_TLS
-	if (a == &dev_attr_tls_key.attr &&
-	    (!ctrl->opts || strcmp(ctrl->opts->transport, "tcp")))
-		return 0;
-#endif
 
 	return a->mode;
 }
@@ -752,8 +731,49 @@ const struct attribute_group nvme_dev_attrs_group = {
 };
 EXPORT_SYMBOL_GPL(nvme_dev_attrs_group);
 
+#ifdef CONFIG_NVME_TCP_TLS
+static ssize_t tls_key_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	if (!ctrl->tls_pskid)
+		return 0;
+	return sysfs_emit(buf, "%08x\n", ctrl->tls_pskid);
+}
+static DEVICE_ATTR_RO(tls_key);
+
+static struct attribute *nvme_tls_attrs[] = {
+	&dev_attr_tls_key.attr,
+};
+
+static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+
+	if (!ctrl->opts || strcmp(ctrl->opts->transport, "tcp"))
+		return 0;
+
+	if (a == &dev_attr_tls_key.attr &&
+	    !ctrl->opts->tls)
+		return 0;
+
+	return a->mode;
+}
+
+const struct attribute_group nvme_tls_attrs_group = {
+	.attrs = nvme_tls_attrs,
+	.is_visible = nvme_tls_attrs_are_visible,
+};
+#endif
+
 const struct attribute_group *nvme_dev_attr_groups[] = {
 	&nvme_dev_attrs_group,
+#ifdef CONFIG_NVME_TCP_TLS
+	&nvme_tls_attrs_group,
+#endif
 	NULL,
 };
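Note (illustration, not part of the patch): the same group-plus-is_visible()
pattern in its minimal form, with invented demo_* names. sysfs walks the
attribute array until it reaches a NULL entry, which is why the example array
below carries an explicit terminator.

    #include <linux/device.h>
    #include <linux/sysfs.h>

    static ssize_t demo_show(struct device *dev, struct device_attribute *attr,
                             char *buf)
    {
            return sysfs_emit(buf, "%d\n", 42);
    }
    static DEVICE_ATTR_RO(demo);

    static struct attribute *demo_attrs[] = {
            &dev_attr_demo.attr,
            NULL,           /* sysfs iterates the array until it hits NULL */
    };

    static umode_t demo_attrs_are_visible(struct kobject *kobj,
                                          struct attribute *a, int n)
    {
            /* Return 0 to hide an attribute, or its normal mode to expose it. */
            return a->mode;
    }

    static const struct attribute_group demo_attr_group = {
            .attrs = demo_attrs,
            .is_visible = demo_attrs_are_visible,
    };

The is_visible() callback is evaluated when the group is registered, so the
decision (transport type, TLS requested, key configured) is made once per
controller rather than on every read.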
From f5eb7397471bbc24d63011f8cb2d422ac606085d Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:23 +0200
Subject: [PATCH 07/13] nvme-sysfs: add 'tls_configured_key' sysfs attribute

There is a difference between the negotiated TLS key (which is always
present for a TLS encrypted connection) and the configured TLS key
(which is specified with the --tls_key command line option).
To differentiate between these two, add a new sysfs attribute
'tls_configured_key' to hold the key specified on the command line.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Signed-off-by: Keith Busch
---
 drivers/nvme/host/sysfs.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index dc7ceb53147fc..2055dad7bc632 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -743,8 +743,19 @@ static ssize_t tls_key_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(tls_key);
 
+static ssize_t tls_configured_key_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct key *key = ctrl->opts->tls_key;
+
+	return sysfs_emit(buf, "%08x\n", key_serial(key));
+}
+static DEVICE_ATTR_RO(tls_configured_key);
+
 static struct attribute *nvme_tls_attrs[] = {
 	&dev_attr_tls_key.attr,
+	&dev_attr_tls_configured_key.attr,
 };
 
 static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
@@ -759,6 +770,9 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
 	if (a == &dev_attr_tls_key.attr &&
 	    !ctrl->opts->tls)
 		return 0;
+	if (a == &dev_attr_tls_configured_key.attr &&
+	    !ctrl->opts->tls_key)
+		return 0;
 
 	return a->mode;
 }

From 02a3688c53d648e027ca9c27423bf864a189d7c7 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:24 +0200
Subject: [PATCH 08/13] nvme-sysfs: add 'tls_keyring' attribute

Add a 'tls_keyring' attribute to display the contents of the --keyring
option from the connect string. Adding this attribute allows us to
recreate the original connect string from sysfs settings.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Signed-off-by: Keith Busch
---
 drivers/nvme/host/sysfs.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 2055dad7bc632..eb345551d6fe3 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -753,9 +753,20 @@ static ssize_t tls_configured_key_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(tls_configured_key);
 
+static ssize_t tls_keyring_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
+	struct key *keyring = ctrl->opts->keyring;
+
+	return sysfs_emit(buf, "%s\n", keyring->description);
+}
+static DEVICE_ATTR_RO(tls_keyring);
+
 static struct attribute *nvme_tls_attrs[] = {
 	&dev_attr_tls_key.attr,
 	&dev_attr_tls_configured_key.attr,
+	&dev_attr_tls_keyring.attr,
 };
 
 static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
@@ -773,6 +784,9 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj,
 	if (a == &dev_attr_tls_configured_key.attr &&
 	    !ctrl->opts->tls_key)
 		return 0;
+	if (a == &dev_attr_tls_keyring.attr &&
+	    !ctrl->opts->keyring)
+		return 0;
 
 	return a->mode;
 }

From bb2df18958b4287104c38541cf48c4a60c90e721 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:25 +0200
Subject: [PATCH 09/13] nvmet-auth: allow to clear DH-HMAC-CHAP keys

As we can set DH-HMAC-CHAP keys, we should also be able to unset them.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Signed-off-by: Keith Busch
---
 drivers/nvme/target/auth.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 8bc3f431c77f6..7897d02c681da 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -25,6 +25,18 @@ int nvmet_auth_set_key(struct nvmet_host *host, const char *secret,
 	unsigned char key_hash;
 	char *dhchap_secret;
 
+	if (!strlen(secret)) {
+		if (set_ctrl) {
+			kfree(host->dhchap_ctrl_secret);
+			host->dhchap_ctrl_secret = NULL;
+			host->dhchap_ctrl_key_hash = 0;
+		} else {
+			kfree(host->dhchap_secret);
+			host->dhchap_secret = NULL;
+			host->dhchap_key_hash = 0;
+		}
+		return 0;
+	}
 	if (sscanf(secret, "DHHC-1:%hhd:%*s", &key_hash) != 1)
 		return -EINVAL;
 	if (key_hash > 3) {
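Note (illustration, not part of the patch): with this change an empty secret
clears the stored key instead of failing the DHHC-1 parse. The host pointer
and the secret string below are placeholders, and return values are ignored
for brevity.

    /* Install a host secret, then clear it again via the new empty-string path. */
    static void demo_set_and_clear(struct nvmet_host *host)
    {
            /* Placeholder secret in "DHHC-1:<hash>:<base64 key>:" form. */
            nvmet_auth_set_key(host, "DHHC-1:00:0123456789ABCDEF:", false);

            /* Previously this returned -EINVAL; now it frees the secret. */
            nvmet_auth_set_key(host, "", false);
    }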
From ff4a0a4088adc6a37293a9cce25bb56ad2f26a16 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke
Date: Mon, 22 Jul 2024 14:02:26 +0200
Subject: [PATCH 10/13] nvme-target: do not check authentication status for
 admin commands twice

nvmet_check_ctrl_status() checks the authentication status, so we don't
need to do that prior to calling it.

Signed-off-by: Hannes Reinecke
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/target/admin-cmd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f7e1156ac7ecc..d64480f01f4a8 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -1005,8 +1005,6 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
 
 	if (nvme_is_fabrics(cmd))
 		return nvmet_parse_fabrics_admin_cmd(req);
-	if (unlikely(!nvmet_check_auth_status(req)))
-		return NVME_SC_AUTH_REQUIRED | NVME_STATUS_DNR;
 	if (nvmet_is_disc_subsys(nvmet_req_subsys(req)))
 		return nvmet_parse_discovery_cmd(req);
 

From 03c3d7c74371a46d967fbf41628874ec04ddda96 Mon Sep 17 00:00:00 2001
From: Niklas Cassel
Date: Thu, 15 Aug 2024 22:11:31 +0200
Subject: [PATCH 11/13] nvme-rdma: send cntlid in the RDMA_CM_REQUEST Private
 Data

When sending a RDMA_CM_REQUEST, the NVMe RDMA Transport Specification
allows you to populate the cntlid field in the RDMA_CM_REQUEST Private
Data.

The cntlid is returned by the target on completion of the first
RDMA_CM_REQUEST command (which creates the admin queue).

The cntlid field can then be populated by the host when the I/O queues
are created (using additional RDMA_CM_REQUEST commands), such that the
target can perform extra validation for additional RDMA_CM_REQUEST
commands.

An additional error code and error message are also added, such that
nvme_rdma_cm_msg() will display the proper error message if the target
fails the RDMA_CM_REQUEST command because of this extra validation.
Signed-off-by: Niklas Cassel
Reviewed-by: Sagi Grimberg
Reviewed-by: Christoph Hellwig
Signed-off-by: Keith Busch
---
 drivers/nvme/host/rdma.c  | 2 ++
 include/linux/nvme-rdma.h | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2eb33842f9711..e3520a91147b1 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1876,6 +1876,8 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue)
 		 */
 		priv.hrqsize = cpu_to_le16(queue->queue_size);
 		priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
+		/* cntlid should only be set when creating an I/O queue */
+		priv.cntlid = cpu_to_le16(ctrl->ctrl.cntlid);
 	}
 
 	ret = rdma_connect_locked(queue->cm_id, &param);
diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h
index eb2f04d636c89..97c5f00b9aa31 100644
--- a/include/linux/nvme-rdma.h
+++ b/include/linux/nvme-rdma.h
@@ -25,6 +25,7 @@ enum nvme_rdma_cm_status {
 	NVME_RDMA_CM_NO_RSC		= 0x06,
 	NVME_RDMA_CM_INVALID_IRD	= 0x07,
 	NVME_RDMA_CM_INVALID_ORD	= 0x08,
+	NVME_RDMA_CM_INVALID_CNTLID	= 0x09,
 };
 
 static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
@@ -46,6 +47,8 @@ static inline const char *nvme_rdma_cm_msg(enum nvme_rdma_cm_status status)
 		return "invalid IRD";
 	case NVME_RDMA_CM_INVALID_ORD:
 		return "Invalid ORD";
+	case NVME_RDMA_CM_INVALID_CNTLID:
+		return "invalid controller ID";
 	default:
 		return "unrecognized reason";
 	}
@@ -64,7 +67,8 @@ struct nvme_rdma_cm_req {
 	__le16		qid;
 	__le16		hrqsize;
 	__le16		hsqsize;
-	u8		rsvd[24];
+	__le16		cntlid;
+	u8		rsvd[22];
 };
 
 /**
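Note (illustration, not part of the patch): a schematic host-side fill of the
32-byte RDMA_CM_REQUEST private data after this change. The helper name and
the queue/ctrl plumbing are invented for the sketch; the real code lives in
nvme_rdma_route_resolved().

    /* Schematic fill of struct nvme_rdma_cm_req (32 bytes of private data). */
    static void demo_fill_cm_req(struct nvme_rdma_cm_req *priv,
                                 struct nvme_rdma_queue *queue, u16 cntlid)
    {
            priv->recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
            priv->qid = cpu_to_le16(nvme_rdma_queue_idx(queue));

            if (!nvme_rdma_queue_idx(queue)) {
                    /* Admin queue: the controller ID is not known yet. */
                    priv->hrqsize = cpu_to_le16(NVME_AQ_DEPTH);
                    priv->hsqsize = cpu_to_le16(NVME_AQ_DEPTH - 1);
            } else {
                    /* I/O queues: echo the cntlid returned on the admin queue. */
                    priv->hrqsize = cpu_to_le16(queue->queue_size);
                    priv->hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize);
                    priv->cntlid = cpu_to_le16(cntlid);
            }
    }

The cntlid occupies what used to be the first two reserved bytes, so the
private data stays 32 bytes long and remains compatible with targets that do
not validate the field.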
From cead0b8991713bdeb5b947758dd62287fcf8da40 Mon Sep 17 00:00:00 2001
From: Anuj Gupta
Date: Mon, 26 Aug 2024 16:09:43 +0530
Subject: [PATCH 12/13] nvme: rename apptag and appmask to lbat and lbatm

Rename apptag and appmask to lbat and lbatm so that they match the
field names used in the NVMe spec.

Signed-off-by: Anuj Gupta
Signed-off-by: Kanchan Joshi
Suggested-by: Christoph Hellwig
Reviewed-by: Sagi Grimberg
Signed-off-by: Keith Busch
---
 drivers/nvme/host/core.c   | 4 ++--
 drivers/nvme/host/ioctl.c  | 4 ++--
 drivers/nvme/host/rdma.c   | 4 ++--
 drivers/nvme/target/rdma.c | 4 ++--
 include/linux/nvme.h       | 8 ++++----
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 93aa4c10adb73..75ab62cb4aa45 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -987,8 +987,8 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
 	cmnd->rw.length =
 		cpu_to_le16((blk_rq_bytes(req) >> ns->head->lba_shift) - 1);
 	cmnd->rw.reftag = 0;
-	cmnd->rw.apptag = 0;
-	cmnd->rw.appmask = 0;
+	cmnd->rw.lbat = 0;
+	cmnd->rw.lbatm = 0;
 
 	if (ns->head->ms) {
 		/*
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index f1d58e70933f5..850f81e08e7d8 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -260,8 +260,8 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
 	c.rw.control = cpu_to_le16(io.control);
 	c.rw.dsmgmt = cpu_to_le32(io.dsmgmt);
 	c.rw.reftag = cpu_to_le32(io.reftag);
-	c.rw.apptag = cpu_to_le16(io.apptag);
-	c.rw.appmask = cpu_to_le16(io.appmask);
+	c.rw.lbat = cpu_to_le16(io.apptag);
+	c.rw.lbatm = cpu_to_le16(io.appmask);
 
 	return nvme_submit_user_cmd(ns->queue, &c, io.addr, length, metadata,
 			meta_len, lower_32_bits(io.slba), NULL, 0, 0);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index e3520a91147b1..15b5e06039a5f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1363,8 +1363,8 @@ static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
 	if (control & NVME_RW_PRINFO_PRCHK_REF)
 		domain->sig.dif.ref_remap = true;
 
-	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
-	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
+	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
 	domain->sig.dif.app_escape = true;
 	if (pi_type == NVME_NS_DPS_PI_TYPE3)
 		domain->sig.dif.ref_escape = true;
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index 1eff8ca6a5f12..1b6264fa58039 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -578,8 +578,8 @@ static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
 	if (control & NVME_RW_PRINFO_PRCHK_REF)
 		domain->sig.dif.ref_remap = true;
 
-	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
-	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+	domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.lbat);
+	domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.lbatm);
 	domain->sig.dif.app_escape = true;
 	if (pi_type == NVME_NS_DPS_PI_TYPE3)
 		domain->sig.dif.ref_escape = true;
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 7b2ae2e435447..b58d9405d65e0 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -987,8 +987,8 @@ struct nvme_rw_command {
 	__le16			control;
 	__le32			dsmgmt;
 	__le32			reftag;
-	__le16			apptag;
-	__le16			appmask;
+	__le16			lbat;
+	__le16			lbatm;
 };
 
 enum {
@@ -1057,8 +1057,8 @@ struct nvme_write_zeroes_cmd {
 	__le16			control;
 	__le32			dsmgmt;
 	__le32			reftag;
-	__le16			apptag;
-	__le16			appmask;
+	__le16			lbat;
+	__le16			lbatm;
 };
 
 enum nvme_zone_mgmt_action {

From 7c2fd76048e95dd267055b5f5e0a48e6e7c81fd9 Mon Sep 17 00:00:00 2001
From: Puranjay Mohan
Date: Thu, 29 Aug 2024 13:32:17 +0000
Subject: [PATCH 13/13] nvme: fix metadata handling in nvme-passthrough

On an NVMe namespace that does not support metadata, it is possible to
send an IO command with metadata through io-passthru. This allows
issues like [1] to trigger in the completion code path.
nvme_map_user_request() doesn't check if the namespace supports
metadata before sending it forward. It also allows admin commands with
metadata to be processed as it ignores metadata when bdev == NULL and
may report success.

Reject an IO command with metadata when the NVMe namespace doesn't
support it and reject an admin command if it has metadata.

[1] https://lore.kernel.org/all/mb61pcylvnym8.fsf@amazon.com/

Suggested-by: Christoph Hellwig
Signed-off-by: Puranjay Mohan
Reviewed-by: Christoph Hellwig
Reviewed-by: Sagi Grimberg
Reviewed-by: Anuj Gupta
Signed-off-by: Keith Busch
---
 drivers/nvme/host/ioctl.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 850f81e08e7d8..1d769c842fbf5 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2017-2021 Christoph Hellwig.
  */
 #include <linux/bio-integrity.h>
+#include <linux/blk-integrity.h>
 #include <linux/ptrace.h>	/* for force_successful_syscall_return */
 #include <linux/nvme_ioctl.h>
 #include <linux/io_uring/cmd.h>
@@ -119,9 +120,14 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
 	struct block_device *bdev = ns ? ns->disk->part0 : NULL;
+	bool supports_metadata = bdev && blk_get_integrity(bdev->bd_disk);
+	bool has_metadata = meta_buffer && meta_len;
 	struct bio *bio = NULL;
 	int ret;
 
+	if (has_metadata && !supports_metadata)
+		return -EINVAL;
+
 	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
 		struct iov_iter iter;
 
@@ -143,15 +149,15 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		goto out;
 
 	bio = req->bio;
-	if (bdev) {
+	if (bdev)
 		bio_set_dev(bio, bdev);
-		if (meta_buffer && meta_len) {
-			ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
-						     meta_seed);
-			if (ret)
-				goto out_unmap;
-			req->cmd_flags |= REQ_INTEGRITY;
-		}
+
+	if (has_metadata) {
+		ret = bio_integrity_map_user(bio, meta_buffer, meta_len,
+					     meta_seed);
+		if (ret)
+			goto out_unmap;
+		req->cmd_flags |= REQ_INTEGRITY;
 	}
 
 	return ret;