From 3241cd0c6c17919b5b984c6b770ced3d797ddc4c Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:09 +0100 Subject: [PATCH 01/22] crypto,fs: Separate out hkdf_extract() and hkdf_expand() Separate out the HKDF functions into a separate module to to make them available to other callers. And add a testsuite to the module with test vectors from RFC 5869 (and additional vectors for SHA384 and SHA512) to ensure the integrity of the algorithm. Signed-off-by: Hannes Reinecke Acked-by: Eric Biggers Acked-by: Herbert Xu Signed-off-by: Keith Busch --- crypto/Kconfig | 6 + crypto/Makefile | 1 + crypto/hkdf.c | 573 ++++++++++++++++++++++++++++++++++++++++++ fs/crypto/Kconfig | 1 + fs/crypto/hkdf.c | 85 ++----- include/crypto/hkdf.h | 20 ++ 6 files changed, 616 insertions(+), 70 deletions(-) create mode 100644 crypto/hkdf.c create mode 100644 include/crypto/hkdf.h diff --git a/crypto/Kconfig b/crypto/Kconfig index 74ae5f52b7840..90c17fb5498be 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -141,6 +141,12 @@ config CRYPTO_ACOMP select CRYPTO_ALGAPI select CRYPTO_ACOMP2 +config CRYPTO_HKDF + tristate + select CRYPTO_SHA256 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS + select CRYPTO_SHA512 if !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS + select CRYPTO_HASH2 + config CRYPTO_MANAGER tristate "Cryptographic algorithm manager" select CRYPTO_MANAGER2 diff --git a/crypto/Makefile b/crypto/Makefile index f67e853c46902..71344adb35827 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_AKCIPHER2) += akcipher.o obj-$(CONFIG_CRYPTO_SIG2) += sig.o obj-$(CONFIG_CRYPTO_KPP2) += kpp.o +obj-$(CONFIG_CRYPTO_HKDF) += hkdf.o dh_generic-y := dh.o dh_generic-y += dh_helper.o diff --git a/crypto/hkdf.c b/crypto/hkdf.c new file mode 100644 index 0000000000000..2434c5c425456 --- /dev/null +++ b/crypto/hkdf.c @@ -0,0 +1,573 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation + * Function"), aka RFC 5869. See also the original paper (Krawczyk 2010): + * "Cryptographic Extraction and Key Derivation: The HKDF Scheme". + * + * Copyright 2019 Google LLC + */ + +#include +#include +#include +#include + +/* + * HKDF consists of two steps: + * + * 1. HKDF-Extract: extract a pseudorandom key from the input keying material + * and optional salt. + * 2. HKDF-Expand: expand the pseudorandom key into output keying material of + * any length, parameterized by an application-specific info string. + * + */ + +/** + * hkdf_extract - HKDF-Extract (RFC 5869 section 2.2) + * @hmac_tfm: an HMAC transform using the hash function desired for HKDF. The + * caller is responsible for setting the @prk afterwards. + * @ikm: input keying material + * @ikmlen: length of @ikm + * @salt: input salt value + * @saltlen: length of @salt + * @prk: resulting pseudorandom key + * + * Extracts a pseudorandom key @prk from the input keying material + * @ikm with length @ikmlen and salt @salt with length @saltlen. + * The length of @prk is given by the digest size of @hmac_tfm. + * For an 'unsalted' version of HKDF-Extract @salt must be set + * to all zeroes and @saltlen must be set to the length of @prk. + * + * Returns 0 on success with the pseudorandom key stored in @prk, + * or a negative errno value otherwise. + */ +int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm, + unsigned int ikmlen, const u8 *salt, unsigned int saltlen, + u8 *prk) +{ + int err; + + err = crypto_shash_setkey(hmac_tfm, salt, saltlen); + if (!err) + err = crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk); + + return err; +} +EXPORT_SYMBOL_GPL(hkdf_extract); + +/** + * hkdf_expand - HKDF-Expand (RFC 5869 section 2.3) + * @hmac_tfm: hash context keyed with pseudorandom key + * @info: application-specific information + * @infolen: length of @info + * @okm: output keying material + * @okmlen: length of @okm + * + * This expands the pseudorandom key, which was already keyed into @hmac_tfm, + * into @okmlen bytes of output keying material parameterized by the + * application-specific @info of length @infolen bytes. + * This is thread-safe and may be called by multiple threads in parallel. + * + * Returns 0 on success with output keying material stored in @okm, + * or a negative errno value otherwise. + */ +int hkdf_expand(struct crypto_shash *hmac_tfm, + const u8 *info, unsigned int infolen, + u8 *okm, unsigned int okmlen) +{ + SHASH_DESC_ON_STACK(desc, hmac_tfm); + unsigned int i, hashlen = crypto_shash_digestsize(hmac_tfm); + int err; + const u8 *prev = NULL; + u8 counter = 1; + u8 tmp[HASH_MAX_DIGESTSIZE] = {}; + + if (WARN_ON(okmlen > 255 * hashlen)) + return -EINVAL; + + desc->tfm = hmac_tfm; + + for (i = 0; i < okmlen; i += hashlen) { + err = crypto_shash_init(desc); + if (err) + goto out; + + if (prev) { + err = crypto_shash_update(desc, prev, hashlen); + if (err) + goto out; + } + + if (infolen) { + err = crypto_shash_update(desc, info, infolen); + if (err) + goto out; + } + + BUILD_BUG_ON(sizeof(counter) != 1); + if (okmlen - i < hashlen) { + err = crypto_shash_finup(desc, &counter, 1, tmp); + if (err) + goto out; + memcpy(&okm[i], tmp, okmlen - i); + memzero_explicit(tmp, sizeof(tmp)); + } else { + err = crypto_shash_finup(desc, &counter, 1, &okm[i]); + if (err) + goto out; + } + counter++; + prev = &okm[i]; + } + err = 0; +out: + if (unlikely(err)) + memzero_explicit(okm, okmlen); /* so caller doesn't need to */ + shash_desc_zero(desc); + memzero_explicit(tmp, HASH_MAX_DIGESTSIZE); + return err; +} +EXPORT_SYMBOL_GPL(hkdf_expand); + +struct hkdf_testvec { + const char *test; + const u8 *ikm; + const u8 *salt; + const u8 *info; + const u8 *prk; + const u8 *okm; + u16 ikm_size; + u16 salt_size; + u16 info_size; + u16 prk_size; + u16 okm_size; +}; + +/* + * HKDF test vectors from RFC5869 + * + * Additional HKDF test vectors from + * https://github.com/brycx/Test-Vector-Generation/blob/master/HKDF/hkdf-hmac-sha2-test-vectors.md + */ +static const struct hkdf_testvec hkdf_sha256_tv[] = { + { + .test = "basic hdkf test", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x07\x77\x09\x36\x2c\x2e\x32\xdf\x0d\xdc\x3f\x0d\xc4\x7b\xba\x63" + "\x90\xb6\xc7\x3b\xb5\x0f\x9c\x31\x22\xec\x84\x4a\xd7\xc2\xb3\xe5", + .prk_size = 32, + .okm = "\x3c\xb2\x5f\x25\xfa\xac\xd5\x7a\x90\x43\x4f\x64\xd0\x36\x2f\x2a" + "\x2d\x2d\x0a\x90\xcf\x1a\x5a\x4c\x5d\xb0\x2d\x56\xec\xc4\xc5\xbf" + "\x34\x00\x72\x08\xd5\xb8\x87\x18\x58\x65", + .okm_size = 42, + }, { + .test = "hkdf test with long input", + .ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f", + .ikm_size = 80, + .salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf", + .salt_size = 80, + .info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .info_size = 80, + .prk = "\x06\xa6\xb8\x8c\x58\x53\x36\x1a\x06\x10\x4c\x9c\xeb\x35\xb4\x5c" + "\xef\x76\x00\x14\x90\x46\x71\x01\x4a\x19\x3f\x40\xc1\x5f\xc2\x44", + .prk_size = 32, + .okm = "\xb1\x1e\x39\x8d\xc8\x03\x27\xa1\xc8\xe7\xf7\x8c\x59\x6a\x49\x34" + "\x4f\x01\x2e\xda\x2d\x4e\xfa\xd8\xa0\x50\xcc\x4c\x19\xaf\xa9\x7c" + "\x59\x04\x5a\x99\xca\xc7\x82\x72\x71\xcb\x41\xc6\x5e\x59\x0e\x09" + "\xda\x32\x75\x60\x0c\x2f\x09\xb8\x36\x77\x93\xa9\xac\xa3\xdb\x71" + "\xcc\x30\xc5\x81\x79\xec\x3e\x87\xc1\x4c\x01\xd5\xc1\xf3\x43\x4f" + "\x1d\x87", + .okm_size = 82, + }, { + .test = "hkdf test with zero salt and info", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = NULL, + .salt_size = 0, + .info = NULL, + .info_size = 0, + .prk = "\x19\xef\x24\xa3\x2c\x71\x7b\x16\x7f\x33\xa9\x1d\x6f\x64\x8b\xdf" + "\x96\x59\x67\x76\xaf\xdb\x63\x77\xac\x43\x4c\x1c\x29\x3c\xcb\x04", + .prk_size = 32, + .okm = "\x8d\xa4\xe7\x75\xa5\x63\xc1\x8f\x71\x5f\x80\x2a\x06\x3c\x5a\x31" + "\xb8\xa1\x1f\x5c\x5e\xe1\x87\x9e\xc3\x45\x4e\x5f\x3c\x73\x8d\x2d" + "\x9d\x20\x13\x95\xfa\xa4\xb6\x1a\x96\xc8", + .okm_size = 42, + }, { + .test = "hkdf test with short input", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 11, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x82\x65\xf6\x9d\x7f\xf7\xe5\x01\x37\x93\x01\x5c\xa0\xef\x92\x0c" + "\xb1\x68\x21\x99\xc8\xbc\x3a\x00\xda\x0c\xab\x47\xb7\xb0\x0f\xdf", + .prk_size = 32, + .okm = "\x58\xdc\xe1\x0d\x58\x01\xcd\xfd\xa8\x31\x72\x6b\xfe\xbc\xb7\x43" + "\xd1\x4a\x7e\xe8\x3a\xa0\x57\xa9\x3d\x59\xb0\xa1\x31\x7f\xf0\x9d" + "\x10\x5c\xce\xcf\x53\x56\x92\xb1\x4d\xd5", + .okm_size = 42, + }, { + .test = "unsalted hkdf test with zero info", + .ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c" + "\x0c\x0c\x0c\x0c\x0c\x0c", + .ikm_size = 22, + .salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + .salt_size = 32, + .info = NULL, + .info_size = 0, + .prk = "\xaa\x84\x1e\x1f\x35\x74\xf3\x2d\x13\xfb\xa8\x00\x5f\xcd\x9b\x8d" + "\x77\x67\x82\xa5\xdf\xa1\x92\x38\x92\xfd\x8b\x63\x5d\x3a\x89\xdf", + .prk_size = 32, + .okm = "\x59\x68\x99\x17\x9a\xb1\xbc\x00\xa7\xc0\x37\x86\xff\x43\xee\x53" + "\x50\x04\xbe\x2b\xb9\xbe\x68\xbc\x14\x06\x63\x6f\x54\xbd\x33\x8a" + "\x66\xa2\x37\xba\x2a\xcb\xce\xe3\xc9\xa7", + .okm_size = 42, + } +}; + +static const struct hkdf_testvec hkdf_sha384_tv[] = { + { + .test = "basic hkdf test", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x70\x4b\x39\x99\x07\x79\xce\x1d\xc5\x48\x05\x2c\x7d\xc3\x9f\x30" + "\x35\x70\xdd\x13\xfb\x39\xf7\xac\xc5\x64\x68\x0b\xef\x80\xe8\xde" + "\xc7\x0e\xe9\xa7\xe1\xf3\xe2\x93\xef\x68\xec\xeb\x07\x2a\x5a\xde", + .prk_size = 48, + .okm = "\x9b\x50\x97\xa8\x60\x38\xb8\x05\x30\x90\x76\xa4\x4b\x3a\x9f\x38" + "\x06\x3e\x25\xb5\x16\xdc\xbf\x36\x9f\x39\x4c\xfa\xb4\x36\x85\xf7" + "\x48\xb6\x45\x77\x63\xe4\xf0\x20\x4f\xc5", + .okm_size = 42, + }, { + .test = "hkdf test with long input", + .ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f", + .ikm_size = 80, + .salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf", + .salt_size = 80, + .info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .info_size = 80, + .prk = "\xb3\x19\xf6\x83\x1d\xff\x93\x14\xef\xb6\x43\xba\xa2\x92\x63\xb3" + "\x0e\x4a\x8d\x77\x9f\xe3\x1e\x9c\x90\x1e\xfd\x7d\xe7\x37\xc8\x5b" + "\x62\xe6\x76\xd4\xdc\x87\xb0\x89\x5c\x6a\x7d\xc9\x7b\x52\xce\xbb", + .prk_size = 48, + .okm = "\x48\x4c\xa0\x52\xb8\xcc\x72\x4f\xd1\xc4\xec\x64\xd5\x7b\x4e\x81" + "\x8c\x7e\x25\xa8\xe0\xf4\x56\x9e\xd7\x2a\x6a\x05\xfe\x06\x49\xee" + "\xbf\x69\xf8\xd5\xc8\x32\x85\x6b\xf4\xe4\xfb\xc1\x79\x67\xd5\x49" + "\x75\x32\x4a\x94\x98\x7f\x7f\x41\x83\x58\x17\xd8\x99\x4f\xdb\xd6" + "\xf4\xc0\x9c\x55\x00\xdc\xa2\x4a\x56\x22\x2f\xea\x53\xd8\x96\x7a" + "\x8b\x2e", + .okm_size = 82, + }, { + .test = "hkdf test with zero salt and info", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = NULL, + .salt_size = 0, + .info = NULL, + .info_size = 0, + .prk = "\x10\xe4\x0c\xf0\x72\xa4\xc5\x62\x6e\x43\xdd\x22\xc1\xcf\x72\x7d" + "\x4b\xb1\x40\x97\x5c\x9a\xd0\xcb\xc8\xe4\x5b\x40\x06\x8f\x8f\x0b" + "\xa5\x7c\xdb\x59\x8a\xf9\xdf\xa6\x96\x3a\x96\x89\x9a\xf0\x47\xe5", + .prk_size = 48, + .okm = "\xc8\xc9\x6e\x71\x0f\x89\xb0\xd7\x99\x0b\xca\x68\xbc\xde\xc8\xcf" + "\x85\x40\x62\xe5\x4c\x73\xa7\xab\xc7\x43\xfa\xde\x9b\x24\x2d\xaa" + "\xcc\x1c\xea\x56\x70\x41\x5b\x52\x84\x9c", + .okm_size = 42, + }, { + .test = "hkdf test with short input", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 11, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x6d\x31\x69\x98\x28\x79\x80\x88\xb3\x59\xda\xd5\x0b\x8f\x01\xb0" + "\x15\xf1\x7a\xa3\xbd\x4e\x27\xa6\xe9\xf8\x73\xb7\x15\x85\xca\x6a" + "\x00\xd1\xf0\x82\x12\x8a\xdb\x3c\xf0\x53\x0b\x57\xc0\xf9\xac\x72", + .prk_size = 48, + .okm = "\xfb\x7e\x67\x43\xeb\x42\xcd\xe9\x6f\x1b\x70\x77\x89\x52\xab\x75" + "\x48\xca\xfe\x53\x24\x9f\x7f\xfe\x14\x97\xa1\x63\x5b\x20\x1f\xf1" + "\x85\xb9\x3e\x95\x19\x92\xd8\x58\xf1\x1a", + .okm_size = 42, + }, { + .test = "unsalted hkdf test with zero info", + .ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c" + "\x0c\x0c\x0c\x0c\x0c\x0c", + .ikm_size = 22, + .salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + .salt_size = 48, + .info = NULL, + .info_size = 0, + .prk = "\x9d\x2d\xa5\x06\x6f\x05\xd1\x6c\x59\xfe\xdf\x6c\x5f\x32\xc7\x5e" + "\xda\x9a\x47\xa7\x9c\x93\x6a\xa4\x4c\xb7\x63\xa8\xe2\x2f\xfb\xfc" + "\xd8\xfe\x55\x43\x58\x53\x47\x21\x90\x39\xd1\x68\x28\x36\x33\xf5", + .prk_size = 48, + .okm = "\x6a\xd7\xc7\x26\xc8\x40\x09\x54\x6a\x76\xe0\x54\x5d\xf2\x66\x78" + "\x7e\x2b\x2c\xd6\xca\x43\x73\xa1\xf3\x14\x50\xa7\xbd\xf9\x48\x2b" + "\xfa\xb8\x11\xf5\x54\x20\x0e\xad\x8f\x53", + .okm_size = 42, + } +}; + +static const struct hkdf_testvec hkdf_sha512_tv[] = { + { + .test = "basic hkdf test", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x66\x57\x99\x82\x37\x37\xde\xd0\x4a\x88\xe4\x7e\x54\xa5\x89\x0b" + "\xb2\xc3\xd2\x47\xc7\xa4\x25\x4a\x8e\x61\x35\x07\x23\x59\x0a\x26" + "\xc3\x62\x38\x12\x7d\x86\x61\xb8\x8c\xf8\x0e\xf8\x02\xd5\x7e\x2f" + "\x7c\xeb\xcf\x1e\x00\xe0\x83\x84\x8b\xe1\x99\x29\xc6\x1b\x42\x37", + .prk_size = 64, + .okm = "\x83\x23\x90\x08\x6c\xda\x71\xfb\x47\x62\x5b\xb5\xce\xb1\x68\xe4" + "\xc8\xe2\x6a\x1a\x16\xed\x34\xd9\xfc\x7f\xe9\x2c\x14\x81\x57\x93" + "\x38\xda\x36\x2c\xb8\xd9\xf9\x25\xd7\xcb", + .okm_size = 42, + }, { + .test = "hkdf test with long input", + .ikm = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f", + .ikm_size = 80, + .salt = "\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf", + .salt_size = 80, + .info = "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .info_size = 80, + .prk = "\x35\x67\x25\x42\x90\x7d\x4e\x14\x2c\x00\xe8\x44\x99\xe7\x4e\x1d" + "\xe0\x8b\xe8\x65\x35\xf9\x24\xe0\x22\x80\x4a\xd7\x75\xdd\xe2\x7e" + "\xc8\x6c\xd1\xe5\xb7\xd1\x78\xc7\x44\x89\xbd\xbe\xb3\x07\x12\xbe" + "\xb8\x2d\x4f\x97\x41\x6c\x5a\x94\xea\x81\xeb\xdf\x3e\x62\x9e\x4a", + .prk_size = 64, + .okm = "\xce\x6c\x97\x19\x28\x05\xb3\x46\xe6\x16\x1e\x82\x1e\xd1\x65\x67" + "\x3b\x84\xf4\x00\xa2\xb5\x14\xb2\xfe\x23\xd8\x4c\xd1\x89\xdd\xf1" + "\xb6\x95\xb4\x8c\xbd\x1c\x83\x88\x44\x11\x37\xb3\xce\x28\xf1\x6a" + "\xa6\x4b\xa3\x3b\xa4\x66\xb2\x4d\xf6\xcf\xcb\x02\x1e\xcf\xf2\x35" + "\xf6\xa2\x05\x6c\xe3\xaf\x1d\xe4\x4d\x57\x20\x97\xa8\x50\x5d\x9e" + "\x7a\x93", + .okm_size = 82, + }, { + .test = "hkdf test with zero salt and info", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 22, + .salt = NULL, + .salt_size = 0, + .info = NULL, + .info_size = 0, + .prk = "\xfd\x20\x0c\x49\x87\xac\x49\x13\x13\xbd\x4a\x2a\x13\x28\x71\x21" + "\x24\x72\x39\xe1\x1c\x9e\xf8\x28\x02\x04\x4b\x66\xef\x35\x7e\x5b" + "\x19\x44\x98\xd0\x68\x26\x11\x38\x23\x48\x57\x2a\x7b\x16\x11\xde" + "\x54\x76\x40\x94\x28\x63\x20\x57\x8a\x86\x3f\x36\x56\x2b\x0d\xf6", + .prk_size = 64, + .okm = "\xf5\xfa\x02\xb1\x82\x98\xa7\x2a\x8c\x23\x89\x8a\x87\x03\x47\x2c" + "\x6e\xb1\x79\xdc\x20\x4c\x03\x42\x5c\x97\x0e\x3b\x16\x4b\xf9\x0f" + "\xff\x22\xd0\x48\x36\xd0\xe2\x34\x3b\xac", + .okm_size = 42, + }, { + .test = "hkdf test with short input", + .ikm = "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b", + .ikm_size = 11, + .salt = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c", + .salt_size = 13, + .info = "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9", + .info_size = 10, + .prk = "\x67\x40\x9c\x9c\xac\x28\xb5\x2e\xe9\xfa\xd9\x1c\x2f\xda\x99\x9f" + "\x7c\xa2\x2e\x34\x34\xf0\xae\x77\x28\x63\x83\x65\x68\xad\x6a\x7f" + "\x10\xcf\x11\x3b\xfd\xdd\x56\x01\x29\xa5\x94\xa8\xf5\x23\x85\xc2" + "\xd6\x61\xd7\x85\xd2\x9c\xe9\x3a\x11\x40\x0c\x92\x06\x83\x18\x1d", + .prk_size = 64, + .okm = "\x74\x13\xe8\x99\x7e\x02\x06\x10\xfb\xf6\x82\x3f\x2c\xe1\x4b\xff" + "\x01\x87\x5d\xb1\xca\x55\xf6\x8c\xfc\xf3\x95\x4d\xc8\xaf\xf5\x35" + "\x59\xbd\x5e\x30\x28\xb0\x80\xf7\xc0\x68", + .okm_size = 42, + }, { + .test = "unsalted hkdf test with zero info", + .ikm = "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c" + "\x0c\x0c\x0c\x0c\x0c\x0c", + .ikm_size = 22, + .salt = "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + .salt_size = 64, + .info = NULL, + .info_size = 0, + .prk = "\x53\x46\xb3\x76\xbf\x3a\xa9\xf8\x4f\x8f\x6e\xd5\xb1\xc4\xf4\x89" + "\x17\x2e\x24\x4d\xac\x30\x3d\x12\xf6\x8e\xcc\x76\x6e\xa6\x00\xaa" + "\x88\x49\x5e\x7f\xb6\x05\x80\x31\x22\xfa\x13\x69\x24\xa8\x40\xb1" + "\xf0\x71\x9d\x2d\x5f\x68\xe2\x9b\x24\x22\x99\xd7\x58\xed\x68\x0c", + .prk_size = 64, + .okm = "\x14\x07\xd4\x60\x13\xd9\x8b\xc6\xde\xce\xfc\xfe\xe5\x5f\x0f\x90" + "\xb0\xc7\xf6\x3d\x68\xeb\x1a\x80\xea\xf0\x7e\x95\x3c\xfc\x0a\x3a" + "\x52\x40\xa1\x55\xd6\xe4\xda\xa9\x65\xbb", + .okm_size = 42, + } +}; + +static int hkdf_test(const char *shash, const struct hkdf_testvec *tv) +{ struct crypto_shash *tfm = NULL; + u8 *prk = NULL, *okm = NULL; + unsigned int prk_size; + const char *driver; + int err; + + tfm = crypto_alloc_shash(shash, 0, 0); + if (IS_ERR(tfm)) { + pr_err("%s(%s): failed to allocate transform: %ld\n", + tv->test, shash, PTR_ERR(tfm)); + return PTR_ERR(tfm); + } + driver = crypto_shash_driver_name(tfm); + + prk_size = crypto_shash_digestsize(tfm); + prk = kzalloc(prk_size, GFP_KERNEL); + if (!prk) { + err = -ENOMEM; + goto out_free; + } + + if (tv->prk_size != prk_size) { + pr_err("%s(%s): prk size mismatch (vec %u, digest %u\n", + tv->test, driver, tv->prk_size, prk_size); + err = -EINVAL; + goto out_free; + } + + err = hkdf_extract(tfm, tv->ikm, tv->ikm_size, + tv->salt, tv->salt_size, prk); + if (err) { + pr_err("%s(%s): hkdf_extract failed with %d\n", + tv->test, driver, err); + goto out_free; + } + + if (memcmp(prk, tv->prk, tv->prk_size)) { + pr_err("%s(%s): hkdf_extract prk mismatch\n", + tv->test, driver); + print_hex_dump(KERN_ERR, "prk: ", DUMP_PREFIX_NONE, + 16, 1, prk, tv->prk_size, false); + err = -EINVAL; + goto out_free; + } + + okm = kzalloc(tv->okm_size, GFP_KERNEL); + if (!okm) { + err = -ENOMEM; + goto out_free; + } + + err = crypto_shash_setkey(tfm, tv->prk, tv->prk_size); + if (err) { + pr_err("%s(%s): failed to set prk, error %d\n", + tv->test, driver, err); + goto out_free; + } + + err = hkdf_expand(tfm, tv->info, tv->info_size, + okm, tv->okm_size); + if (err) { + pr_err("%s(%s): hkdf_expand() failed with %d\n", + tv->test, driver, err); + } else if (memcmp(okm, tv->okm, tv->okm_size)) { + pr_err("%s(%s): hkdf_expand() okm mismatch\n", + tv->test, driver); + print_hex_dump(KERN_ERR, "okm: ", DUMP_PREFIX_NONE, + 16, 1, okm, tv->okm_size, false); + err = -EINVAL; + } +out_free: + kfree(okm); + kfree(prk); + crypto_free_shash(tfm); + return err; +} + +static int __init crypto_hkdf_module_init(void) +{ + int ret = 0, i; + + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return 0; + + for (i = 0; i < ARRAY_SIZE(hkdf_sha256_tv); i++) { + ret = hkdf_test("hmac(sha256)", &hkdf_sha256_tv[i]); + if (ret) + return ret; + } + for (i = 0; i < ARRAY_SIZE(hkdf_sha384_tv); i++) { + ret = hkdf_test("hmac(sha384)", &hkdf_sha384_tv[i]); + if (ret) + return ret; + } + for (i = 0; i < ARRAY_SIZE(hkdf_sha512_tv); i++) { + ret = hkdf_test("hmac(sha512)", &hkdf_sha512_tv[i]); + if (ret) + return ret; + } + return 0; +} + +static void __exit crypto_hkdf_module_exit(void) {} + +module_init(crypto_hkdf_module_init); +module_exit(crypto_hkdf_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("HMAC-based Key Derivation Function (HKDF)"); diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig index 5aff5934baa12..0ea56f588a4ad 100644 --- a/fs/crypto/Kconfig +++ b/fs/crypto/Kconfig @@ -3,6 +3,7 @@ config FS_ENCRYPTION bool "FS Encryption (Per-file encryption)" select CRYPTO select CRYPTO_HASH + select CRYPTO_HKDF select CRYPTO_SKCIPHER select CRYPTO_LIB_SHA256 select KEYS diff --git a/fs/crypto/hkdf.c b/fs/crypto/hkdf.c index 5a384dad2c72f..855a0f4b7318b 100644 --- a/fs/crypto/hkdf.c +++ b/fs/crypto/hkdf.c @@ -1,9 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Implementation of HKDF ("HMAC-based Extract-and-Expand Key Derivation - * Function"), aka RFC 5869. See also the original paper (Krawczyk 2010): - * "Cryptographic Extraction and Key Derivation: The HKDF Scheme". - * * This is used to derive keys from the fscrypt master keys. * * Copyright 2019 Google LLC @@ -11,6 +7,7 @@ #include #include +#include #include "fscrypt_private.h" @@ -44,20 +41,6 @@ * there's no way to persist a random salt per master key from kernel mode. */ -/* HKDF-Extract (RFC 5869 section 2.2), unsalted */ -static int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm, - unsigned int ikmlen, u8 prk[HKDF_HASHLEN]) -{ - static const u8 default_salt[HKDF_HASHLEN]; - int err; - - err = crypto_shash_setkey(hmac_tfm, default_salt, HKDF_HASHLEN); - if (err) - return err; - - return crypto_shash_tfm_digest(hmac_tfm, ikm, ikmlen, prk); -} - /* * Compute HKDF-Extract using the given master key as the input keying material, * and prepare an HMAC transform object keyed by the resulting pseudorandom key. @@ -69,6 +52,7 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, unsigned int master_key_size) { struct crypto_shash *hmac_tfm; + static const u8 default_salt[HKDF_HASHLEN]; u8 prk[HKDF_HASHLEN]; int err; @@ -84,7 +68,8 @@ int fscrypt_init_hkdf(struct fscrypt_hkdf *hkdf, const u8 *master_key, goto err_free_tfm; } - err = hkdf_extract(hmac_tfm, master_key, master_key_size, prk); + err = hkdf_extract(hmac_tfm, master_key, master_key_size, + default_salt, HKDF_HASHLEN, prk); if (err) goto err_free_tfm; @@ -118,61 +103,21 @@ int fscrypt_hkdf_expand(const struct fscrypt_hkdf *hkdf, u8 context, u8 *okm, unsigned int okmlen) { SHASH_DESC_ON_STACK(desc, hkdf->hmac_tfm); - u8 prefix[9]; - unsigned int i; + u8 *full_info; int err; - const u8 *prev = NULL; - u8 counter = 1; - u8 tmp[HKDF_HASHLEN]; - - if (WARN_ON_ONCE(okmlen > 255 * HKDF_HASHLEN)) - return -EINVAL; + full_info = kzalloc(infolen + 9, GFP_KERNEL); + if (!full_info) + return -ENOMEM; desc->tfm = hkdf->hmac_tfm; - memcpy(prefix, "fscrypt\0", 8); - prefix[8] = context; - - for (i = 0; i < okmlen; i += HKDF_HASHLEN) { - - err = crypto_shash_init(desc); - if (err) - goto out; - - if (prev) { - err = crypto_shash_update(desc, prev, HKDF_HASHLEN); - if (err) - goto out; - } - - err = crypto_shash_update(desc, prefix, sizeof(prefix)); - if (err) - goto out; - - err = crypto_shash_update(desc, info, infolen); - if (err) - goto out; - - BUILD_BUG_ON(sizeof(counter) != 1); - if (okmlen - i < HKDF_HASHLEN) { - err = crypto_shash_finup(desc, &counter, 1, tmp); - if (err) - goto out; - memcpy(&okm[i], tmp, okmlen - i); - memzero_explicit(tmp, sizeof(tmp)); - } else { - err = crypto_shash_finup(desc, &counter, 1, &okm[i]); - if (err) - goto out; - } - counter++; - prev = &okm[i]; - } - err = 0; -out: - if (unlikely(err)) - memzero_explicit(okm, okmlen); /* so caller doesn't need to */ - shash_desc_zero(desc); + memcpy(full_info, "fscrypt\0", 8); + full_info[8] = context; + memcpy(full_info + 9, info, infolen); + + err = hkdf_expand(hkdf->hmac_tfm, full_info, infolen + 9, + okm, okmlen); + kfree_sensitive(full_info); return err; } diff --git a/include/crypto/hkdf.h b/include/crypto/hkdf.h new file mode 100644 index 0000000000000..6a9678f508f5d --- /dev/null +++ b/include/crypto/hkdf.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * HKDF: HMAC-based Key Derivation Function (HKDF), RFC 5869 + * + * Extracted from fs/crypto/hkdf.c, which has + * Copyright 2019 Google LLC + */ + +#ifndef _CRYPTO_HKDF_H +#define _CRYPTO_HKDF_H + +#include + +int hkdf_extract(struct crypto_shash *hmac_tfm, const u8 *ikm, + unsigned int ikmlen, const u8 *salt, unsigned int saltlen, + u8 *prk); +int hkdf_expand(struct crypto_shash *hmac_tfm, + const u8 *info, unsigned int infolen, + u8 *okm, unsigned int okmlen); +#endif From 5c12a9cdb5ad54621f1b7c02df7993a4a1b86a46 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:10 +0100 Subject: [PATCH 02/22] nvme: add nvme_auth_generate_psk() Add a function to generate a NVMe PSK from the shared credentials negotiated by DH-HMAC-CHAP. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 87 ++++++++++++++++++++++++++++++++++++++ include/linux/nvme-auth.h | 3 ++ 2 files changed, 90 insertions(+) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index 9b7126e1a19d9..76e4a12f005e8 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -471,5 +472,91 @@ int nvme_auth_generate_key(u8 *secret, struct nvme_dhchap_key **ret_key) } EXPORT_SYMBOL_GPL(nvme_auth_generate_key); +/** + * nvme_auth_generate_psk - Generate a PSK for TLS + * @hmac_id: Hash function identifier + * @skey: Session key + * @skey_len: Length of @skey + * @c1: Value of challenge C1 + * @c2: Value of challenge C2 + * @hash_len: Hash length of the hash algorithm + * @ret_psk: Pointer too the resulting generated PSK + * @ret_len: length of @ret_psk + * + * Generate a PSK for TLS as specified in NVMe base specification, section + * 8.13.5.9: Generated PSK for TLS + * + * The generated PSK for TLS shall be computed applying the HMAC function + * using the hash function H( ) selected by the HashID parameter in the + * DH-HMAC-CHAP_Challenge message with the session key KS as key to the + * concatenation of the two challenges C1 and C2 (i.e., generated + * PSK = HMAC(KS, C1 || C2)). + * + * Returns 0 on success with a valid generated PSK pointer in @ret_psk and + * the length of @ret_psk in @ret_len, or a negative error number otherwise. + */ +int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, + u8 *c1, u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len) +{ + struct crypto_shash *tfm; + SHASH_DESC_ON_STACK(shash, tfm); + u8 *psk; + const char *hmac_name; + int ret, psk_len; + + if (!c1 || !c2) + return -EINVAL; + + hmac_name = nvme_auth_hmac_name(hmac_id); + if (!hmac_name) { + pr_warn("%s: invalid hash algorithm %d\n", + __func__, hmac_id); + return -EINVAL; + } + + tfm = crypto_alloc_shash(hmac_name, 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + psk_len = crypto_shash_digestsize(tfm); + psk = kzalloc(psk_len, GFP_KERNEL); + if (!psk) { + ret = -ENOMEM; + goto out_free_tfm; + } + + shash->tfm = tfm; + ret = crypto_shash_setkey(tfm, skey, skey_len); + if (ret) + goto out_free_psk; + + ret = crypto_shash_init(shash); + if (ret) + goto out_free_psk; + + ret = crypto_shash_update(shash, c1, hash_len); + if (ret) + goto out_free_psk; + + ret = crypto_shash_update(shash, c2, hash_len); + if (ret) + goto out_free_psk; + + ret = crypto_shash_final(shash, psk); + if (!ret) { + *ret_psk = psk; + *ret_len = psk_len; + } + +out_free_psk: + if (ret) + kfree_sensitive(psk); +out_free_tfm: + crypto_free_shash(tfm); + + return ret; +} +EXPORT_SYMBOL_GPL(nvme_auth_generate_psk); + MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index c1d0bc5d96244..b13884b04dfd8 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -40,5 +40,8 @@ int nvme_auth_gen_pubkey(struct crypto_kpp *dh_tfm, int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, u8 *ctrl_key, size_t ctrl_key_len, u8 *sess_key, size_t sess_key_len); +int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, + u8 *c1, u8 *c2, size_t hash_len, + u8 **ret_psk, size_t *ret_len); #endif /* _NVME_AUTH_H */ From 71972b9ffe1efe183a87d76d094236f9ec30656e Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:11 +0100 Subject: [PATCH 03/22] nvme: add nvme_auth_generate_digest() Add a function to calculate the PSK digest as specified in TP8018. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/auth.c | 134 +++++++++++++++++++++++++++++++++++++ include/linux/nvme-auth.h | 2 + 2 files changed, 136 insertions(+) diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index 76e4a12f005e8..54b75d1771016 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -558,5 +558,139 @@ int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, } EXPORT_SYMBOL_GPL(nvme_auth_generate_psk); +/** + * nvme_auth_generate_digest - Generate TLS PSK digest + * @hmac_id: Hash function identifier + * @psk: Generated input PSK + * @psk_len: Length of @psk + * @subsysnqn: NQN of the subsystem + * @hostnqn: NQN of the host + * @ret_digest: Pointer to the returned digest + * + * Generate a TLS PSK digest as specified in TP8018 Section 3.6.1.3: + * TLS PSK and PSK identity Derivation + * + * The PSK digest shall be computed by encoding in Base64 (refer to RFC + * 4648) the result of the application of the HMAC function using the hash + * function specified in item 4 above (ie the hash function of the cipher + * suite associated with the PSK identity) with the PSK as HMAC key to the + * concatenation of: + * - the NQN of the host (i.e., NQNh) not including the null terminator; + * - a space character; + * - the NQN of the NVM subsystem (i.e., NQNc) not including the null + * terminator; + * - a space character; and + * - the seventeen ASCII characters "NVMe-over-Fabrics" + * (i.e., = Base64(HMAC(PSK, NQNh || " " || NQNc || " " || + * "NVMe-over-Fabrics"))). + * The length of the PSK digest depends on the hash function used to compute + * it as follows: + * - If the SHA-256 hash function is used, the resulting PSK digest is 44 + * characters long; or + * - If the SHA-384 hash function is used, the resulting PSK digest is 64 + * characters long. + * + * Returns 0 on success with a valid digest pointer in @ret_digest, or a + * negative error number on failure. + */ +int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, + char *subsysnqn, char *hostnqn, u8 **ret_digest) +{ + struct crypto_shash *tfm; + SHASH_DESC_ON_STACK(shash, tfm); + u8 *digest, *enc; + const char *hmac_name; + size_t digest_len, hmac_len; + int ret; + + if (WARN_ON(!subsysnqn || !hostnqn)) + return -EINVAL; + + hmac_name = nvme_auth_hmac_name(hmac_id); + if (!hmac_name) { + pr_warn("%s: invalid hash algorithm %d\n", + __func__, hmac_id); + return -EINVAL; + } + + switch (nvme_auth_hmac_hash_len(hmac_id)) { + case 32: + hmac_len = 44; + break; + case 48: + hmac_len = 64; + break; + default: + pr_warn("%s: invalid hash algorithm '%s'\n", + __func__, hmac_name); + return -EINVAL; + } + + enc = kzalloc(hmac_len + 1, GFP_KERNEL); + if (!enc) + return -ENOMEM; + + tfm = crypto_alloc_shash(hmac_name, 0, 0); + if (IS_ERR(tfm)) { + ret = PTR_ERR(tfm); + goto out_free_enc; + } + + digest_len = crypto_shash_digestsize(tfm); + digest = kzalloc(digest_len, GFP_KERNEL); + if (!digest) { + ret = -ENOMEM; + goto out_free_tfm; + } + + shash->tfm = tfm; + ret = crypto_shash_setkey(tfm, psk, psk_len); + if (ret) + goto out_free_digest; + + ret = crypto_shash_init(shash); + if (ret) + goto out_free_digest; + + ret = crypto_shash_update(shash, hostnqn, strlen(hostnqn)); + if (ret) + goto out_free_digest; + + ret = crypto_shash_update(shash, " ", 1); + if (ret) + goto out_free_digest; + + ret = crypto_shash_update(shash, subsysnqn, strlen(subsysnqn)); + if (ret) + goto out_free_digest; + + ret = crypto_shash_update(shash, " NVMe-over-Fabrics", 18); + if (ret) + goto out_free_digest; + + ret = crypto_shash_final(shash, digest); + if (ret) + goto out_free_digest; + + ret = base64_encode(digest, digest_len, enc); + if (ret < hmac_len) { + ret = -ENOKEY; + goto out_free_digest; + } + *ret_digest = enc; + ret = 0; + +out_free_digest: + kfree_sensitive(digest); +out_free_tfm: + crypto_free_shash(tfm); +out_free_enc: + if (ret) + kfree_sensitive(enc); + + return ret; +} +EXPORT_SYMBOL_GPL(nvme_auth_generate_digest); + MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index b13884b04dfd8..998f06bf10fd3 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -43,5 +43,7 @@ int nvme_auth_gen_shared_secret(struct crypto_kpp *dh_tfm, int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, u8 *c1, u8 *c2, size_t hash_len, u8 **ret_psk, size_t *ret_len); +int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, + char *subsysnqn, char *hostnqn, u8 **ret_digest); #endif /* _NVME_AUTH_H */ From 9d5c0fffee266f61ccc745faa6298dafe2b8c5bf Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:12 +0100 Subject: [PATCH 04/22] nvme: add nvme_auth_derive_tls_psk() Add a function to derive the TLS PSK as specified TP8018. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/Kconfig | 1 + drivers/nvme/common/auth.c | 116 ++++++++++++++++++++++++++++++++++++ include/linux/nvme-auth.h | 2 + 3 files changed, 119 insertions(+) diff --git a/drivers/nvme/common/Kconfig b/drivers/nvme/common/Kconfig index 244432e0b73d8..da963e4f3f1f8 100644 --- a/drivers/nvme/common/Kconfig +++ b/drivers/nvme/common/Kconfig @@ -12,3 +12,4 @@ config NVME_AUTH select CRYPTO_SHA512 select CRYPTO_DH select CRYPTO_DH_RFC7919_GROUPS + select CRYPTO_HKDF diff --git a/drivers/nvme/common/auth.c b/drivers/nvme/common/auth.c index 54b75d1771016..2c092ec8c0a94 100644 --- a/drivers/nvme/common/auth.c +++ b/drivers/nvme/common/auth.c @@ -15,6 +15,8 @@ #include #include +#define HKDF_MAX_HASHLEN 64 + static u32 nvme_dhchap_seqnum; static DEFINE_MUTEX(nvme_dhchap_mutex); @@ -692,5 +694,119 @@ int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, } EXPORT_SYMBOL_GPL(nvme_auth_generate_digest); +/** + * nvme_auth_derive_tls_psk - Derive TLS PSK + * @hmac_id: Hash function identifier + * @psk: generated input PSK + * @psk_len: size of @psk + * @psk_digest: TLS PSK digest + * @ret_psk: Pointer to the resulting TLS PSK + * + * Derive a TLS PSK as specified in TP8018 Section 3.6.1.3: + * TLS PSK and PSK identity Derivation + * + * The TLS PSK shall be derived as follows from an input PSK + * (i.e., either a retained PSK or a generated PSK) and a PSK + * identity using the HKDF-Extract and HKDF-Expand-Label operations + * (refer to RFC 5869 and RFC 8446) where the hash function is the + * one specified by the hash specifier of the PSK identity: + * 1. PRK = HKDF-Extract(0, Input PSK); and + * 2. TLS PSK = HKDF-Expand-Label(PRK, "nvme-tls-psk", PskIdentityContext, L), + * where PskIdentityContext is the hash identifier indicated in + * the PSK identity concatenated to a space character and to the + * Base64 PSK digest (i.e., " ") and L is the + * output size in bytes of the hash function (i.e., 32 for SHA-256 + * and 48 for SHA-384). + * + * Returns 0 on success with a valid psk pointer in @ret_psk or a negative + * error number otherwise. + */ +int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len, + u8 *psk_digest, u8 **ret_psk) +{ + struct crypto_shash *hmac_tfm; + const char *hmac_name; + const char *psk_prefix = "tls13 nvme-tls-psk"; + static const char default_salt[HKDF_MAX_HASHLEN]; + size_t info_len, prk_len; + char *info; + unsigned char *prk, *tls_key; + int ret; + + hmac_name = nvme_auth_hmac_name(hmac_id); + if (!hmac_name) { + pr_warn("%s: invalid hash algorithm %d\n", + __func__, hmac_id); + return -EINVAL; + } + if (hmac_id == NVME_AUTH_HASH_SHA512) { + pr_warn("%s: unsupported hash algorithm %s\n", + __func__, hmac_name); + return -EINVAL; + } + + hmac_tfm = crypto_alloc_shash(hmac_name, 0, 0); + if (IS_ERR(hmac_tfm)) + return PTR_ERR(hmac_tfm); + + prk_len = crypto_shash_digestsize(hmac_tfm); + prk = kzalloc(prk_len, GFP_KERNEL); + if (!prk) { + ret = -ENOMEM; + goto out_free_shash; + } + + if (WARN_ON(prk_len > HKDF_MAX_HASHLEN)) { + ret = -EINVAL; + goto out_free_prk; + } + ret = hkdf_extract(hmac_tfm, psk, psk_len, + default_salt, prk_len, prk); + if (ret) + goto out_free_prk; + + ret = crypto_shash_setkey(hmac_tfm, prk, prk_len); + if (ret) + goto out_free_prk; + + /* + * 2 addtional bytes for the length field from HDKF-Expand-Label, + * 2 addtional bytes for the HMAC ID, and one byte for the space + * separator. + */ + info_len = strlen(psk_digest) + strlen(psk_prefix) + 5; + info = kzalloc(info_len + 1, GFP_KERNEL); + if (!info) { + ret = -ENOMEM; + goto out_free_prk; + } + + put_unaligned_be16(psk_len, info); + memcpy(info + 2, psk_prefix, strlen(psk_prefix)); + sprintf(info + 2 + strlen(psk_prefix), "%02d %s", hmac_id, psk_digest); + + tls_key = kzalloc(psk_len, GFP_KERNEL); + if (!tls_key) { + ret = -ENOMEM; + goto out_free_info; + } + ret = hkdf_expand(hmac_tfm, info, info_len, tls_key, psk_len); + if (ret) { + kfree(tls_key); + goto out_free_info; + } + *ret_psk = tls_key; + +out_free_info: + kfree(info); +out_free_prk: + kfree(prk); +out_free_shash: + crypto_free_shash(hmac_tfm); + + return ret; +} +EXPORT_SYMBOL_GPL(nvme_auth_derive_tls_psk); + MODULE_DESCRIPTION("NVMe Authentication framework"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/nvme-auth.h b/include/linux/nvme-auth.h index 998f06bf10fd3..60e069a6757ff 100644 --- a/include/linux/nvme-auth.h +++ b/include/linux/nvme-auth.h @@ -45,5 +45,7 @@ int nvme_auth_generate_psk(u8 hmac_id, u8 *skey, size_t skey_len, u8 **ret_psk, size_t *ret_len); int nvme_auth_generate_digest(u8 hmac_id, u8 *psk, size_t psk_len, char *subsysnqn, char *hostnqn, u8 **ret_digest); +int nvme_auth_derive_tls_psk(int hmac_id, u8 *psk, size_t psk_len, + u8 *psk_digest, u8 **ret_psk); #endif /* _NVME_AUTH_H */ From 62eb89323cb08f1d6a3b41b84972ff4f373a1960 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:13 +0100 Subject: [PATCH 05/22] nvme-keyring: add nvme_tls_psk_refresh() Add a function to refresh a generated PSK in the specified keyring. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/common/keyring.c | 65 ++++++++++++++++++++++++++++++++++- drivers/nvme/host/tcp.c | 1 - drivers/nvme/target/tcp.c | 1 - include/linux/nvme-keyring.h | 12 ++++++- 4 files changed, 75 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/common/keyring.c b/drivers/nvme/common/keyring.c index ed5167f942d89..32d16c53133be 100644 --- a/drivers/nvme/common/keyring.c +++ b/drivers/nvme/common/keyring.c @@ -5,7 +5,6 @@ #include #include -#include #include #include #include @@ -124,6 +123,70 @@ static struct key *nvme_tls_psk_lookup(struct key *keyring, return key_ref_to_ptr(keyref); } +/** + * nvme_tls_psk_refresh - Refresh TLS PSK + * @keyring: Keyring holding the TLS PSK + * @hostnqn: Host NQN to use + * @subnqn: Subsystem NQN to use + * @hmac_id: Hash function identifier + * @data: TLS PSK key material + * @data_len: Length of @data + * @digest: TLS PSK digest + * + * Refresh a generated version 1 TLS PSK with the identity generated + * from @hmac_id, @hostnqn, @subnqn, and @digest in the keyring given + * by @keyring. + * + * Returns the updated key success or an error pointer otherwise. + */ +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + key_perm_t keyperm = + KEY_POS_SEARCH | KEY_POS_VIEW | KEY_POS_READ | + KEY_POS_WRITE | KEY_POS_LINK | KEY_POS_SETATTR | + KEY_USR_SEARCH | KEY_USR_VIEW | KEY_USR_READ; + char *identity; + key_ref_t keyref; + key_serial_t keyring_id; + struct key *key; + + if (!hostnqn || !subnqn || !data || !data_len) + return ERR_PTR(-EINVAL); + + identity = kasprintf(GFP_KERNEL, "NVMe1G%02d %s %s %s", + hmac_id, hostnqn, subnqn, digest); + if (!identity) + return ERR_PTR(-ENOMEM); + + if (!keyring) + keyring = nvme_keyring; + keyring_id = key_serial(keyring); + pr_debug("keyring %x refresh tls psk '%s'\n", + keyring_id, identity); + keyref = key_create_or_update(make_key_ref(keyring, true), + "psk", identity, data, data_len, + keyperm, KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(keyref)) { + pr_debug("refresh tls psk '%s' failed, error %ld\n", + identity, PTR_ERR(keyref)); + kfree(identity); + return ERR_PTR(-ENOKEY); + } + kfree(identity); + /* + * Set the default timeout to 1 hour + * as suggested in TP8018. + */ + key = key_ref_to_ptr(keyref); + key_set_timeout(key, 3600); + return key; +} +EXPORT_SYMBOL_GPL(nvme_tls_psk_refresh); + /* * NVMe PSK priority list * diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 841238f38fdda..b50972257e49d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109a..fa59a7996efa2 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/nvme-keyring.h b/include/linux/nvme-keyring.h index 19d2b256180fd..ab8971afa9734 100644 --- a/include/linux/nvme-keyring.h +++ b/include/linux/nvme-keyring.h @@ -6,15 +6,25 @@ #ifndef _NVME_KEYRING_H #define _NVME_KEYRING_H +#include + #if IS_ENABLED(CONFIG_NVME_KEYRING) +struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, const char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest); key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn); key_serial_t nvme_keyring_id(void); struct key *nvme_tls_key_lookup(key_serial_t key_id); #else - +static inline struct key *nvme_tls_psk_refresh(struct key *keyring, + const char *hostnqn, char *subnqn, u8 hmac_id, + u8 *data, size_t data_len, const char *digest) +{ + return ERR_PTR(-ENOTSUPP); +} static inline key_serial_t nvme_tls_psk_default(struct key *keyring, const char *hostnqn, const char *subnqn) { From e88a7595b57f2a04f1be796419444b4a14a55d18 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:14 +0100 Subject: [PATCH 06/22] nvme-tcp: request secure channel concatenation Add a fabrics option 'concat' to request secure channel concatenation as specified the NVME Base Specification v2.1, section 8.3.4.3: Secure Channel Concatenation. When secure channel concatenation is enabled a 'generated PSK' is inserted into the keyring such that it's available after reset. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/Kconfig | 2 +- drivers/nvme/host/auth.c | 115 +++++++++++++++++++++++++++++++++++- drivers/nvme/host/fabrics.c | 34 ++++++++++- drivers/nvme/host/fabrics.h | 3 + drivers/nvme/host/nvme.h | 2 + drivers/nvme/host/sysfs.c | 4 +- drivers/nvme/host/tcp.c | 53 +++++++++++++++-- include/linux/nvme.h | 7 +++ 8 files changed, 205 insertions(+), 15 deletions(-) diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig index 486afe5981845..10e453b2436e8 100644 --- a/drivers/nvme/host/Kconfig +++ b/drivers/nvme/host/Kconfig @@ -109,7 +109,7 @@ config NVME_HOST_AUTH bool "NVMe over Fabrics In-Band Authentication in host side" depends on NVME_CORE select NVME_AUTH - select NVME_KEYRING if NVME_TCP_TLS + select NVME_KEYRING help This provides support for NVMe over Fabrics In-Band Authentication in host side. diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c index 5ea0e21709da3..6115fef74c1e9 100644 --- a/drivers/nvme/host/auth.c +++ b/drivers/nvme/host/auth.c @@ -12,6 +12,7 @@ #include "nvme.h" #include "fabrics.h" #include +#include #define CHAP_BUF_SIZE 4096 static struct kmem_cache *nvme_chap_buf_cache; @@ -131,7 +132,13 @@ static int nvme_auth_set_dhchap_negotiate_data(struct nvme_ctrl *ctrl, data->auth_type = NVME_AUTH_COMMON_MESSAGES; data->auth_id = NVME_AUTH_DHCHAP_MESSAGE_NEGOTIATE; data->t_id = cpu_to_le16(chap->transaction); - data->sc_c = 0; /* No secure channel concatenation */ + if (ctrl->opts->concat && chap->qid == 0) { + if (ctrl->opts->tls_key) + data->sc_c = NVME_AUTH_SECP_REPLACETLSPSK; + else + data->sc_c = NVME_AUTH_SECP_NEWTLSPSK; + } else + data->sc_c = NVME_AUTH_SECP_NOSC; data->napd = 1; data->auth_protocol[0].dhchap.authid = NVME_AUTH_DHCHAP_AUTH_ID; data->auth_protocol[0].dhchap.halen = 3; @@ -311,8 +318,9 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, data->hl = chap->hash_len; data->dhvlen = cpu_to_le16(chap->host_key_len); memcpy(data->rval, chap->response, chap->hash_len); - if (ctrl->ctrl_key) { + if (ctrl->ctrl_key) chap->bi_directional = true; + if (ctrl->ctrl_key || ctrl->opts->concat) { get_random_bytes(chap->c2, chap->hash_len); data->cvalid = 1; memcpy(data->rval + chap->hash_len, chap->c2, @@ -322,7 +330,10 @@ static int nvme_auth_set_dhchap_reply_data(struct nvme_ctrl *ctrl, } else { memset(chap->c2, 0, chap->hash_len); } - chap->s2 = nvme_auth_get_seqnum(); + if (ctrl->opts->concat) + chap->s2 = 0; + else + chap->s2 = nvme_auth_get_seqnum(); data->seqnum = cpu_to_le32(chap->s2); if (chap->host_key_len) { dev_dbg(ctrl->device, "%s: qid %d host public key %*ph\n", @@ -677,6 +688,92 @@ static void nvme_auth_free_dhchap(struct nvme_dhchap_queue_context *chap) crypto_free_kpp(chap->dh_tfm); } +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) +{ + dev_dbg(ctrl->device, "Wipe generated TLS PSK %08x\n", + key_serial(ctrl->opts->tls_key)); + key_revoke(ctrl->opts->tls_key); + key_put(ctrl->opts->tls_key); + ctrl->opts->tls_key = NULL; +} +EXPORT_SYMBOL_GPL(nvme_auth_revoke_tls_key); + +static int nvme_auth_secure_concat(struct nvme_ctrl *ctrl, + struct nvme_dhchap_queue_context *chap) +{ + u8 *psk, *digest, *tls_psk; + struct key *tls_key; + size_t psk_len; + int ret = 0; + + if (!chap->sess_key) { + dev_warn(ctrl->device, + "%s: qid %d no session key negotiated\n", + __func__, chap->qid); + return -ENOKEY; + } + + if (chap->qid) { + dev_warn(ctrl->device, + "qid %d: secure concatenation not supported on I/O queues\n", + chap->qid); + return -EINVAL; + } + ret = nvme_auth_generate_psk(chap->hash_id, chap->sess_key, + chap->sess_key_len, + chap->c1, chap->c2, + chap->hash_len, &psk, &psk_len); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate PSK, error %d\n", + __func__, chap->qid, ret); + return ret; + } + dev_dbg(ctrl->device, + "%s: generated psk %*ph\n", __func__, (int)psk_len, psk); + + ret = nvme_auth_generate_digest(chap->hash_id, psk, psk_len, + ctrl->opts->subsysnqn, + ctrl->opts->host->nqn, &digest); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to generate digest, error %d\n", + __func__, chap->qid, ret); + goto out_free_psk; + }; + dev_dbg(ctrl->device, "%s: generated digest %s\n", + __func__, digest); + ret = nvme_auth_derive_tls_psk(chap->hash_id, psk, psk_len, + digest, &tls_psk); + if (ret) { + dev_warn(ctrl->device, + "%s: qid %d failed to derive TLS psk, error %d\n", + __func__, chap->qid, ret); + goto out_free_digest; + }; + + tls_key = nvme_tls_psk_refresh(ctrl->opts->keyring, + ctrl->opts->host->nqn, + ctrl->opts->subsysnqn, chap->hash_id, + tls_psk, psk_len, digest); + if (IS_ERR(tls_key)) { + ret = PTR_ERR(tls_key); + dev_warn(ctrl->device, + "%s: qid %d failed to insert generated key, error %d\n", + __func__, chap->qid, ret); + tls_key = NULL; + } + kfree_sensitive(tls_psk); + if (ctrl->opts->tls_key) + nvme_auth_revoke_tls_key(ctrl); + ctrl->opts->tls_key = tls_key; +out_free_digest: + kfree_sensitive(digest); +out_free_psk: + kfree_sensitive(psk); + return ret; +} + static void nvme_queue_auth_work(struct work_struct *work) { struct nvme_dhchap_queue_context *chap = @@ -833,6 +930,13 @@ static void nvme_queue_auth_work(struct work_struct *work) } if (!ret) { chap->error = 0; + if (ctrl->opts->concat && + (ret = nvme_auth_secure_concat(ctrl, chap))) { + dev_warn(ctrl->device, + "%s: qid %d failed to enable secure concatenation\n", + __func__, chap->qid); + chap->error = ret; + } return; } @@ -912,6 +1016,11 @@ static void nvme_ctrl_auth_work(struct work_struct *work) "qid 0: authentication failed\n"); return; } + /* + * Only run authentication on the admin queue for secure concatenation. + */ + if (ctrl->opts->concat) + return; for (q = 1; q < ctrl->queue_count; q++) { ret = nvme_auth_negotiate(ctrl, q); diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 432efcbf9e2f5..93e9041b9657e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -472,8 +472,9 @@ int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl) result = le32_to_cpu(res.u32); ctrl->cntlid = result & 0xFFFF; if (result & (NVME_CONNECT_AUTHREQ_ATR | NVME_CONNECT_AUTHREQ_ASCR)) { - /* Secure concatenation is not implemented */ - if (result & NVME_CONNECT_AUTHREQ_ASCR) { + /* Check for secure concatenation */ + if ((result & NVME_CONNECT_AUTHREQ_ASCR) && + !ctrl->opts->concat) { dev_warn(ctrl->device, "qid 0: secure concatenation is not supported\n"); ret = -EOPNOTSUPP; @@ -550,7 +551,7 @@ int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid) /* Secure concatenation is not implemented */ if (result & NVME_CONNECT_AUTHREQ_ASCR) { dev_warn(ctrl->device, - "qid 0: secure concatenation is not supported\n"); + "qid %d: secure concatenation is not supported\n", qid); ret = -EOPNOTSUPP; goto out_free_data; } @@ -706,6 +707,7 @@ static const match_table_t opt_tokens = { #endif #ifdef CONFIG_NVME_TCP_TLS { NVMF_OPT_TLS, "tls" }, + { NVMF_OPT_CONCAT, "concat" }, #endif { NVMF_OPT_ERR, NULL } }; @@ -735,6 +737,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, opts->tls = false; opts->tls_key = NULL; opts->keyring = NULL; + opts->concat = false; options = o = kstrdup(buf, GFP_KERNEL); if (!options) @@ -1053,6 +1056,14 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, } opts->tls = true; break; + case NVMF_OPT_CONCAT: + if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) { + pr_err("TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + opts->concat = true; + break; default: pr_warn("unknown parameter or missing value '%s' in ctrl creation request\n", p); @@ -1079,6 +1090,23 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, pr_warn("failfast tmo (%d) larger than controller loss tmo (%d)\n", opts->fast_io_fail_tmo, ctrl_loss_tmo); } + if (opts->concat) { + if (opts->tls) { + pr_err("Secure concatenation over TLS is not supported\n"); + ret = -EINVAL; + goto out; + } + if (opts->tls_key) { + pr_err("Cannot specify a TLS key for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + if (!opts->dhchap_secret) { + pr_err("Need to enable DH-CHAP for secure concatenation\n"); + ret = -EINVAL; + goto out; + } + } opts->host = nvmf_host_add(hostnqn, &hostid); if (IS_ERR(opts->host)) { diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h index 21d75dc4a3a09..9cf5b020adba2 100644 --- a/drivers/nvme/host/fabrics.h +++ b/drivers/nvme/host/fabrics.h @@ -66,6 +66,7 @@ enum { NVMF_OPT_TLS = 1 << 25, NVMF_OPT_KEYRING = 1 << 26, NVMF_OPT_TLS_KEY = 1 << 27, + NVMF_OPT_CONCAT = 1 << 28, }; /** @@ -101,6 +102,7 @@ enum { * @keyring: Keyring to use for key lookups * @tls_key: TLS key for encrypted connections (TCP) * @tls: Start TLS encrypted connections (TCP) + * @concat: Enabled Secure channel concatenation (TCP) * @disable_sqflow: disable controller sq flow control * @hdr_digest: generate/verify header digest (TCP) * @data_digest: generate/verify data digest (TCP) @@ -130,6 +132,7 @@ struct nvmf_ctrl_options { struct key *keyring; struct key *tls_key; bool tls; + bool concat; bool disable_sqflow; bool hdr_digest; bool data_digest; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7be92d07430e9..daa4c91e4848c 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -1147,6 +1147,7 @@ void nvme_auth_stop(struct nvme_ctrl *ctrl); int nvme_auth_negotiate(struct nvme_ctrl *ctrl, int qid); int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid); void nvme_auth_free(struct nvme_ctrl *ctrl); +void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl); #else static inline int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl) { @@ -1169,6 +1170,7 @@ static inline int nvme_auth_wait(struct nvme_ctrl *ctrl, int qid) return -EPROTONOSUPPORT; } static inline void nvme_auth_free(struct nvme_ctrl *ctrl) {}; +static inline void nvme_auth_revoke_tls_key(struct nvme_ctrl *ctrl) {}; #endif u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 3a41b9ab0f13c..52683943be152 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -780,10 +780,10 @@ static umode_t nvme_tls_attrs_are_visible(struct kobject *kobj, return 0; if (a == &dev_attr_tls_key.attr && - !ctrl->opts->tls) + !ctrl->opts->tls && !ctrl->opts->concat) return 0; if (a == &dev_attr_tls_configured_key.attr && - !ctrl->opts->tls_key) + (!ctrl->opts->tls_key || ctrl->opts->concat)) return 0; if (a == &dev_attr_tls_keyring.attr && !ctrl->opts->keyring) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index b50972257e49d..196d363188539 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -235,7 +235,7 @@ static inline bool nvme_tcp_tls_configured(struct nvme_ctrl *ctrl) if (!IS_ENABLED(CONFIG_NVME_TCP_TLS)) return 0; - return ctrl->opts->tls; + return ctrl->opts->tls || ctrl->opts->concat; } static inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) @@ -1987,7 +1987,7 @@ static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) if (nvme_tcp_tls_configured(ctrl)) { if (ctrl->opts->tls_key) pskid = key_serial(ctrl->opts->tls_key); - else { + else if (ctrl->opts->tls) { pskid = nvme_tls_psk_default(ctrl->opts->keyring, ctrl->opts->host->nqn, ctrl->opts->subsysnqn); @@ -2017,9 +2017,25 @@ static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; - if (nvme_tcp_tls_configured(ctrl) && !ctrl->tls_pskid) { - dev_err(ctrl->device, "no PSK negotiated\n"); - return -ENOKEY; + if (nvme_tcp_tls_configured(ctrl)) { + if (ctrl->opts->concat) { + /* + * The generated PSK is stored in the + * fabric options + */ + if (!ctrl->opts->tls_key) { + dev_err(ctrl->device, "no PSK generated\n"); + return -ENOKEY; + } + if (ctrl->tls_pskid && + ctrl->tls_pskid != key_serial(ctrl->opts->tls_key)) { + dev_err(ctrl->device, "Stale PSK id %08x\n", ctrl->tls_pskid); + ctrl->tls_pskid = 0; + } + } else if (!ctrl->tls_pskid) { + dev_err(ctrl->device, "no PSK negotiated\n"); + return -ENOKEY; + } } for (i = 1; i < ctrl->queue_count; i++) { @@ -2237,6 +2253,27 @@ static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl, } } +/* + * The TLS key is set by secure concatenation after negotiation has been + * completed on the admin queue. We need to revoke the key when: + * - concatenation is enabled (otherwise it's a static key set by the user) + * and + * - the generated key is present in ctrl->tls_key (otherwise there's nothing + * to revoke) + * and + * - a valid PSK key ID has been set in ctrl->tls_pskid (otherwise TLS + * negotiation has not run). + * + * We cannot always revoke the key as nvme_tcp_alloc_admin_queue() is called + * twice during secure concatenation, once on a 'normal' connection to run the + * DH-HMAC-CHAP negotiation (which generates the key, so it _must not_ be set), + * and once after the negotiation (which uses the key, so it _must_ be set). + */ +static bool nvme_tcp_key_revoke_needed(struct nvme_ctrl *ctrl) +{ + return ctrl->opts->concat && ctrl->opts->tls_key && ctrl->tls_pskid; +} + static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) { struct nvmf_ctrl_options *opts = ctrl->opts; @@ -2342,6 +2379,8 @@ static void nvme_tcp_error_recovery_work(struct work_struct *work) struct nvme_tcp_ctrl, err_work); struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); nvme_tcp_teardown_io_queues(ctrl, false); @@ -2382,6 +2421,8 @@ static void nvme_reset_ctrl_work(struct work_struct *work) container_of(work, struct nvme_ctrl, reset_work); int ret; + if (nvme_tcp_key_revoke_needed(ctrl)) + nvme_auth_revoke_tls_key(ctrl); nvme_stop_ctrl(ctrl); nvme_tcp_teardown_ctrl(ctrl, false); @@ -2877,7 +2918,7 @@ static struct nvmf_transport_ops nvme_tcp_transport = { NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE | NVMF_OPT_TLS | - NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY, + NVMF_OPT_KEYRING | NVMF_OPT_TLS_KEY | NVMF_OPT_CONCAT, .create_ctrl = nvme_tcp_create_ctrl, }; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fe3b60818fdcf..bfb5688363b0d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1746,6 +1746,13 @@ enum { NVME_AUTH_DHGROUP_INVALID = 0xff, }; +enum { + NVME_AUTH_SECP_NOSC = 0x00, + NVME_AUTH_SECP_SC = 0x01, + NVME_AUTH_SECP_NEWTLSPSK = 0x02, + NVME_AUTH_SECP_REPLACETLSPSK = 0x03, +}; + union nvmf_auth_protocol { struct nvmf_auth_dhchap_protocol_descriptor dhchap; }; From 104d0e2f622233477ef7e57e59e8a4c3bb062c82 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:15 +0100 Subject: [PATCH 07/22] nvme-fabrics: reset admin connection for secure concatenation When secure concatenation is requested the connection needs to be reset to enable TLS encryption on the new cnnection. That implies that the original connection used for the DH-CHAP negotiation really shouldn't be used, and we should reset as soon as the DH-CHAP negotiation has succeeded on the admin queue. Based on an idea from Sagi. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 196d363188539..feb2d7e17c4a1 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2283,6 +2283,16 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) if (ret) return ret; + if (ctrl->opts && ctrl->opts->concat && !ctrl->tls_pskid) { + /* See comments for nvme_tcp_key_revoke_needed() */ + dev_dbg(ctrl->device, "restart admin queue for secure concatenation\n"); + nvme_stop_keep_alive(ctrl); + nvme_tcp_teardown_admin_queue(ctrl, false); + ret = nvme_tcp_configure_admin_queue(ctrl, false); + if (ret) + return ret; + } + if (ctrl->icdoff) { ret = -EOPNOTSUPP; dev_err(ctrl->device, "icdoff is not supported!\n"); From 5032167264eea2d2f11b42083119efedcf146b53 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:16 +0100 Subject: [PATCH 08/22] nvmet: Add 'sq' argument to alloc_ctrl_args For secure concatenation the result of the TLS handshake will be stored in the 'sq' struct, so add it to the alloc_ctrl_args struct. Cc: Damien Le Moal Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Reviewed-by: Damien Le Moal Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 2 +- drivers/nvme/target/core.c | 2 +- drivers/nvme/target/fabrics-cmd-auth.c | 2 +- drivers/nvme/target/fabrics-cmd.c | 11 +++++++---- drivers/nvme/target/nvmet.h | 6 ++++-- 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index b47d675232d2d..d0392ccda2a1d 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -139,7 +139,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id) return ret; } -u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { int ret = 0; struct nvmet_host_link *p; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cdc4a09a6e8a4..112df8970954b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1649,7 +1649,7 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) if (args->hostid) uuid_copy(&ctrl->hostid, args->hostid); - dhchap_status = nvmet_setup_auth(ctrl); + dhchap_status = nvmet_setup_auth(ctrl, args->sq); if (dhchap_status) { pr_err("Failed to setup authentication, dhchap status %u\n", dhchap_status); diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 2022757f08dc7..43b684af816cb 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -246,7 +246,7 @@ void nvmet_execute_auth_send(struct nvmet_req *req) pr_debug("%s: ctrl %d qid %d reset negotiation\n", __func__, ctrl->cntlid, req->sq->qid); if (!req->sq->qid) { - dhchap_status = nvmet_setup_auth(ctrl); + dhchap_status = nvmet_setup_auth(ctrl, req->sq); if (dhchap_status) { pr_err("ctrl %d qid 0 failed to setup re-authentication\n", ctrl->cntlid); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index eb406c90c1679..5e8a3e1ca79cb 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -234,10 +234,12 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) return ret; } -static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl) +static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { + bool needs_auth = nvmet_has_auth(ctrl); + return (u32)ctrl->cntlid | - (nvmet_has_auth(ctrl) ? NVME_CONNECT_AUTHREQ_ATR : 0); + (needs_auth ? NVME_CONNECT_AUTHREQ_ATR : 0); } static void nvmet_execute_admin_connect(struct nvmet_req *req) @@ -247,6 +249,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; struct nvmet_alloc_ctrl_args args = { .port = req->port, + .sq = req->sq, .ops = req->ops, .p2p_client = req->p2p_client, .kato = le32_to_cpu(c->kato), @@ -299,7 +302,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - args.result = cpu_to_le32(nvmet_connect_result(ctrl)); + args.result = cpu_to_le32(nvmet_connect_result(ctrl, req->sq)); out: kfree(d); complete: @@ -357,7 +360,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out_ctrl_put; pr_debug("adding queue %d to ctrl %d.\n", qid, ctrl->cntlid); - req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl)); + req->cqe->result.u32 = cpu_to_le32(nvmet_connect_result(ctrl, req->sq)); out: kfree(d); complete: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 4be8d22d2d8d4..e0cfdae8af6de 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -583,6 +583,7 @@ void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new); struct nvmet_alloc_ctrl_args { struct nvmet_port *port; + struct nvmet_sq *sq; char *subsysnqn; char *hostnqn; uuid_t *hostid; @@ -860,7 +861,7 @@ void nvmet_execute_auth_receive(struct nvmet_req *req); int nvmet_auth_set_key(struct nvmet_host *host, const char *secret, bool set_ctrl); int nvmet_auth_set_host_hash(struct nvmet_host *host, const char *hash); -u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl); +u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq); void nvmet_auth_sq_init(struct nvmet_sq *sq); void nvmet_destroy_auth(struct nvmet_ctrl *ctrl); void nvmet_auth_sq_free(struct nvmet_sq *sq); @@ -879,7 +880,8 @@ int nvmet_auth_ctrl_exponential(struct nvmet_req *req, int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, u8 *buf, int buf_size); #else -static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl) +static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, + struct nvmet_sq *sq) { return 0; } From fa2e0f8bbc68908d14a97407bbbf8d8cccaf90a4 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:17 +0100 Subject: [PATCH 09/22] nvmet-tcp: support secure channel concatenation Evaluate the SC_C flag during DH-CHAP-HMAC negotiation to check if secure concatenation as specified in the NVMe Base Specification v2.1, section 8.3.4.3: "Secure Channel Concatenationand" is requested. If requested the generated PSK is inserted into the keyring once negotiation has finished allowing for an encrypted connection once the admin queue is restarted. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/auth.c | 70 ++++++++++++++++++++++++++ drivers/nvme/target/core.c | 5 +- drivers/nvme/target/fabrics-cmd-auth.c | 58 +++++++++++++++++++-- drivers/nvme/target/fabrics-cmd.c | 16 +++++- drivers/nvme/target/nvmet.h | 32 ++++++++++-- drivers/nvme/target/tcp.c | 31 +++++++++++- 6 files changed, 199 insertions(+), 13 deletions(-) diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c index d0392ccda2a1d..0b0645ac5df47 100644 --- a/drivers/nvme/target/auth.c +++ b/drivers/nvme/target/auth.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "nvmet.h" @@ -165,6 +166,11 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) goto out_unlock; } + if (nvmet_queue_tls_keyid(sq)) { + pr_debug("host %s tls enabled\n", ctrl->hostnqn); + goto out_unlock; + } + ret = nvmet_setup_dhgroup(ctrl, host->dhchap_dhgroup_id); if (ret < 0) { pr_warn("Failed to setup DH group"); @@ -233,6 +239,9 @@ u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) void nvmet_auth_sq_free(struct nvmet_sq *sq) { cancel_delayed_work(&sq->auth_expired_work); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + sq->tls_key = 0; +#endif kfree(sq->dhchap_c1); sq->dhchap_c1 = NULL; kfree(sq->dhchap_c2); @@ -261,6 +270,12 @@ void nvmet_destroy_auth(struct nvmet_ctrl *ctrl) nvme_auth_free_key(ctrl->ctrl_key); ctrl->ctrl_key = NULL; } +#ifdef CONFIG_NVME_TARGET_TCP_TLS + if (ctrl->tls_key) { + key_put(ctrl->tls_key); + ctrl->tls_key = NULL; + } +#endif } bool nvmet_check_auth_status(struct nvmet_req *req) @@ -542,3 +557,58 @@ int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, return ret; } + +void nvmet_auth_insert_psk(struct nvmet_sq *sq) +{ + int hash_len = nvme_auth_hmac_hash_len(sq->ctrl->shash_id); + u8 *psk, *digest, *tls_psk; + size_t psk_len; + int ret; +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key = NULL; +#endif + + ret = nvme_auth_generate_psk(sq->ctrl->shash_id, + sq->dhchap_skey, + sq->dhchap_skey_len, + sq->dhchap_c1, sq->dhchap_c2, + hash_len, &psk, &psk_len); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to generate PSK, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + return; + } + ret = nvme_auth_generate_digest(sq->ctrl->shash_id, psk, psk_len, + sq->ctrl->subsysnqn, + sq->ctrl->hostnqn, &digest); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to generate digest, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + goto out_free_psk; + } + ret = nvme_auth_derive_tls_psk(sq->ctrl->shash_id, psk, psk_len, + digest, &tls_psk); + if (ret) { + pr_warn("%s: ctrl %d qid %d failed to derive TLS PSK, error %d\n", + __func__, sq->ctrl->cntlid, sq->qid, ret); + goto out_free_digest; + } +#ifdef CONFIG_NVME_TARGET_TCP_TLS + tls_key = nvme_tls_psk_refresh(NULL, sq->ctrl->hostnqn, sq->ctrl->subsysnqn, + sq->ctrl->shash_id, tls_psk, psk_len, digest); + if (IS_ERR(tls_key)) { + pr_warn("%s: ctrl %d qid %d failed to refresh key, error %ld\n", + __func__, sq->ctrl->cntlid, sq->qid, PTR_ERR(tls_key)); + tls_key = NULL; + kfree_sensitive(tls_psk); + } + if (sq->ctrl->tls_key) + key_put(sq->ctrl->tls_key); + sq->ctrl->tls_key = tls_key; +#endif + +out_free_digest: + kfree_sensitive(digest); +out_free_psk: + kfree_sensitive(psk); +} diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 112df8970954b..a058f473652cd 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1664,11 +1664,12 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) args->status = NVME_SC_SUCCESS; - pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s.\n", + pr_info("Created %s controller %d for subsystem %s for NQN %s%s%s%s.\n", nvmet_is_disc_subsys(ctrl->subsys) ? "discovery" : "nvm", ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, ctrl->pi_support ? " T10-PI is enabled" : "", - nvmet_has_auth(ctrl) ? " with DH-HMAC-CHAP" : ""); + nvmet_has_auth(ctrl, args->sq) ? " with DH-HMAC-CHAP" : "", + nvmet_queue_tls_keyid(args->sq) ? ", TLS" : ""); return ctrl; diff --git a/drivers/nvme/target/fabrics-cmd-auth.c b/drivers/nvme/target/fabrics-cmd-auth.c index 43b684af816cb..bf01ec414c55f 100644 --- a/drivers/nvme/target/fabrics-cmd-auth.c +++ b/drivers/nvme/target/fabrics-cmd-auth.c @@ -43,8 +43,26 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) data->auth_protocol[0].dhchap.halen, data->auth_protocol[0].dhchap.dhlen); req->sq->dhchap_tid = le16_to_cpu(data->t_id); - if (data->sc_c) - return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + if (data->sc_c != NVME_AUTH_SECP_NOSC) { + if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + /* Secure concatenation can only be enabled on the admin queue */ + if (req->sq->qid) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + switch (data->sc_c) { + case NVME_AUTH_SECP_NEWTLSPSK: + if (nvmet_queue_tls_keyid(req->sq)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + break; + case NVME_AUTH_SECP_REPLACETLSPSK: + if (!nvmet_queue_tls_keyid(req->sq)) + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + break; + default: + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + } + ctrl->concat = true; + } if (data->napd != 1) return NVME_AUTH_DHCHAP_FAILURE_HASH_UNUSABLE; @@ -103,6 +121,12 @@ static u8 nvmet_auth_negotiate(struct nvmet_req *req, void *d) nvme_auth_dhgroup_name(fallback_dhgid)); ctrl->dh_gid = fallback_dhgid; } + if (ctrl->dh_gid == NVME_AUTH_DHGROUP_NULL && ctrl->concat) { + pr_debug("%s: ctrl %d qid %d: NULL DH group invalid " + "for secure channel concatenation\n", __func__, + ctrl->cntlid, req->sq->qid); + return NVME_AUTH_DHCHAP_FAILURE_CONCAT_MISMATCH; + } pr_debug("%s: ctrl %d qid %d: selected DH group %s (%d)\n", __func__, ctrl->cntlid, req->sq->qid, nvme_auth_dhgroup_name(ctrl->dh_gid), ctrl->dh_gid); @@ -148,12 +172,22 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) if (memcmp(data->rval, response, data->hl)) { pr_info("ctrl %d qid %d host response mismatch\n", ctrl->cntlid, req->sq->qid); + pr_debug("ctrl %d qid %d rval %*ph\n", + ctrl->cntlid, req->sq->qid, data->hl, data->rval); + pr_debug("ctrl %d qid %d response %*ph\n", + ctrl->cntlid, req->sq->qid, data->hl, response); kfree(response); return NVME_AUTH_DHCHAP_FAILURE_FAILED; } kfree(response); pr_debug("%s: ctrl %d qid %d host authenticated\n", __func__, ctrl->cntlid, req->sq->qid); + if (!data->cvalid && ctrl->concat) { + pr_debug("%s: ctrl %d qid %d invalid challenge\n", + __func__, ctrl->cntlid, req->sq->qid); + return NVME_AUTH_DHCHAP_FAILURE_FAILED; + } + req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); if (data->cvalid) { req->sq->dhchap_c2 = kmemdup(data->rval + data->hl, data->hl, GFP_KERNEL); @@ -163,11 +197,23 @@ static u8 nvmet_auth_reply(struct nvmet_req *req, void *d) pr_debug("%s: ctrl %d qid %d challenge %*ph\n", __func__, ctrl->cntlid, req->sq->qid, data->hl, req->sq->dhchap_c2); - } else { + } + /* + * NVMe Base Spec 2.2 section 8.3.4.5.4: DH-HMAC-CHAP_Reply message + * Sequence Number (SEQNUM): [ .. ] + * The value 0h is used to indicate that bidirectional authentication + * is not performed, but a challenge value C2 is carried in order to + * generate a pre-shared key (PSK) for subsequent establishment of a + * secure channel. + */ + if (req->sq->dhchap_s2 == 0) { + if (ctrl->concat) + nvmet_auth_insert_psk(req->sq); req->sq->authenticated = true; + kfree(req->sq->dhchap_c2); req->sq->dhchap_c2 = NULL; - } - req->sq->dhchap_s2 = le32_to_cpu(data->seqnum); + } else if (!data->cvalid) + req->sq->authenticated = true; return 0; } @@ -303,6 +349,8 @@ void nvmet_execute_auth_send(struct nvmet_req *req) } goto done_kfree; case NVME_AUTH_DHCHAP_MESSAGE_SUCCESS2: + if (ctrl->concat) + nvmet_auth_insert_psk(req->sq); req->sq->authenticated = true; pr_debug("%s: ctrl %d qid %d ctrl authenticated\n", __func__, ctrl->cntlid, req->sq->qid); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 5e8a3e1ca79cb..f012bdf898502 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -236,8 +236,22 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) static u32 nvmet_connect_result(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { - bool needs_auth = nvmet_has_auth(ctrl); + bool needs_auth = nvmet_has_auth(ctrl, sq); + key_serial_t keyid = nvmet_queue_tls_keyid(sq); + /* Do not authenticate I/O queues for secure concatenation */ + if (ctrl->concat && sq->qid) + needs_auth = false; + + if (keyid) + pr_debug("%s: ctrl %d qid %d should %sauthenticate, tls psk %08x\n", + __func__, ctrl->cntlid, sq->qid, + needs_auth ? "" : "not ", keyid); + else + pr_debug("%s: ctrl %d qid %d should %sauthenticate%s\n", + __func__, ctrl->cntlid, sq->qid, + needs_auth ? "" : "not ", + ctrl->concat ? ", secure concatenation" : ""); return (u32)ctrl->cntlid | (needs_auth ? NVME_CONNECT_AUTHREQ_ATR : 0); } diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index e0cfdae8af6de..9f6110ac7c4ab 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -164,6 +164,9 @@ struct nvmet_sq { u32 dhchap_s2; u8 *dhchap_skey; int dhchap_skey_len; +#endif +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key; #endif struct completion free_done; struct completion confirm_done; @@ -289,6 +292,7 @@ struct nvmet_ctrl { u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; bool pi_support; + bool concat; #ifdef CONFIG_NVME_TARGET_AUTH struct nvme_dhchap_key *host_key; struct nvme_dhchap_key *ctrl_key; @@ -297,6 +301,9 @@ struct nvmet_ctrl { u8 dh_gid; u8 *dh_key; size_t dh_keysize; +#endif +#ifdef CONFIG_NVME_TARGET_TCP_TLS + struct key *tls_key; #endif struct nvmet_pr_log_mgr pr_log_mgr; }; @@ -853,6 +860,22 @@ static inline void nvmet_req_bio_put(struct nvmet_req *req, struct bio *bio) bio_put(bio); } +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static inline key_serial_t nvmet_queue_tls_keyid(struct nvmet_sq *sq) +{ + return sq->tls_key ? key_serial(sq->tls_key) : 0; +} +static inline void nvmet_sq_put_tls_key(struct nvmet_sq *sq) +{ + if (sq->tls_key) { + key_put(sq->tls_key); + sq->tls_key = NULL; + } +} +#else +static inline key_serial_t nvmet_queue_tls_keyid(struct nvmet_sq *sq) { return 0; } +static inline void nvmet_sq_put_tls_key(struct nvmet_sq *sq) {} +#endif #ifdef CONFIG_NVME_TARGET_AUTH u32 nvmet_auth_send_data_len(struct nvmet_req *req); void nvmet_execute_auth_send(struct nvmet_req *req); @@ -871,14 +894,15 @@ int nvmet_auth_host_hash(struct nvmet_req *req, u8 *response, unsigned int hash_len); int nvmet_auth_ctrl_hash(struct nvmet_req *req, u8 *response, unsigned int hash_len); -static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl) +static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) { - return ctrl->host_key != NULL; + return ctrl->host_key != NULL && !nvmet_queue_tls_keyid(sq); } int nvmet_auth_ctrl_exponential(struct nvmet_req *req, u8 *buf, int buf_size); int nvmet_auth_ctrl_sesskey(struct nvmet_req *req, u8 *buf, int buf_size); +void nvmet_auth_insert_psk(struct nvmet_sq *sq); #else static inline u8 nvmet_setup_auth(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq) @@ -894,11 +918,13 @@ static inline bool nvmet_check_auth_status(struct nvmet_req *req) { return true; } -static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl) +static inline bool nvmet_has_auth(struct nvmet_ctrl *ctrl, + struct nvmet_sq *sq) { return false; } static inline const char *nvmet_dhchap_dhgroup_name(u8 dhgid) { return NULL; } +static inline void nvmet_auth_insert_psk(struct nvmet_sq *sq) {}; #endif int nvmet_pr_init_ns(struct nvmet_ns *ns); diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index fa59a7996efa2..9cf97433b0b6d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1072,10 +1072,11 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) if (unlikely(!nvmet_req_init(req, &queue->nvme_cq, &queue->nvme_sq, &nvmet_tcp_ops))) { - pr_err("failed cmd %p id %d opcode %d, data_len: %d\n", + pr_err("failed cmd %p id %d opcode %d, data_len: %d, status: %04x\n", req->cmd, req->cmd->common.command_id, req->cmd->common.opcode, - le32_to_cpu(req->cmd->common.dptr.sgl.length)); + le32_to_cpu(req->cmd->common.dptr.sgl.length), + le16_to_cpu(req->cqe->status)); nvmet_tcp_handle_req_failure(queue, queue->cmd, req); return 0; @@ -1601,6 +1602,7 @@ static void nvmet_tcp_release_queue_work(struct work_struct *w) /* stop accepting incoming data */ queue->rcv_state = NVMET_TCP_RECV_ERR; + nvmet_sq_put_tls_key(&queue->nvme_sq); nvmet_tcp_uninit_data_in_cmds(queue); nvmet_sq_destroy(&queue->nvme_sq); cancel_work_sync(&queue->io_work); @@ -1786,6 +1788,27 @@ static int nvmet_tcp_try_peek_pdu(struct nvmet_tcp_queue *queue) return 0; } +static int nvmet_tcp_tls_key_lookup(struct nvmet_tcp_queue *queue, + key_serial_t peerid) +{ + struct key *tls_key = nvme_tls_key_lookup(peerid); + int status = 0; + + if (IS_ERR(tls_key)) { + pr_warn("%s: queue %d failed to lookup key %x\n", + __func__, queue->idx, peerid); + spin_lock_bh(&queue->state_lock); + queue->state = NVMET_TCP_Q_FAILED; + spin_unlock_bh(&queue->state_lock); + status = PTR_ERR(tls_key); + } else { + pr_debug("%s: queue %d using TLS PSK %x\n", + __func__, queue->idx, peerid); + queue->nvme_sq.tls_key = tls_key; + } + return status; +} + static void nvmet_tcp_tls_handshake_done(void *data, int status, key_serial_t peerid) { @@ -1806,6 +1829,10 @@ static void nvmet_tcp_tls_handshake_done(void *data, int status, spin_unlock_bh(&queue->state_lock); cancel_delayed_work_sync(&queue->tls_handshake_tmo_work); + + if (!status) + status = nvmet_tcp_tls_key_lookup(queue, peerid); + if (status) nvmet_tcp_schedule_release_queue(queue); else From 316dabe6089fe85f7434d32808289d1965c452cb Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Mon, 24 Feb 2025 13:38:18 +0100 Subject: [PATCH 10/22] nvmet: add tls_concat and tls_key debugfs entries Add debugfs entries to display the 'concat' and 'tls_key' controller attributes. Signed-off-by: Hannes Reinecke Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/debugfs.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/nvme/target/debugfs.c b/drivers/nvme/target/debugfs.c index 220c7391fc19a..e4300eb95101a 100644 --- a/drivers/nvme/target/debugfs.c +++ b/drivers/nvme/target/debugfs.c @@ -132,6 +132,27 @@ static int nvmet_ctrl_host_traddr_show(struct seq_file *m, void *p) } NVMET_DEBUGFS_ATTR(nvmet_ctrl_host_traddr); +#ifdef CONFIG_NVME_TARGET_TCP_TLS +static int nvmet_ctrl_tls_key_show(struct seq_file *m, void *p) +{ + struct nvmet_ctrl *ctrl = m->private; + key_serial_t keyid = nvmet_queue_tls_keyid(ctrl->sqs[0]); + + seq_printf(m, "%08x\n", keyid); + return 0; +} +NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_key); + +static int nvmet_ctrl_tls_concat_show(struct seq_file *m, void *p) +{ + struct nvmet_ctrl *ctrl = m->private; + + seq_printf(m, "%d\n", ctrl->concat); + return 0; +} +NVMET_DEBUGFS_ATTR(nvmet_ctrl_tls_concat); +#endif + int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl) { char name[32]; @@ -157,6 +178,12 @@ int nvmet_debugfs_ctrl_setup(struct nvmet_ctrl *ctrl) &nvmet_ctrl_state_fops); debugfs_create_file("host_traddr", S_IRUSR, ctrl->debugfs_dir, ctrl, &nvmet_ctrl_host_traddr_fops); +#ifdef CONFIG_NVME_TARGET_TCP_TLS + debugfs_create_file("tls_concat", S_IRUSR, ctrl->debugfs_dir, ctrl, + &nvmet_ctrl_tls_concat_fops); + debugfs_create_file("tls_key", S_IRUSR, ctrl->debugfs_dir, ctrl, + &nvmet_ctrl_tls_key_fops); +#endif return 0; } From 4dbd2b2ebe4cc5f101881e2c091a70ccd38db7ee Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:44 +0530 Subject: [PATCH 11/22] nvme-multipath: Add visibility for round-robin io-policy This patch helps add nvme native multipath visibility for round-robin io-policy. It creates a "multipath" sysfs directory under head gendisk device node directory and then from "multipath" directory it adds a link to each namespace path device the head node refers. For instance, if we have a shared namespace accessible from two different controllers/paths then we create a soft link to each path device from head disk node as shown below: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For round-robin I/O policy, we could easily infer from the above output that I/O workload targeted to nvme1n1 would toggle across paths nvme1c1n1 and nvme1c3n1. Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 3 ++ drivers/nvme/host/multipath.c | 99 +++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 18 +++++-- drivers/nvme/host/sysfs.c | 14 +++++ 4 files changed, 130 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 818d4e49aab51..870314c521077 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4020,6 +4020,9 @@ static void nvme_ns_remove(struct nvme_ns *ns) if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); + + nvme_mpath_remove_sysfs_link(ns); + del_gendisk(ns->disk); mutex_lock(&ns->ctrl->namespaces_lock); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2a76355650830..7c89c8da46d6a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -686,6 +686,8 @@ static void nvme_mpath_set_live(struct nvme_ns *ns) kblockd_schedule_work(&head->partition_scan_work); } + nvme_mpath_add_sysfs_link(ns->head); + mutex_lock(&head->lock); if (nvme_path_is_optimized(ns)) { int node, srcu_idx; @@ -768,6 +770,25 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, if (nvme_state_is_live(ns->ana_state) && nvme_ctrl_state(ns->ctrl) == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); + else { + /* + * Add sysfs link from multipath head gendisk node to path + * device gendisk node. + * If path's ana state is live (i.e. state is either optimized + * or non-optimized) while we alloc the ns then sysfs link would + * be created from nvme_mpath_set_live(). In that case we would + * not fallthrough this code path. However for the path's ana + * state other than live, we call nvme_mpath_set_live() only + * after ana state transitioned to the live state. But we still + * want to create the sysfs link from head node to a path device + * irrespctive of the path's ana state. + * If we reach through here then it means that path's ana state + * is not live but still create the sysfs link to this path from + * head node if head node of the path has already come alive. + */ + if (test_bit(NVME_NSHEAD_DISK_LIVE, &ns->head->flags)) + nvme_mpath_add_sysfs_link(ns->head); + } } static int nvme_update_ana_state(struct nvme_ctrl *ctrl, @@ -967,6 +988,84 @@ static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, return -ENXIO; /* just break out of the loop */ } +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *head) +{ + struct device *target; + int rc, srcu_idx; + struct nvme_ns *ns; + struct kobject *kobj; + + /* + * Ensure head disk node is already added otherwise we may get invalid + * kobj for head disk node + */ + if (!test_bit(GD_ADDED, &head->disk->state)) + return; + + kobj = &disk_to_dev(head->disk)->kobj; + + /* + * loop through each ns chained through the head->list and create the + * sysfs link from head node to the ns path node + */ + srcu_idx = srcu_read_lock(&head->srcu); + + list_for_each_entry_rcu(ns, &head->list, siblings) { + /* + * Avoid creating link if it already exists for the given path. + * When path ana state transitions from optimized to non- + * optimized or vice-versa, the nvme_mpath_set_live() is + * invoked which in truns call this function. Now if the sysfs + * link already exists for the given path and we attempt to re- + * create the link then sysfs code would warn about it loudly. + * So we evaluate NVME_NS_SYSFS_ATTR_LINK flag here to ensure + * that we're not creating duplicate link. + * The test_and_set_bit() is used because it is protecting + * against multiple nvme paths being simultaneously added. + */ + if (test_and_set_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + continue; + + /* + * Ensure that ns path disk node is already added otherwise we + * may get invalid kobj name for target + */ + if (!test_bit(GD_ADDED, &ns->disk->state)) + continue; + + target = disk_to_dev(ns->disk); + /* + * Create sysfs link from head gendisk kobject @kobj to the + * ns path gendisk kobject @target->kobj. + */ + rc = sysfs_add_link_to_group(kobj, nvme_ns_mpath_attr_group.name, + &target->kobj, dev_name(target)); + if (unlikely(rc)) { + dev_err(disk_to_dev(ns->head->disk), + "failed to create link to %s\n", + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); + } + } + + srcu_read_unlock(&head->srcu, srcu_idx); +} + +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ + struct device *target; + struct kobject *kobj; + + if (!test_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags)) + return; + + target = disk_to_dev(ns->disk); + kobj = &disk_to_dev(ns->head->disk)->kobj; + sysfs_remove_link_from_group(kobj, nvme_ns_mpath_attr_group.name, + dev_name(target)); + clear_bit(NVME_NS_SYSFS_ATTR_LINK, &ns->flags); +} + void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) { if (nvme_ctrl_use_ana(ns->ctrl)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index daa4c91e4848c..1eeb782947db9 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -534,10 +534,11 @@ struct nvme_ns { struct nvme_ns_head *head; unsigned long flags; -#define NVME_NS_REMOVING 0 -#define NVME_NS_ANA_PENDING 2 -#define NVME_NS_FORCE_RO 3 -#define NVME_NS_READY 4 +#define NVME_NS_REMOVING 0 +#define NVME_NS_ANA_PENDING 2 +#define NVME_NS_FORCE_RO 3 +#define NVME_NS_READY 4 +#define NVME_NS_SYSFS_ATTR_LINK 5 struct cdev cdev; struct device cdev_device; @@ -933,6 +934,7 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo); int nvme_dev_uring_cmd(struct io_uring_cmd *ioucmd, unsigned int issue_flags); extern const struct attribute_group *nvme_ns_attr_groups[]; +extern const struct attribute_group nvme_ns_mpath_attr_group; extern const struct pr_ops nvme_pr_ops; extern const struct block_device_operations nvme_ns_head_ops; extern const struct attribute_group nvme_dev_attrs_group; @@ -955,6 +957,8 @@ void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys); void nvme_failover_req(struct request *req); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); +void nvme_mpath_add_sysfs_link(struct nvme_ns_head *ns); +void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns); void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); @@ -1009,6 +1013,12 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } +static inline void nvme_mpath_add_sysfs_link(struct nvme_ns *ns) +{ +} +static inline void nvme_mpath_remove_sysfs_link(struct nvme_ns *ns) +{ +} static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns) { return false; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 52683943be152..477c123a4b86a 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -299,8 +299,22 @@ static const struct attribute_group nvme_ns_attr_group = { .is_visible = nvme_ns_attrs_are_visible, }; +#ifdef CONFIG_NVME_MULTIPATH +static struct attribute *nvme_ns_mpath_attrs[] = { + NULL, +}; + +const struct attribute_group nvme_ns_mpath_attr_group = { + .name = "multipath", + .attrs = nvme_ns_mpath_attrs, +}; +#endif + const struct attribute_group *nvme_ns_attr_groups[] = { &nvme_ns_attr_group, +#ifdef CONFIG_NVME_MULTIPATH + &nvme_ns_mpath_attr_group, +#endif NULL, }; From 6546cc4a56618fda55e76c92ff7aea31d8f9fb88 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:45 +0530 Subject: [PATCH 12/22] nvme-multipath: Add visibility for numa io-policy This patch helps add nvme native multipath visibility for numa io-policy. It adds a new attribute file named "numa_nodes" under namespace gendisk device path node which prints the list of numa nodes preferred by the given namespace path. The numa nodes value is comma delimited list of nodes or A-B range of nodes. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and this namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For numa io-policy we can then refer the "numa_nodes" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/numa_nodes 0-1 $ cat /sys/block/nvme1n1/multipath/nvme1c3n1/numa_nodes 2-3 >From the above output, we infer that I/O workload targeted at nvme1n1 and running on numa nodes 0 and 1 would prefer using path nvme1c1n1. Similarly, I/O workload running on numa nodes 2 and 3 would prefer using path nvme1c3n1. Reading "numa_nodes" file when configured io-policy is anything but numa would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 27 +++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 5 +++++ 3 files changed, 33 insertions(+) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 7c89c8da46d6a..b47e01942ca4a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,33 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + int node, srcu_idx; + nodemask_t numa_nodes; + struct nvme_ns *current_ns; + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + struct nvme_ns_head *head = ns->head; + + if (head->subsys->iopolicy != NVME_IOPOLICY_NUMA) + return 0; + + nodes_clear(numa_nodes); + + srcu_idx = srcu_read_lock(&head->srcu); + for_each_node(node) { + current_ns = srcu_dereference(head->current_path[node], + &head->srcu); + if (ns == current_ns) + node_set(node, numa_nodes); + } + srcu_read_unlock(&head->srcu, srcu_idx); + + return sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&numa_nodes)); +} +DEVICE_ATTR_RO(numa_nodes); + static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *desc, void *data) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1eeb782947db9..ff4bd0c2e401e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; static inline bool nvme_disk_is_ns_head(struct gendisk *disk) diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 477c123a4b86a..96eaecda0ab91 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, NULL, @@ -290,6 +291,10 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } + if (a == &dev_attr_numa_nodes.attr) { + if (nvme_disk_is_ns_head(dev_to_disk(dev))) + return 0; + } #endif return a->mode; } From 7cbafa3ff0187cdfa922aa7eb3d578a93999b3a9 Mon Sep 17 00:00:00 2001 From: Nilay Shroff Date: Sun, 12 Jan 2025 18:11:46 +0530 Subject: [PATCH 13/22] nvme-multipath: Add visibility for queue-depth io-policy This patch helps add nvme native multipath visibility for queue-depth io-policy. It adds a new attribute file named "queue_depth" under namespace device path node which would print the number of active/ in-flight I/O requests currently queued for the given path. For instance, if we have a shared namespace accessible from two different controllers/paths then accessing head block node of the shared namespace would show the following output: $ ls -l /sys/block/nvme1n1/multipath/ nvme1c1n1 -> ../../../../../pci052e:78/052e:78:00.0/nvme/nvme1/nvme1c1n1 nvme1c3n1 -> ../../../../../pci058e:78/058e:78:00.0/nvme/nvme3/nvme1c3n1 In the above example, nvme1n1 is head gendisk node created for a shared namespace and the namespace is accessible from nvme1c1n1 and nvme1c3n1 paths. For queue-depth io-policy we can then refer the "queue_depth" attribute file created under each namespace path: $ cat /sys/block/nvme1n1/multipath/nvme1c1n1/queue_depth 518 $cat /sys/block/nvme1n1/multipath/nvme1c3n1/queue_depth 504 >From the above output, we can infer that I/O workload targeted at nvme1n1 uses two paths nvme1c1n1 and nvme1c3n1 and the current queue depth of each path is 518 and 504 respectively. Reading "queue_depth" file when configured io-policy is anything but queue-depth would show no output. Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Nilay Shroff Signed-off-by: Keith Busch --- drivers/nvme/host/multipath.c | 12 ++++++++++++ drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/sysfs.c | 3 ++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b47e01942ca4a..6b12ca80aa273 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -976,6 +976,18 @@ static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr, } DEVICE_ATTR_RO(ana_state); +static ssize_t queue_depth_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nvme_ns *ns = nvme_get_ns_from_dev(dev); + + if (ns->head->subsys->iopolicy != NVME_IOPOLICY_QD) + return 0; + + return sysfs_emit(buf, "%d\n", atomic_read(&ns->ctrl->nr_active)); +} +DEVICE_ATTR_RO(queue_depth); + static ssize_t numa_nodes_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index ff4bd0c2e401e..51e0786421271 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -984,6 +984,7 @@ static inline void nvme_trace_bio_complete(struct request *req) extern bool multipath; extern struct device_attribute dev_attr_ana_grpid; extern struct device_attribute dev_attr_ana_state; +extern struct device_attribute dev_attr_queue_depth; extern struct device_attribute dev_attr_numa_nodes; extern struct device_attribute subsys_attr_iopolicy; diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c index 96eaecda0ab91..6d31226f7a4f8 100644 --- a/drivers/nvme/host/sysfs.c +++ b/drivers/nvme/host/sysfs.c @@ -258,6 +258,7 @@ static struct attribute *nvme_ns_attrs[] = { #ifdef CONFIG_NVME_MULTIPATH &dev_attr_ana_grpid.attr, &dev_attr_ana_state.attr, + &dev_attr_queue_depth.attr, &dev_attr_numa_nodes.attr, #endif &dev_attr_io_passthru_err_log_enabled.attr, @@ -291,7 +292,7 @@ static umode_t nvme_ns_attrs_are_visible(struct kobject *kobj, if (!nvme_ctrl_use_ana(nvme_get_ns_from_dev(dev)->ctrl)) return 0; } - if (a == &dev_attr_numa_nodes.attr) { + if (a == &dev_attr_queue_depth.attr || a == &dev_attr_numa_nodes.attr) { if (nvme_disk_is_ns_head(dev_to_disk(dev))) return 0; } From 978540050a85a2b7fc77b60a1cfaec110682e9c3 Mon Sep 17 00:00:00 2001 From: Qasim Ijaz Date: Thu, 13 Feb 2025 22:16:22 +0000 Subject: [PATCH 14/22] nvme-fc: Utilise min3() to simplify queue count calculation Refactor nvme_fc_create_io_queues() and nvme_fc_recreate_io_queues() to use the min3() macro to find the minimum between 3 values instead of multiple min()'s. This shortens the code and makes it easier to read. Signed-off-by: Qasim Ijaz Reviewed-by: James Smart Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7de29dae8e74f..38fa2d049dcf4 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2907,7 +2907,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { @@ -2961,7 +2961,7 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) unsigned int nr_io_queues; int ret; - nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), + nr_io_queues = min3(opts->nr_io_queues, num_online_cpus(), ctrl->lport->ops->max_hw_queues); ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); if (ret) { From f1b47aed535c0509e8a101490a5600ecd0641050 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Thu, 6 Mar 2025 10:53:31 +0200 Subject: [PATCH 15/22] nvme-pci: remove stale comment The ns variable has been removed in commit 62451a2b2e7e ("nvme: separate command prep and issue"). Drop reference to ns in comment. Fixes: 62451a2b2e7e ("nvme: separate command prep and issue") Signed-off-by: Baruch Siach Reviewed-by: Anuj Gupta Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a65978b6cdd8a..08779f2205779 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -953,9 +953,6 @@ static blk_status_t nvme_prep_rq(struct nvme_dev *dev, struct request *req) return ret; } -/* - * NOTE: ns is NULL when called on the admin queue. - */ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { From 1b304c006b0fb4f0517a8c4ba8c46e88f48a069c Mon Sep 17 00:00:00 2001 From: WangYuli Date: Wed, 12 Mar 2025 13:06:50 +0800 Subject: [PATCH 16/22] nvmet-fc: Remove unused functions The functions nvmet_fc_iodnum() and nvmet_fc_fodnum() are currently unutilized. Following commit c53432030d86 ("nvme-fabrics: Add target support for FC transport"), which introduced these two functions, they have not been used at all in practice. Remove them to resolve the compiler warnings. Fix follow errors with clang-19 when W=1e: drivers/nvme/target/fc.c:177:1: error: unused function 'nvmet_fc_iodnum' [-Werror,-Wunused-function] 177 | nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr) | ^~~~~~~~~~~~~~~ drivers/nvme/target/fc.c:183:1: error: unused function 'nvmet_fc_fodnum' [-Werror,-Wunused-function] 183 | nvmet_fc_fodnum(struct nvmet_fc_fcp_iod *fodptr) | ^~~~~~~~~~~~~~~ 2 errors generated. make[8]: *** [scripts/Makefile.build:207: drivers/nvme/target/fc.o] Error 1 make[7]: *** [scripts/Makefile.build:465: drivers/nvme/target] Error 2 make[6]: *** [scripts/Makefile.build:465: drivers/nvme] Error 2 make[6]: *** Waiting for unfinished jobs.... Fixes: c53432030d86 ("nvme-fabrics: Add target support for FC transport") Signed-off-by: WangYuli Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/fc.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 3ef4beacde325..7318b736d4141 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -172,20 +172,6 @@ struct nvmet_fc_tgt_assoc { struct work_struct del_work; }; - -static inline int -nvmet_fc_iodnum(struct nvmet_fc_ls_iod *iodptr) -{ - return (iodptr - iodptr->tgtport->iod); -} - -static inline int -nvmet_fc_fodnum(struct nvmet_fc_fcp_iod *fodptr) -{ - return (fodptr - fodptr->queue->fod); -} - - /* * Association and Connection IDs: * From ba65af9a2a0d90f2c3ef6c80793d79eba154945b Mon Sep 17 00:00:00 2001 From: Chen Ni Date: Wed, 12 Mar 2025 16:56:25 +0800 Subject: [PATCH 17/22] nvmet: pci-epf: Remove redundant 'flush_workqueue()' calls 'destroy_workqueue()' already drains the queue before destroying it, so there is no need to flush it explicitly. Remove the redundant 'flush_workqueue()' calls. This was generated with coccinelle: @@ expression E; @@ - flush_workqueue(E); destroy_workqueue(E); Signed-off-by: Chen Ni Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index ac30b42cc6221..a7c40a674df60 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -1385,7 +1385,6 @@ static u16 nvmet_pci_epf_delete_sq(struct nvmet_ctrl *tctrl, u16 sqid) if (!test_and_clear_bit(NVMET_PCI_EPF_Q_LIVE, &sq->flags)) return NVME_SC_QID_INVALID | NVME_STATUS_DNR; - flush_workqueue(sq->iod_wq); destroy_workqueue(sq->iod_wq); sq->iod_wq = NULL; From 945e82633ecfd54015d2447a9ba05941665db2ee Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 13 Mar 2025 14:25:20 +0900 Subject: [PATCH 18/22] nvme: zns: Simplify nvme_zone_parse_entry() Instead of passing a pointer to a struct nvme_ctrl and a pointer to a struct nvme_ns_head as the first two arguments of nvme_zone_parse_entry(), pass only a pointer to a struct nvme_ns as both the controller structure and ns head structure can be infered from the namespace structure. Signed-off-by: Damien Le Moal Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/zns.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c index 382949e18c6ae..cce4c5b55aa9c 100644 --- a/drivers/nvme/host/zns.c +++ b/drivers/nvme/host/zns.c @@ -146,17 +146,16 @@ static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns, return NULL; } -static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl, - struct nvme_ns_head *head, +static int nvme_zone_parse_entry(struct nvme_ns *ns, struct nvme_zone_descriptor *entry, unsigned int idx, report_zones_cb cb, void *data) { + struct nvme_ns_head *head = ns->head; struct blk_zone zone = { }; if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) { - dev_err(ctrl->device, "invalid zone type %#x\n", - entry->zt); + dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt); return -EINVAL; } @@ -213,8 +212,7 @@ int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector, break; for (i = 0; i < nz && zone_idx < nr_zones; i++) { - ret = nvme_zone_parse_entry(ns->ctrl, ns->head, - &report->entries[i], + ret = nvme_zone_parse_entry(ns, &report->entries[i], zone_idx, cb, data); if (ret) goto out_free; From 7b658153f1b8a79ed98980f2fef7b92a66aeb9cd Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Thu, 13 Mar 2025 00:18:02 -0500 Subject: [PATCH 19/22] nvmet: Remove duplicate uuid_copy We do uuid_copy twice in nvmet_alloc_ctrl so this patch deletes one of the calls. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/target/core.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index a058f473652cd..a2b5319c37f35 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1620,8 +1620,6 @@ struct nvmet_ctrl *nvmet_alloc_ctrl(struct nvmet_alloc_ctrl_args *args) } ctrl->cntlid = ret; - uuid_copy(&ctrl->hostid, args->hostid); - /* * Discovery controllers may use some arbitrary high value * in order to cleanup stale discovery sessions From 1cf0184c0ac4f1e936bb3b089894bbeb0a9eb2bc Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 17 Mar 2025 10:57:04 +0100 Subject: [PATCH 20/22] nvmet: pci-epf: Always configure BAR0 as 64-bit NVMe PCIe Transport Specification 1.1, section 2.1.10, claims that the BAR0 type is Implementation Specific. However, in NVMe 1.1, the type is required to be 64-bit. Thus, to make our PCI EPF work on as many host systems as possible, always configure the BAR0 type to be 64-bit. In the rare case that the underlying PCI EPC does not support configuring BAR0 as 64-bit, the call to pci_epc_set_bar() will fail, and we will return a failure back to the user. This should not be a problem, as most PCI EPCs support configuring a BAR as 64-bit (and those EPCs with .only_64bit set to true in epc_features only support configuring the BAR as 64-bit). Tested-by: Damien Le Moal Fixes: 0faa0fe6f90e ("nvmet: New NVMe PCI endpoint function target driver") Signed-off-by: Niklas Cassel Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/target/pci-epf.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c index a7c40a674df60..0d0018c8c674b 100644 --- a/drivers/nvme/target/pci-epf.c +++ b/drivers/nvme/target/pci-epf.c @@ -2109,8 +2109,15 @@ static int nvmet_pci_epf_configure_bar(struct nvmet_pci_epf *nvme_epf) return -ENODEV; } - if (epc_features->bar[BAR_0].only_64bit) - epf->bar[BAR_0].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; + /* + * While NVMe PCIe Transport Specification 1.1, section 2.1.10, claims + * that the BAR0 type is Implementation Specific, in NVMe 1.1, the type + * is required to be 64-bit. Thus, for interoperability, always set the + * type to 64-bit. In the rare case that the PCI EPC does not support + * configuring BAR0 as 64-bit, the call to pci_epc_set_bar() will fail, + * and we will return failure back to the user. + */ + epf->bar[BAR_0].flags |= PCI_BASE_ADDRESS_MEM_TYPE_64; /* * Calculate the size of the register bar: NVMe registers first with From 1be52169c3488ef98582ed553ab35cefa3978817 Mon Sep 17 00:00:00 2001 From: Peijie Shao Date: Thu, 20 Mar 2025 14:35:23 +0800 Subject: [PATCH 21/22] nvme-tcp: fix selinux denied when calling sock_sendmsg In a SELinux enabled kernel, socket_create() initializes the security label of the socket using the security label of the calling process, this typically works well. However, in a containerized environment like Kubernetes, problem arises when a privileged container(domain spc_t) connects to an NVMe target and mounts the NVMe as persistent storage for unprivileged containers(domain container_t). This is because the container_t domain cannot access resources labeled with spc_t, resulting in socket_sendmsg returning -EACCES. The solution is to use socket_create_kern() instead of socket_create(), which labels the socket context to kernel_t. Access control will then be handled by the VFS layer rather than the socket itself. Signed-off-by: Peijie Shao Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index feb2d7e17c4a1..542ffc921a3ff 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1717,7 +1717,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, queue->cmnd_capsule_len = sizeof(struct nvme_command) + NVME_TCP_ADMIN_CCSZ; - ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, + ret = sock_create_kern(current->nsproxy->net_ns, + ctrl->addr.ss_family, SOCK_STREAM, IPPROTO_TCP, &queue->sock); if (ret) { dev_err(nctrl->device, From 64ea88e3afa8c5b6c3f9c477da304b7a56149612 Mon Sep 17 00:00:00 2001 From: Li Haoran Date: Thu, 20 Mar 2025 15:53:00 +0800 Subject: [PATCH 22/22] nvmet: replace max(a, min(b, c)) by clamp(val, lo, hi) This patch replaces max(a, min(b, c)) by clamp(val, lo, hi) in the nvme driver. The clamp() macro explicitly expresses the intent of constraining a value within bounds, improving code readability. Signed-off-by: Li Haoran Signed-off-by: Shao Mingyin Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 9f6110ac7c4ab..33fac9151b5b1 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -828,7 +828,7 @@ static inline u8 nvmet_cc_iocqes(u32 cc) /* Convert a 32-bit number to a 16-bit 0's based number */ static inline __le16 to0based(u32 a) { - return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); + return cpu_to_le16(clamp(a, 1U, 1U << 16) - 1); } static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)