From 6dd0e5cc87b33aee9cfe4d485c4d8b8382701558 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:11 -0700 Subject: [PATCH 01/47] cxl/mbox: Move cxl_mem_command construction to helper funcs Sanitizing and constructing a cxl_mem_command from a userspace command is part of the validation process prior to submitting the command to a CXL device. Move this work to helper functions: cxl_to_mem_cmd(), cxl_to_mem_cmd_raw(). This declutters cxl_validate_cmd_from_user() in preparation for adding new validation steps. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/7d9b826f29262e3a484cb4bb7b63872134d60bd7.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 147 +++++++++++++++++++++------------------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index be61a0d8016bb..1f2175f0f6879 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -207,76 +207,42 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) return true; } -/** - * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. - * @cxlds: The device data for the operation - * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. - * - * Return: - * * %0 - @out_cmd is ready to send. - * * %-ENOTTY - Invalid command specified. - * * %-EINVAL - Reserved fields or invalid values were used. - * * %-ENOMEM - Input or output buffer wasn't sized properly. - * * %-EPERM - Attempted to use a protected command. - * * %-EBUSY - Kernel has claimed exclusive access to this opcode - * - * The result of this command is a fully validated command in @out_cmd that is - * safe to send to the hardware. - * - * See handle_mailbox_cmd_from_user() - */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) +static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) { - const struct cxl_command_info *info; - struct cxl_mem_command *c; - - if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) - return -ENOTTY; - - /* - * The user can never specify an input payload larger than what hardware - * supports, but output can be arbitrarily large (simply write out as - * much data as the hardware provides). - */ - if (send_cmd->in.size > cxlds->payload_size) + if (send_cmd->raw.rsvd) return -EINVAL; /* - * Checks are bypassed for raw commands but a WARN/taint will occur - * later in the callchain + * Unlike supported commands, the output size of RAW commands + * gets passed along without further checking, so it must be + * validated here. */ - if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) { - const struct cxl_mem_command temp = { - .info = { - .id = CXL_MEM_COMMAND_ID_RAW, - .flags = 0, - .size_in = send_cmd->in.size, - .size_out = send_cmd->out.size, - }, - .opcode = send_cmd->raw.opcode - }; - - if (send_cmd->raw.rsvd) - return -EINVAL; + if (send_cmd->out.size > cxlds->payload_size) + return -EINVAL; - /* - * Unlike supported commands, the output size of RAW commands - * gets passed along without further checking, so it must be - * validated here. 
- */ - if (send_cmd->out.size > cxlds->payload_size) - return -EINVAL; + if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) + return -EPERM; - if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode)) - return -EPERM; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = CXL_MEM_COMMAND_ID_RAW, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = send_cmd->raw.opcode + }; - memcpy(out_cmd, &temp, sizeof(temp)); + return 0; +} - return 0; - } +static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, + const struct cxl_send_command *send_cmd, + struct cxl_dev_state *cxlds) +{ + struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id]; + const struct cxl_command_info *info = &c->info; if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK) return -EINVAL; @@ -287,10 +253,6 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (send_cmd->in.rsvd || send_cmd->out.rsvd) return -EINVAL; - /* Convert user's command into the internal representation */ - c = &cxl_mem_commands[send_cmd->id]; - info = &c->info; - /* Check that the command is enabled for hardware */ if (!test_bit(info->id, cxlds->enabled_cmds)) return -ENOTTY; @@ -307,15 +269,58 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, if (info->size_out >= 0 && send_cmd->out.size < info->size_out) return -ENOMEM; - memcpy(out_cmd, c, sizeof(*c)); - out_cmd->info.size_in = send_cmd->in.size; + *mem_cmd = (struct cxl_mem_command) { + .info = { + .id = info->id, + .flags = info->flags, + .size_in = send_cmd->in.size, + .size_out = send_cmd->out.size, + }, + .opcode = c->opcode + }; + + return 0; +} + +/** + * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @cxlds: The device data for the operation + * @send_cmd: &struct cxl_send_command copied in from userspace. + * @out_cmd: Sanitized and populated &struct cxl_mem_command. + * + * Return: + * * %0 - @out_cmd is ready to send. + * * %-ENOTTY - Invalid command specified. + * * %-EINVAL - Reserved fields or invalid values were used. + * * %-ENOMEM - Input or output buffer wasn't sized properly. + * * %-EPERM - Attempted to use a protected command. + * * %-EBUSY - Kernel has claimed exclusive access to this opcode + * + * The result of this command is a fully validated command in @out_cmd that is + * safe to send to the hardware. + * + * See handle_mailbox_cmd_from_user() + */ +static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, + const struct cxl_send_command *send_cmd, + struct cxl_mem_command *out_cmd) +{ + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) + return -ENOTTY; + /* - * XXX: out_cmd->info.size_out will be controlled by the driver, and the - * specified number of bytes @send_cmd->out.size will be copied back out - * to userspace. + * The user can never specify an input payload larger than what hardware + * supports, but output can be arbitrarily large (simply write out as + * much data as the hardware provides). 
 */
+	if (send_cmd->in.size > cxlds->payload_size)
+		return -EINVAL;
 
-	return 0;
+	/* Sanitize and construct a cxl_mem_command */
+	if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
+		return cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds);
+	else
+		return cxl_to_mem_cmd(out_cmd, send_cmd, cxlds);
 }
 
 int cxl_query_cmd(struct cxl_memdev *cxlmd,

From 39ed8da4f341c8dbe9c15f716d3a328269a07fc7 Mon Sep 17 00:00:00 2001
From: Alison Schofield
Date: Wed, 30 Mar 2022 18:27:12 -0700
Subject: [PATCH 02/47] cxl/mbox: Move raw command warning to raw command
 validation

This move serves two purposes: 1) Emit the warning in the raw
command validation path, and 2) Remove the dependency on the
struct cxl_mem_command in handle_mailbox_cmd_from_user() in
preparation for a refactor of that function.

Signed-off-by: Alison Schofield
Reviewed-by: Jonathan Cameron
Reviewed-by: Dan Williams
Link: https://lore.kernel.org/r/df5f0e0ec8afa1f75299aa86b4226ab4479ef325.1648687552.git.alison.schofield@intel.com
Signed-off-by: Dan Williams
---
 drivers/cxl/core/mbox.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 1f2175f0f6879..39c8f6ded175b 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -225,6 +225,8 @@ static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 	if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
 		return -EPERM;
 
+	dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n");
+
 	*mem_cmd = (struct cxl_mem_command) {
 		.info = {
 			.id = CXL_MEM_COMMAND_ID_RAW,
@@ -417,9 +419,6 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
 		cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
 		cmd->info.size_in);
 
-	dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
-		      "raw command path used\n");
-
 	rc = cxlds->mbox_send(cxlds, &mbox_cmd);
 	if (rc)
 		goto out;

From 63cf60b7e0a556b3542e6e915dfe9c93eaa559fd Mon Sep 17 00:00:00 2001
From: Alison Schofield
Date: Wed, 30 Mar 2022 18:27:13 -0700
Subject: [PATCH 03/47] cxl/mbox: Move build of user mailbox cmd to a helper
 function

In preparation for moving the construction of a mailbox command to
the validation path, extract the work into a helper function.
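For illustration, the intended pairing follows the usual constructor/
destructor shape (a sketch only, assuming a struct cxl_dev_state *cxlds
and the usual ioctl arguments are in scope; the real helpers are in the
diff below):

	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	rc = cxl_mbox_cmd_ctor(&mbox_cmd, cxlds, opcode, in_size,
			       out_size, in_payload);
	if (rc)
		return rc;	/* ctor cleans up anything it allocated */

	rc = cxlds->mbox_send(cxlds, &mbox_cmd);
	/* ... copy payload_out back to userspace ... */
	cxl_mbox_cmd_dtor(&mbox_cmd);	/* kvfree()s both payload buffers */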
Signed-off-by: Alison Schofield
Reviewed-by: Dan Williams
Link: https://lore.kernel.org/r/493d7618a846d787c3ae28778935ca35e2b85eed.1648687552.git.alison.schofield@intel.com
Signed-off-by: Dan Williams
---
 drivers/cxl/core/mbox.c | 70 ++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 39c8f6ded175b..99b933fb2d9bf 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -207,6 +207,44 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
 	return true;
 }
 
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
+			     struct cxl_dev_state *cxlds, u16 opcode,
+			     size_t in_size, size_t out_size, u64 in_payload)
+{
+	*mbox = (struct cxl_mbox_cmd) {
+		.opcode = opcode,
+		.size_in = in_size,
+	};
+
+	if (in_size) {
+		mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+						in_size);
+		if (IS_ERR(mbox->payload_in))
+			return PTR_ERR(mbox->payload_in);
+	}
+
+	/* Prepare to handle a full payload for variable sized output */
+	if (out_size < 0)
+		mbox->size_out = cxlds->payload_size;
+	else
+		mbox->size_out = out_size;
+
+	if (mbox->size_out) {
+		mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
+		if (!mbox->payload_out) {
+			kvfree(mbox->payload_in);
+			return -ENOMEM;
+		}
+	}
+	return 0;
+}
+
+static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
+{
+	kvfree(mbox->payload_in);
+	kvfree(mbox->payload_out);
+}
+
 static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
 			      const struct cxl_send_command *send_cmd,
 			      struct cxl_dev_state *cxlds)
@@ -390,27 +428,14 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
 					s32 *size_out, u32 *retval)
 {
 	struct device *dev = cxlds->dev;
-	struct cxl_mbox_cmd mbox_cmd = {
-		.opcode = cmd->opcode,
-		.size_in = cmd->info.size_in,
-		.size_out = cmd->info.size_out,
-	};
+	struct cxl_mbox_cmd mbox_cmd;
 	int rc;
 
-	if (cmd->info.size_out) {
-		mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
-		if (!mbox_cmd.payload_out)
-			return -ENOMEM;
-	}
-
-	if (cmd->info.size_in) {
-		mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
-						   cmd->info.size_in);
-		if (IS_ERR(mbox_cmd.payload_in)) {
-			kvfree(mbox_cmd.payload_out);
-			return PTR_ERR(mbox_cmd.payload_in);
-		}
-	}
+	rc = cxl_mbox_cmd_ctor(&mbox_cmd, cxlds, cmd->opcode,
+			       cmd->info.size_in, cmd->info.size_out,
+			       in_payload);
+	if (rc)
+		return rc;
 
 	dev_dbg(dev,
 		"Submitting %s command for user\n"
@@ -442,8 +467,7 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
 	*retval = mbox_cmd.return_code;
 
 out:
-	kvfree(mbox_cmd.payload_in);
-	kvfree(mbox_cmd.payload_out);
+	cxl_mbox_cmd_dtor(&mbox_cmd);
 	return rc;
 }
 
@@ -464,10 +488,6 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
 	if (rc)
 		return rc;
 
-	/* Prepare to handle a full payload for variable sized output */
-	if (c.info.size_out < 0)
-		c.info.size_out = cxlds->payload_size;
-
 	rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload,
 					  send.out.payload, &send.out.size,
 					  &send.retval);

From 9ae016aeb722504703c67e6e59c673719868f467 Mon Sep 17 00:00:00 2001
From: Alison Schofield
Date: Wed, 30 Mar 2022 18:27:14 -0700
Subject: [PATCH 04/47] cxl/mbox: Construct a user's cxl_mbox_cmd in the
 validation path

This is a step in refactoring the handling of user space mailbox
commands. The intent is to have all the validation work originate
in cxl_validate_cmd_from_user().

Move the construction and validation of a mailbox command to the
validation path.
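The resulting ioctl flow, once this lands, is sketched below (names
taken from this series; illustrative only):

	cxl_send_cmd()
	  cxl_validate_cmd_from_user()
	    cxl_to_mem_cmd_raw() / cxl_to_mem_cmd()	/* sanitize user input */
	    cxl_mbox_cmd_ctor()				/* allocate/copy payloads */
	  handle_mailbox_cmd_from_user()		/* dispatch to hardware */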
Continue to pass both the out_cmd and the mbox_cmd until handle_mbox_cmd_from_user() learns how to use a mbox_cmd param. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/c9fbdad968a2b619f9108bb6c37cef1a853cdf5a.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 99b933fb2d9bf..47f39c6131c44 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -324,6 +324,7 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, /** * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND. + * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. * @cxlds: The device data for the operation * @send_cmd: &struct cxl_send_command copied in from userspace. * @out_cmd: Sanitized and populated &struct cxl_mem_command. @@ -341,10 +342,13 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * * See handle_mailbox_cmd_from_user() */ -static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, +static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, + struct cxl_dev_state *cxlds, const struct cxl_send_command *send_cmd, struct cxl_mem_command *out_cmd) { + int rc; + if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) return -ENOTTY; @@ -358,9 +362,17 @@ static int cxl_validate_cmd_from_user(struct cxl_dev_state *cxlds, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - return cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); else - return cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + + if (rc) + return rc; + + /* Sanitize and construct a cxl_mbox_cmd */ + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, out_cmd->opcode, + out_cmd->info.size_in, out_cmd->info.size_out, + send_cmd->in.payload); } int cxl_query_cmd(struct cxl_memdev *cxlmd, @@ -477,6 +489,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct device *dev = &cxlmd->dev; struct cxl_send_command send; struct cxl_mem_command c; + struct cxl_mbox_cmd mbox_cmd; int rc; dev_dbg(dev, "Send IOCTL\n"); @@ -484,7 +497,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send, &c); if (rc) return rc; From 82b8ba29538e5dae0a4e481dbbea4d5015c683af Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:15 -0700 Subject: [PATCH 05/47] cxl/mbox: Remove dependency on cxl_mem_command for a debug msg In preparation for removing access to struct cxl_mem_command, change this debug message to use cxl_mbox_cmd fields instead. Retrieve the pretty command name from cxl_mbox_cmd using a new opcode to command name helper. 
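Note that cxl_mem_find_command() walks the cxl_mem_commands table, so
cxl_mem_opcode_to_name() returns NULL for a RAW opcode that matches no
supported command, and printk renders a NULL %s argument as "(null)".
A defensive caller could guard the lookup, for example (a sketch, not
what this patch does):

	const char *name = cxl_mem_opcode_to_name(mbox_cmd.opcode);

	dev_dbg(dev, "Submitting %s command for user\n",
		name ? name : "RAW");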
Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/57265751d336a6e95f5ca31a9c77189408b05742.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 47f39c6131c44..8c59c6959c15b 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -127,6 +127,17 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode) return NULL; } +static const char *cxl_mem_opcode_to_name(u16 opcode) +{ + struct cxl_mem_command *c; + + c = cxl_mem_find_command(opcode); + if (!c) + return NULL; + + return cxl_command_names[c->info.id].name; +} + /** * cxl_mbox_send_cmd() - Send a mailbox command to a device. * @cxlds: The device data for the operation @@ -452,9 +463,9 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" - "\tsize: %ub\n", - cxl_command_names[cmd->info.id].name, mbox_cmd.opcode, - cmd->info.size_in); + "\tsize: %zx\n", + cxl_mem_opcode_to_name(mbox_cmd.opcode), + mbox_cmd.opcode, mbox_cmd.size_in); rc = cxlds->mbox_send(cxlds, &mbox_cmd); if (rc) From d97fe8eec2b895b2aa2f7bb3d55e7b88bc2d53f3 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:16 -0700 Subject: [PATCH 06/47] cxl/mbox: Make handle_mailbox_cmd_from_user() use a mbox param Previously, handle_mailbox_cmd_from_user(), constructed the mailbox command and dispatched it to the hardware. The construction work has moved to the validation path. handle_mailbox_cmd_from_user() now expects a fully validated mbox param. Make it's caller, cxl_send_cmd(), deliver it. Update the comments and dereferencing of the new mbox parameter. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/77050ba512d6c30eccf7505467509e460dd325a0.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 44 ++++++++++++++++------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 8c59c6959c15b..b4bef3c50ee3f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -423,8 +423,7 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, /** * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace. * @cxlds: The device data for the operation - * @cmd: The validated command. - * @in_payload: Pointer to userspace's input payload. + * @mbox_cmd: The validated mailbox command. * @out_payload: Pointer to userspace's output payload. * @size_out: (Input) Max payload size to copy out. * (Output) Payload size hardware generated. @@ -439,35 +438,27 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd, * * %-EINTR - Mailbox acquisition interrupted. * * %-EXXX - Transaction level failures. * - * Creates the appropriate mailbox command and dispatches it on behalf of a - * userspace request. The input and output payloads are copied between - * userspace. + * Dispatches a mailbox command on behalf of a userspace request. + * The output payload is copied to userspace. * * See cxl_send_cmd(). 
*/ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, - const struct cxl_mem_command *cmd, - u64 in_payload, u64 out_payload, - s32 *size_out, u32 *retval) + struct cxl_mbox_cmd *mbox_cmd, + u64 out_payload, s32 *size_out, + u32 *retval) { struct device *dev = cxlds->dev; - struct cxl_mbox_cmd mbox_cmd; int rc; - rc = cxl_mbox_cmd_ctor(&mbox_cmd, cxlds, cmd->opcode, - cmd->info.size_in, cmd->info.size_out, - in_payload); - if (rc) - return rc; - dev_dbg(dev, "Submitting %s command for user\n" "\topcode: %x\n" "\tsize: %zx\n", - cxl_mem_opcode_to_name(mbox_cmd.opcode), - mbox_cmd.opcode, mbox_cmd.size_in); + cxl_mem_opcode_to_name(mbox_cmd->opcode), + mbox_cmd->opcode, mbox_cmd->size_in); - rc = cxlds->mbox_send(cxlds, &mbox_cmd); + rc = cxlds->mbox_send(cxlds, mbox_cmd); if (rc) goto out; @@ -476,21 +467,21 @@ static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds, * to userspace. While the payload may have written more output than * this it will have to be ignored. */ - if (mbox_cmd.size_out) { - dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out, + if (mbox_cmd->size_out) { + dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out, "Invalid return size\n"); if (copy_to_user(u64_to_user_ptr(out_payload), - mbox_cmd.payload_out, mbox_cmd.size_out)) { + mbox_cmd->payload_out, mbox_cmd->size_out)) { rc = -EFAULT; goto out; } } - *size_out = mbox_cmd.size_out; - *retval = mbox_cmd.return_code; + *size_out = mbox_cmd->size_out; + *retval = mbox_cmd->return_code; out: - cxl_mbox_cmd_dtor(&mbox_cmd); + cxl_mbox_cmd_dtor(mbox_cmd); return rc; } @@ -512,9 +503,8 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (rc) return rc; - rc = handle_mailbox_cmd_from_user(cxlds, &c, send.in.payload, - send.out.payload, &send.out.size, - &send.retval); + rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload, + &send.out.size, &send.retval); if (rc) return rc; From 2dd5600a0e4e1aca8541e1c5bb2bc26bc0441d4d Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:17 -0700 Subject: [PATCH 07/47] cxl/mbox: Move cxl_mem_command param to a local variable cxl_validate_command_from_user() is now the single point of validation for mailbox commands coming from user space. Previously, it returned a a cxl_mem_command, but that was not sufficient when validation of the actual mailbox command became a requirement. Now, it returns a fully validated cxl_mbox_cmd. Remove the extraneous cxl_mem_command parameter. Define and use a local version only. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/c11a437896d914daf36f5ac8ec62f999c5ec2da7.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index b4bef3c50ee3f..704d6b313144f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -338,7 +338,6 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd. * @cxlds: The device data for the operation * @send_cmd: &struct cxl_send_command copied in from userspace. - * @out_cmd: Sanitized and populated &struct cxl_mem_command. * * Return: * * %0 - @out_cmd is ready to send. @@ -348,16 +347,14 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, * * %-EPERM - Attempted to use a protected command. 
* * %-EBUSY - Kernel has claimed exclusive access to this opcode * - * The result of this command is a fully validated command in @out_cmd that is + * The result of this command is a fully validated command in @mbox_cmd that is * safe to send to the hardware. - * - * See handle_mailbox_cmd_from_user() */ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, struct cxl_dev_state *cxlds, - const struct cxl_send_command *send_cmd, - struct cxl_mem_command *out_cmd) + const struct cxl_send_command *send_cmd) { + struct cxl_mem_command mem_cmd; int rc; if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX) @@ -373,16 +370,16 @@ static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd, /* Sanitize and construct a cxl_mem_command */ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) - rc = cxl_to_mem_cmd_raw(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds); else - rc = cxl_to_mem_cmd(out_cmd, send_cmd, cxlds); + rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds); if (rc) return rc; /* Sanitize and construct a cxl_mbox_cmd */ - return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, out_cmd->opcode, - out_cmd->info.size_in, out_cmd->info.size_out, + return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode, + mem_cmd.info.size_in, mem_cmd.info.size_out, send_cmd->in.payload); } @@ -490,7 +487,6 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) struct cxl_dev_state *cxlds = cxlmd->cxlds; struct device *dev = &cxlmd->dev; struct cxl_send_command send; - struct cxl_mem_command c; struct cxl_mbox_cmd mbox_cmd; int rc; @@ -499,7 +495,7 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s) if (copy_from_user(&send, s, sizeof(send))) return -EFAULT; - rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send, &c); + rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send); if (rc) return rc; From 6179045ccc0c6229dc449afc1701dc7fbd40571f Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 30 Mar 2022 18:27:18 -0700 Subject: [PATCH 08/47] cxl/mbox: Block immediate mode in SET_PARTITION_INFO command User space may send the SET_PARTITION_INFO mailbox command using the IOCTL interface. Inspect the input payload and fail if the immediate flag is set. This is the first instance of the driver inspecting an input payload from user space. Assume there will be more such cases and implement with an extensible helper. In order for the kernel to react to an immediate partition change it needs to assert that the change will not affect any active decode. At a minimum this requires validating that the device is using HDM decoders instead of the CXL DVSEC for decode, and that none of the active HDM decoders are affected by the partition change. For now, just fail until that support arrives. Signed-off-by: Alison Schofield Reviewed-by: Jonathan Cameron Reviewed-by: Dan Williams Link: https://lore.kernel.org/r/241821186c363833980adbc389e2c547bc5a6395.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 41 +++++++++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 7 +++++++ 2 files changed, 48 insertions(+) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 704d6b313144f..839207b627a84 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -218,6 +218,40 @@ static bool cxl_mem_raw_command_allowed(u16 opcode) return true; } +/** + * cxl_payload_from_user_allowed() - Check contents of in_payload. 
+ * @opcode: The mailbox command opcode.
+ * @payload_in: Pointer to the input payload passed in from user space.
+ *
+ * Return:
+ *  * true	- payload_in passes check for @opcode.
+ *  * false	- payload_in contains invalid or unsupported values.
+ *
+ * The driver may inspect payload contents before sending a mailbox
+ * command from user space to the device. The intent is to reject
+ * commands with input payloads that are known to be unsafe. This
+ * check is not intended to replace the user's careful selection of
+ * mailbox command parameters and makes no guarantee that the user
+ * command will succeed, nor that it is appropriate.
+ *
+ * The specific checks are determined by the opcode.
+ */
+static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
+{
+	switch (opcode) {
+	case CXL_MBOX_OP_SET_PARTITION_INFO: {
+		struct cxl_mbox_set_partition_info *pi = payload_in;
+
+		if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG)
+			return false;
+		break;
+	}
+	default:
+		break;
+	}
+	return true;
+}
+
 static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
 			     struct cxl_dev_state *cxlds, u16 opcode,
 			     size_t in_size, size_t out_size, u64 in_payload)
@@ -232,6 +266,13 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
 						in_size);
 		if (IS_ERR(mbox->payload_in))
 			return PTR_ERR(mbox->payload_in);
+
+		if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
+			dev_dbg(cxlds->dev, "%s: input payload not allowed\n",
+				cxl_mem_opcode_to_name(opcode));
+			kvfree(mbox->payload_in);
+			return -EBUSY;
+		}
 	}
 
 	/* Prepare to handle a full payload for variable sized output */
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 5d33ce24fe09f..df8dad028e431 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -262,6 +262,13 @@ struct cxl_mbox_set_lsa {
 	u8 data[];
 } __packed;
 
+struct cxl_mbox_set_partition_info {
+	__le64 volatile_capacity;
+	u8 flags;
+} __packed;
+
+#define CXL_SET_PARTITION_IMMEDIATE_FLAG BIT(0)
+
 /**
  * struct cxl_mem_command - Driver representation of a memory device command
  * @info: Command information as it exists for the UAPI

From 6aa657f416b65f23d7a3c9d04f144b1c4aa2ebc1 Mon Sep 17 00:00:00 2001
From: Alison Schofield
Date: Wed, 30 Mar 2022 18:27:19 -0700
Subject: [PATCH 09/47] cxl/pmem: Remove CXL SET_PARTITION_INFO from
 exclusive_cmds list

With SET_PARTITION_INFO on the exclusive_cmds list for the CXL_PMEM
driver, userspace cannot execute a set-partition command without
first unbinding the pmem driver from the device.

When userspace requests a partition change to take effect on the
next reboot this unbind requirement is unnecessarily restrictive.
The driver does not need to enforce an unbind because partitions
will not change until the next reboot. Of course, userspace still
needs to be aware that changing the size of persistent capacity on
the next reboot will result in the loss of data stored. That can
happen regardless of whether it is presently bound at the time of
issuing the set-partition command.

When userspace requests a partition change to take effect
immediately, restrictions are needed. The CXL_MEM driver currently
blocks the usage of immediate mode, making the presence of
SET_PARTITION_INFO, in this exclusive commands list, redundant.

In the future, when the CXL_MEM driver adds support for immediate
changes to device partitions it will ensure that the partition
change will not affect any active decode. That means the work will
not fall right back here, onto the CXL_PMEM driver.
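For reference, a next-reboot request that remains allowed under this
policy could be built as below (hypothetical values; per CXL 2.0 the
capacity field is expressed in 256MB multiples):

	/* example: ask for 1GB (4 x 256MB) volatile at next reboot */
	struct cxl_mbox_set_partition_info pi = {
		.volatile_capacity = cpu_to_le64(4),
		.flags = 0,	/* CXL_SET_PARTITION_IMMEDIATE_FLAG clear */
	};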
Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/accc6abc878f0662093b81490a1a052f2ff6f06e.1648687552.git.alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/pmem.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 15ad666ab03e3..404018dd99e6e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -344,7 +344,6 @@ static __init int cxl_pmem_init(void) { int rc; - set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); From ee92c7e261fd4b58ed5991a776ae9b25e9a5e030 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:13 -0700 Subject: [PATCH 10/47] cxl/mbox: Drop mbox_mutex comment ... we have lockdep for this. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-2-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 839207b627a84..187590c9844c3 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -147,7 +147,7 @@ static const char *cxl_mem_opcode_to_name(u16 opcode) * @out: Caller allocated buffer for the output. * @out_size: Expected size of output. * - * Context: Any context. Will acquire and release mbox_mutex. + * Context: Any context. * Return: * * %>=0 - Number of bytes returned in @out. * * %-E2BIG - Payload is too large for hardware. From cbe83a2052682c6f57d45f76fe7fea4bf254acd9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:14 -0700 Subject: [PATCH 11/47] cxl/pci: Use CXL_MBOX_SUCCESS to check against mbox_cmd return code Also mention the need for the caller to check against any errors from the hardware in return_code. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-3-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 3f2182d668292..94a91048e2f68 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -177,9 +177,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != 0) { + if (mbox_cmd->return_code != CXL_MBOX_SUCCESS) { dev_dbg(dev, "Mailbox operation had an error\n"); - return 0; + return 0; /* completed but caller must check return_code */ } /* #7 */ From 92fcc1abab095dceb2337444f79875c8a85063df Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:15 -0700 Subject: [PATCH 12/47] cxl/mbox: Improve handling of mbox_cmd hw return codes Upon a completed command the caller is still expected to check the actual return_code register to ensure it succeed. This adds, per the spec, the potential command return codes. It maps the hardware return code with the kernel's errno style, and by default continues to use -ENXIO (Command completed, but device reported an error). 
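The intended consumption of the table is sketched below; a later patch
in this series wires this up in the send path:

	if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(&mbox_cmd));
		return cxl_mbox_cmd_rc2errno(&mbox_cmd);
	}

Both helpers index the table directly, so they assume return_code
holds one of the spec-defined values the table captures.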
Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-4-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- drivers/cxl/cxlmem.h | 53 ++++++++++++++++++++++++++++++++++++++++- drivers/cxl/pci.c | 2 +- 3 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 187590c9844c3..5adc0ddb1737b 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -181,7 +181,7 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, return rc; /* TODO: Map return code to proper kernel style errno */ - if (mbox_cmd.return_code != CXL_MBOX_SUCCESS) + if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) return -ENXIO; /* diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index df8dad028e431..243dd86a8b46b 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -85,9 +85,60 @@ struct cxl_mbox_cmd { size_t size_in; size_t size_out; u16 return_code; -#define CXL_MBOX_SUCCESS 0 }; +/* + * Per CXL 2.0 Section 8.2.8.4.5.1 + */ +#define CMD_CMD_RC_TABLE \ + C(SUCCESS, 0, NULL), \ + C(BACKGROUND, -ENXIO, "background cmd started successfully"), \ + C(INPUT, -ENXIO, "cmd input was invalid"), \ + C(UNSUPPORTED, -ENXIO, "cmd is not supported"), \ + C(INTERNAL, -ENXIO, "internal device error"), \ + C(RETRY, -ENXIO, "temporary error, retry once"), \ + C(BUSY, -ENXIO, "ongoing background operation"), \ + C(MEDIADISABLED, -ENXIO, "media access is disabled"), \ + C(FWINPROGRESS, -ENXIO, "one FW package can be transferred at a time"), \ + C(FWOOO, -ENXIO, "FW package content was transferred out of order"), \ + C(FWAUTH, -ENXIO, "FW package authentication failed"), \ + C(FWSLOT, -ENXIO, "FW slot is not supported for requested operation"), \ + C(FWROLLBACK, -ENXIO, "rolled back to the previous active FW"), \ + C(FWRESET, -ENXIO, "FW failed to activate, needs cold reset"), \ + C(HANDLE, -ENXIO, "one or more Event Record Handles were invalid"), \ + C(PADDR, -ENXIO, "physical address specified is invalid"), \ + C(POISONLMT, -ENXIO, "poison injection limit has been reached"), \ + C(MEDIAFAILURE, -ENXIO, "permanent issue with the media"), \ + C(ABORT, -ENXIO, "background cmd was aborted by device"), \ + C(SECURITY, -ENXIO, "not valid in the current security state"), \ + C(PASSPHRASE, -ENXIO, "phrase doesn't match current set passphrase"), \ + C(MBUNSUPPORTED, -ENXIO, "unsupported on the mailbox it was issued on"),\ + C(PAYLOADLEN, -ENXIO, "invalid payload length") + +#undef C +#define C(a, b, c) CXL_MBOX_CMD_RC_##a +enum { CMD_CMD_RC_TABLE }; +#undef C +#define C(a, b, c) { b, c } +struct cxl_mbox_cmd_rc { + int err; + const char *desc; +}; + +static const +struct cxl_mbox_cmd_rc cxl_mbox_cmd_rctable[] ={ CMD_CMD_RC_TABLE }; +#undef C + +static inline const char *cxl_mbox_cmd_rc2str(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].desc; +} + +static inline int cxl_mbox_cmd_rc2errno(struct cxl_mbox_cmd *mbox_cmd) +{ + return cxl_mbox_cmd_rctable[mbox_cmd->return_code].err; +} + /* * CXL 2.0 - Memory capacity multiplier * See Section 8.2.9.5 diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 94a91048e2f68..88fcd6cc38a36 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -177,7 +177,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, mbox_cmd->return_code = FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); - if (mbox_cmd->return_code != CXL_MBOX_SUCCESS) { + if 
(mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { dev_dbg(dev, "Mailbox operation had an error\n"); return 0; /* completed but caller must check return_code */ } From c43e036d6f861f4c68b50eea49ab55b539eaab02 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 3 Apr 2022 19:12:16 -0700 Subject: [PATCH 13/47] cxl/mbox: Use new return_code handling Use the global cxl_mbox_cmd_rc table to improve debug messaging in __cxl_pci_mbox_send_cmd() and allow cxl_mbox_send_cmd() to map to proper kernel style errno codes - this patch continues to use -ENXIO only so no change in semantics. Signed-off-by: Davidlohr Bueso Reviewed by: Adam Manzanares Link: https://lore.kernel.org/r/20220404021216.66841-5-dave@stgolabs.net Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 3 +-- drivers/cxl/pci.c | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 5adc0ddb1737b..8a8388599a85f 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -180,9 +180,8 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, if (rc) return rc; - /* TODO: Map return code to proper kernel style errno */ if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS) - return -ENXIO; + return cxl_mbox_cmd_rc2errno(&mbox_cmd); /* * Variable sized commands can't be validated and so it's up to the diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 88fcd6cc38a36..39694cc63381d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -178,7 +178,8 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg); if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) { - dev_dbg(dev, "Mailbox operation had an error\n"); + dev_dbg(dev, "Mailbox operation had an error: %s\n", + cxl_mbox_cmd_rc2str(mbox_cmd)); return 0; /* completed but caller must check return_code */ } From e08063fb87944b1db963e94b833608318179708d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:22 -0700 Subject: [PATCH 14/47] cxl/mem: Drop DVSEC vs EFI Memory Map sanity check When the driver finds legacy DVSEC ranges active on a CXL Memory Expander it indicates that platform firmware is not aware of, or is deliberately disabling common CXL 2.0 operation. In this case Linux generally has no choice, but to leave the device alone. The driver attempts to validate that the DVSEC range is in the EFI memory map. Remove that logic since there is no requirement that the BIOS publish DVSEC ranges in the EFI Memory Map. In the future the driver will want to permanently reserve this capacity out of the available CFMWS capacity and hide it from request_free_mem_region(), but it serves no purpose to warn about the range not appearing in the EFI Memory Map. Reviewed-by: Davidlohr Bueso Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Link: https://lore.kernel.org/r/164730734246.3806189.13995924771963139898.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 49a4b1c47299f..cd4e8bba82aaa 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -158,30 +158,8 @@ static int cxl_mem_probe(struct device *dev) * is no use in trying to manage those. 
*/ if (!cxl_dvsec_decode_init(cxlds)) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - int i; - - /* */ - for (i = 0; i < 2; i++) { - u64 base, size; - - /* - * Give a nice warning to the user that BIOS has really - * botched things for them if it didn't place DVSEC - * ranges in the memory map. - */ - base = info->dvsec_range[i].start; - size = range_len(&info->dvsec_range[i]); - if (size && !region_intersects(base, size, - IORESOURCE_SYSTEM_RAM, - IORES_DESC_NONE)) { - dev_err(dev, - "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", - base, base + size - 1); - } - } dev_err(dev, - "Active DVSEC range registers in use. Will not bind.\n"); + "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; } From e39f9be08d9dfe685c8a325ac1755c04f383effc Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:28 -0700 Subject: [PATCH 15/47] cxl/pci: Add debug for DVSEC range init failures In preparation for not treating DVSEC range initialization failures as fatal to cxl_pci_probe() add individual dev_dbg() statements for each of the major failure reasons in cxl_dvsec_ranges(). The rationale for cxl_dvsec_ranges() failure not being fatal is that there is still value for cxl_pci to enable mailbox operations even if CXL.mem operation is disabled. Reviewed-by: Jonathan Cameron Reviewed-by: Ben Widawsky Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730734812.3806189.2726330688692684104.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 39694cc63381d..1cb2557d68b74 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -467,12 +467,15 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) { struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; int hdm_count, rc, i; u16 cap, ctrl; - if (!d) + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); return -ENXIO; + } rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); if (rc) @@ -482,8 +485,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) if (rc) return rc; - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); return -ENXIO; + } /* * It is not allowed by spec for MEM.capable to be set and have 0 legacy @@ -496,8 +501,10 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) return -EINVAL; rc = wait_for_valid(cxlds); - if (rc) + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); return rc; + } info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); From fbaf2b079d2a0a9c7114fbd4d1c0f3dd7a3cb3ad Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:33 -0700 Subject: [PATCH 16/47] cxl/mem: Make cxl_dvsec_range() init failure fatal In preparation for the cxl_pci driver to continue operation after cxl_dvsec_range() failure, update cxl_mem to check for negative error codes in info->ranges. Treat that condition as fatal regardless of the state of the HDM configuration since cxl_mem needs positive confirmation that legacy ranges were not established by platform firmware or another agent. 
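Summarizing the tri-state that cxl_mem ends up consuming once the next
patch converts the enumeration (comment form, not part of the diff):

	/*
	 * info->ranges < 0   enumeration failed; no positive confirmation,
	 *                    so cxl_mem refuses to bind
	 * info->ranges == 0  no active legacy ranges; HDM operation can
	 *                    proceed
	 * info->ranges > 0   count of active (non-zero sized) DVSEC ranges
	 */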
Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730735324.3806189.4167509857771192422.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index cd4e8bba82aaa..50704deb2ff0a 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -88,6 +88,9 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) void __iomem *crb; u32 global_ctrl; + if (info->ranges < 0) + return false; + /* map hdm decoder */ crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { From 36bfc6ad508af38f212cf5a38147d867fb3f80a8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:38 -0700 Subject: [PATCH 17/47] cxl/pci: Make cxl_dvsec_ranges() failure not fatal to cxl_pci cxl_dvsec_ranges(), the helper for enumerating the presence of an active legacy CXL.mem configuration on a CXL 2.0 Memory Expander, is not fatal for cxl_pci because there is still value to enable mailbox operations even if CXL.mem operation is disabled. Recall that the reason cxl_pci does this initialization and not cxl_mem is to preserve the useful property (for unit testing) that cxl_mem is cxl_memdev + mmio generic, and does not require access to a 'struct pci_dev' to issue config cycles. Update 'struct cxl_endpoint_dvsec_info' to carry either a positive number of non-zero size legacy CXL DVSEC ranges, or the negative error code from __cxl_dvsec_ranges() in its @ranges member. Reported-by: Krzysztof Zach Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730735869.3806189.4032428192652531946.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1cb2557d68b74..e7ab9a34d7189 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -463,13 +463,18 @@ static int wait_for_media_ready(struct cxl_dev_state *cxlds) return 0; } -static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +/* + * Return positive number of non-zero ranges on success and a negative + * error code on failure. The cxl_mem driver depends on ranges == 0 to + * init HDM operation. 
+ */ +static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int hdm_count, rc, i, ranges = 0; struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; - int hdm_count, rc, i; u16 cap, ctrl; if (!d) { @@ -546,10 +551,17 @@ static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) }; if (size) - info->ranges++; + ranges++; } - return 0; + return ranges; +} + +static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + + info->ranges = __cxl_dvsec_ranges(cxlds, info); } static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -618,10 +630,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - rc = cxl_dvsec_ranges(cxlds); - if (rc) - dev_warn(&pdev->dev, - "Failed to get DVSEC range information (%d)\n", rc); + cxl_dvsec_ranges(cxlds); cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) From 31e624a77e748e4ab7d5c9c3ddc46ba7735bd75e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 14 Mar 2022 18:22:44 -0700 Subject: [PATCH 18/47] cxl/mem: Rename cxl_dvsec_decode_init() to cxl_hdm_decode_init() cxl_dvsec_decode_init() is tasked with checking whether legacy DVSEC range based decode is in effect, or whether HDM can be enabled / already is enabled. As such it either succeeds or fails and that result is the return value. The @do_hdm_init variable is misleading in the case where HDM operation is already found to be active, so just call it @retval. Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164730736435.3806189.2537160791687837469.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 12 ++++++------ tools/testing/cxl/mock_mem.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 50704deb2ff0a..3baae1332760e 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -68,7 +68,7 @@ static int create_endpoint(struct cxl_memdev *cxlmd, } /** - * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state * * Additionally, enables global HDM decoding. Warning: don't call this outside @@ -79,12 +79,12 @@ static int create_endpoint(struct cxl_memdev *cxlmd, * decoders, or if it can not be determined if DVSEC Ranges are in use. * Otherwise, returns true. */ -__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct cxl_register_map map; struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, do_hdm_init = false; + bool global_enable, retval = false; void __iomem *crb; u32 global_ctrl; @@ -113,7 +113,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) goto out; } - do_hdm_init = true; + retval = true; /* * Permanently (for this boot at least) opt the device into HDM @@ -129,7 +129,7 @@ __mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) out: iounmap(crb); - return do_hdm_init; + return retval; } static int cxl_mem_probe(struct device *dev) @@ -160,7 +160,7 @@ static int cxl_mem_probe(struct device *dev) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. 
*/ - if (!cxl_dvsec_decode_init(cxlds)) { + if (!cxl_hdm_decode_init(cxlds)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c index d1dec5845139d..69946f678cfa0 100644 --- a/tools/testing/cxl/mock_mem.c +++ b/tools/testing/cxl/mock_mem.c @@ -4,7 +4,7 @@ #include struct cxl_dev_state; -bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { return true; } From 35ee1f499091c76bd5f5d52f5ef79c3568ac74a6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 8 Apr 2022 12:30:29 -0700 Subject: [PATCH 19/47] cxl/mem: Replace redundant debug message with a comment cxl_mem_probe() already emits a log message when HDM operation can not be established. Delete the similar one in cxl_hdm_decode_init(). What is less obvious is why global_ctrl being enabled makes positive values of info->ranges irrelevant, and the Linux behavior with respect to the spec recommendation to mirror CXL Range registers with HDM Decoder Base + Size registers. Cc: Ben Widawsky Reviewed-by: Jonathan Cameron Reviewed-by: Davidlohr Bueso Link: https://lore.kernel.org/r/164944616743.454665.7055846627973202403.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 3baae1332760e..43e73d259207c 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -107,11 +107,17 @@ __mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) global_ctrl = readl(crb + cmap->hdm_decoder.offset + CXL_HDM_DECODER_CTRL_OFFSET); global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - if (!global_enable && info->ranges) { - dev_dbg(cxlds->dev, - "DVSEC ranges already programmed and HDM decoders not enabled.\n"); + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. + */ + if (!global_enable && info->ranges) goto out; - } retval = true; From 9ea4dcf49878bb9546b8fa9319dcbdc9b7ee20f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 22 Apr 2022 15:58:11 -0700 Subject: [PATCH 20/47] PM: CXL: Disable suspend The CXL specification claims S3 support at a hardware level, but at a system software level there are some missing pieces. Section 9.4 (CXL 2.0) rightly claims that "CXL mem adapters may need aux power to retain memory context across S3", but there is no enumeration mechanism for the OS to determine if a given adapter has that support. Moreover the save state and resume image for the system may inadvertantly end up in a CXL device that needs to be restored before the save state is recoverable. I.e. a circular dependency that is not resolvable without a third party save-area. Arrange for the cxl_mem driver to fail S3 attempts. This still nominaly allows for suspend, but requires unbinding all CXL memory devices before the suspend to ensure the typical DRAM flow is taken. The cxl_mem unbind flow is intended to also tear down all CXL memory regions associated with a given cxl_memdev. 
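The mechanism is a bare global count (summary of the symbols this
patch adds, not a literal excerpt):

	cxl_mem_probe() success         -> cxl_mem_active_inc()
	cxl_mem unbind (devm unwind)    -> cxl_mem_active_dec()
	sleep_state_supported('mem') and hibernation_available()
	                                -> fail while cxl_mem_active()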
It is reasonable to assume that any device participating in a System RAM range published in the EFI memory map is covered by aux power and save-area outside the device itself. So this restriction can be minimized in the future once pre-existing region enumeration support arrives, and perhaps a spec update to clarify if the EFI memory map is sufficent for determining the range of devices managed by platform-firmware for S3 support. Per Rafael, if the CXL configuration prevents suspend then it should fail early before tasks are frozen, and mem_sleep should stop showing 'mem' as an option [1]. Effectively CXL augments the platform suspend ->valid() op since, for example, the ACPI ops are not aware of the CXL / PCI dependencies. Given the split role of platform firmware vs OS provisioned CXL memory it is up to the cxl_mem driver to determine if the CXL configuration has elements that platform firmware may not be prepared to restore. Link: https://lore.kernel.org/r/CAJZ5v0hGVN_=3iU8OLpHY3Ak35T5+JcBM-qs8SbojKrpd0VXsA@mail.gmail.com [1] Cc: "Rafael J. Wysocki" Cc: Pavel Machek Cc: Len Brown Reviewed-by: Rafael J. Wysocki Link: https://lore.kernel.org/r/165066828317.3907920.5690432272182042556.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/Makefile | 2 +- drivers/cxl/Kconfig | 4 ++++ drivers/cxl/Makefile | 2 +- drivers/cxl/core/Makefile | 1 + drivers/cxl/core/suspend.c | 24 ++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 11 +++++++++++ drivers/cxl/mem.c | 22 +++++++++++++++++++++- include/linux/pm.h | 9 +++++++++ kernel/power/hibernate.c | 2 +- kernel/power/main.c | 5 ++++- kernel/power/suspend.c | 3 ++- 11 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 drivers/cxl/core/suspend.c diff --git a/drivers/Makefile b/drivers/Makefile index 020780b6b4d22..f735c49551432 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -72,9 +72,9 @@ obj-$(CONFIG_PARPORT) += parport/ obj-y += base/ block/ misc/ mfd/ nfc/ obj-$(CONFIG_LIBNVDIMM) += nvdimm/ obj-$(CONFIG_DAX) += dax/ -obj-$(CONFIG_CXL_BUS) += cxl/ obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/ obj-$(CONFIG_NUBUS) += nubus/ +obj-y += cxl/ obj-y += macintosh/ obj-y += scsi/ obj-y += nvme/ diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index b88ab956bb7cf..f64e3984689ff 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -98,4 +98,8 @@ config CXL_PORT default CXL_BUS tristate +config CXL_SUSPEND + def_bool y + depends on SUSPEND && CXL_MEM + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index ce267ef11d93c..a78270794150d 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_CXL_BUS) += core/ +obj-y += core/ obj-$(CONFIG_CXL_PCI) += cxl_pci.o obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 6d37cd78b1518..9d35085d25aff 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += cxl_core.o +obj-$(CONFIG_CXL_SUSPEND) += suspend.o ccflags-y += -I$(srctree)/drivers/cxl cxl_core-y := port.o diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c new file mode 100644 index 0000000000000..a5984d96ea1d0 --- /dev/null +++ b/drivers/cxl/core/suspend.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ +#include +#include +#include "cxlmem.h" + +static atomic_t mem_active; + +bool cxl_mem_active(void) +{ + return atomic_read(&mem_active) != 0; +} + +void cxl_mem_active_inc(void) +{ + atomic_inc(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, CXL); + +void cxl_mem_active_dec(void) +{ + atomic_dec(&mem_active); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 243dd86a8b46b..7235d2f976e5c 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -353,6 +353,17 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); +#ifdef CONFIG_CXL_SUSPEND +void cxl_mem_active_inc(void); +void cxl_mem_active_dec(void); +#else +static inline void cxl_mem_active_inc(void) +{ +} +static inline void cxl_mem_active_dec(void) +{ +} +#endif struct cxl_hdm { struct cxl_component_regs regs; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 43e73d259207c..1bd2e0b67f59f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -138,6 +138,11 @@ __mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) return retval; } +static void enable_suspend(void *data) +{ + cxl_mem_active_dec(); +} + static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); @@ -194,7 +199,22 @@ static int cxl_mem_probe(struct device *dev) out: cxl_device_unlock(&parent_port->dev); put_device(&parent_port->dev); - return rc; + + /* + * The kernel may be operating out of CXL memory on this device, + * there is no spec defined way to determine whether this device + * preserves contents over suspend, and there is no simple way + * to arrange for the suspend image to avoid CXL memory which + * would setup a circular dependency between PCI resume and save + * state restoration. + * + * TODO: support suspend when all the regions this device is + * hosting are locked and covered by the system address map, + * i.e. platform firmware owns restoring the HDM configuration + * that it locked. 
static struct cxl_driver cxl_mem_driver = { diff --git a/include/linux/pm.h b/include/linux/pm.h index e65b3ab28377b..7911c4c9a7be6 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -36,6 +36,15 @@ static inline void pm_vt_switch_unregister(struct device *dev) } #endif /* CONFIG_VT_CONSOLE_SLEEP */ +#ifdef CONFIG_CXL_SUSPEND +bool cxl_mem_active(void); +#else +static inline bool cxl_mem_active(void) +{ + return false; +} +#endif + /* * Device power management */ diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 938d5c78b4211..20a66bf9f4659 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -83,7 +83,7 @@ bool hibernation_available(void) { return nohibernate == 0 && !security_locked_down(LOCKDOWN_HIBERNATION) && - !secretmem_active(); + !secretmem_active() && !cxl_mem_active(); } /** diff --git a/kernel/power/main.c b/kernel/power/main.c index 7e646079fbeb2..3e6be1c33e0b2 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -127,7 +127,9 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, char *s = buf; suspend_state_t i; - for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) + for (i = PM_SUSPEND_MIN; i < PM_SUSPEND_MAX; i++) { + if (i >= PM_SUSPEND_MEM && cxl_mem_active()) + continue; if (mem_sleep_states[i]) { const char *label = mem_sleep_states[i]; @@ -136,6 +138,7 @@ static ssize_t mem_sleep_show(struct kobject *kobj, struct kobj_attribute *attr, else s += sprintf(s, "%s ", label); } + } /* Convert the last space to a newline if needed. */ if (s != buf) diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 6fcdee7e87a57..827075944d283 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -236,7 +236,8 @@ EXPORT_SYMBOL_GPL(suspend_valid_only_mem); static bool sleep_state_supported(suspend_state_t state) { - return state == PM_SUSPEND_TO_IDLE || valid_state(state); + return state == PM_SUSPEND_TO_IDLE || + (valid_state(state) && !cxl_mem_active()); } static int platform_suspend_prepare(suspend_state_t state) From 26f89535a5bb17915a2e1062c3999a2ee797c7b0 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 13 Apr 2022 22:12:46 -0700 Subject: [PATCH 21/47] cxl/mbox: Use type __u32 for mailbox payload sizes Payload sizes for mailbox commands are expected to be positive values coming from userspace. The documentation correctly describes these as always unsigned values. The mailbox and send structures that support the mailbox commands, however, use __s32 types for the payloads. Replace __s32 with __u32 in the mailbox and send command structures and update usages. Kernel users of the interface already block all negative values and there is no known ability for userspace to have grown a dependency on submitting negative values to the kernel. The known user of the IOCTL, the CXL command line interface (cxl-cli), already enforces positive size values. A Smatch signedness warning uncovered this issue.
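The trap is easy to reproduce outside the kernel. This standalone C program is a simplified illustration of the class of bug Smatch flags (the values and the all-signed comparison are invented for the demo, not quoted from the driver): a huge user-supplied size wraps negative in a signed field and sails past an upper-bound check that the unsigned field catches:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t user_size = 0x80000000u; /* bogus 2GiB request */
        uint32_t payload_max = 1024 * 1024;

        int32_t s = (int32_t)user_size;   /* old __s32 field: negative */
        uint32_t u = user_size;           /* new __u32 field */

        printf("signed:   size > max? %s (bound check bypassed)\n",
               s > (int32_t)payload_max ? "yes" : "no");
        printf("unsigned: size > max? %s (rejected as expected)\n",
               u > payload_max ? "yes" : "no");
        return 0;
    }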
Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Alison Schofield Link: https://lore.kernel.org/r/20220414051246.1244575-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 28 +++++++++++++++------------- include/uapi/linux/cxl_mem.h | 14 +++++++------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 8a8388599a85f..d54a6d175fff5 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -35,6 +35,7 @@ static bool cxl_raw_allow_all; .flags = _flags, \ } +#define CXL_VARIABLE_PAYLOAD ~0U /* * This table defines the supported mailbox commands for the driver. This table * is made up of a UAPI structure. Non-negative values as parameters in the @@ -44,26 +45,26 @@ static bool cxl_raw_allow_all; static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = { CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE), #ifdef CONFIG_CXL_MEM_RAW_COMMANDS - CXL_CMD(RAW, ~0, ~0, 0), + CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0), #endif - CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(GET_FW_INFO, 0, 0x50, 0), CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0), - CXL_CMD(GET_LSA, 0x8, ~0, 0), + CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0), - CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE), + CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE), CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0), - CXL_CMD(SET_LSA, ~0, 0, 0), + CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0), CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0), CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0), CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0), CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0), - CXL_CMD(GET_POISON, 0x10, ~0, 0), + CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0), CXL_CMD(INJECT_POISON, 0x8, 0, 0), CXL_CMD(CLEAR_POISON, 0x48, 0, 0), CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0), CXL_CMD(SCAN_MEDIA, 0x11, 0, 0), - CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0), + CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0), }; /* @@ -187,9 +188,10 @@ int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, * Variable sized commands can't be validated and so it's up to the * caller to do that if they wish. 
*/ - if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size) - return -EIO; - + if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) { + if (mbox_cmd.size_out != out_size) + return -EIO; + } return 0; } EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL); @@ -275,7 +277,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, } /* Prepare to handle a full payload for variable sized output */ - if (out_size < 0) + if (out_size == CXL_VARIABLE_PAYLOAD) mbox->size_out = cxlds->payload_size; else mbox->size_out = out_size; @@ -353,11 +355,11 @@ static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd, return -EBUSY; /* Check the input buffer is the expected size */ - if (info->size_in >= 0 && info->size_in != send_cmd->in.size) + if (info->size_in != send_cmd->in.size) return -ENOMEM; /* Check the output buffer is at least large enough */ - if (info->size_out >= 0 && send_cmd->out.size < info->size_out) + if (send_cmd->out.size < info->size_out) return -ENOMEM; *mem_cmd = (struct cxl_mem_command) { diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h index 8d206f27bb6d7..c71021a2a9edf 100644 --- a/include/uapi/linux/cxl_mem.h +++ b/include/uapi/linux/cxl_mem.h @@ -68,8 +68,8 @@ static const struct { * struct cxl_command_info - Command information returned from a query. * @id: ID number for the command. * @flags: Flags that specify command behavior. - * @size_in: Expected input size, or -1 if variable length. - * @size_out: Expected output size, or -1 if variable length. + * @size_in: Expected input size, or ~0 if variable length. + * @size_out: Expected output size, or ~0 if variable length. * * Represents a single command that is supported by both the driver and the * hardware. This is returned as part of an array from the query ioctl. The @@ -78,7 +78,7 @@ static const struct { * * - @id = 10 * - @flags = 0 - * - @size_in = -1 + * - @size_in = ~0 * - @size_out = 0 * * See struct cxl_mem_query_commands. @@ -89,8 +89,8 @@ struct cxl_command_info { __u32 flags; #define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0) - __s32 size_in; - __s32 size_out; + __u32 size_in; + __u32 size_out; }; /** @@ -169,13 +169,13 @@ struct cxl_send_command { __u32 retval; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } in; struct { - __s32 size; + __u32 size; __u32 rsvd; __u64 payload; } out; From 280302f0e8f6919f0c591753ea21906d77797746 Mon Sep 17 00:00:00 2001 From: Alison Schofield Date: Wed, 6 Apr 2022 18:09:15 -0700 Subject: [PATCH 22/47] cxl/mbox: Replace NULL check with IS_ERR() after vmemdup_user() vmemdup_user() returns an ERR_PTR() on failure. Use IS_ERR() to check the return value. 
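The fix follows the standard ERR_PTR convention: vmemdup_user() reports failure by encoding an errno into the returned pointer and never returns NULL on error, so a !ptr test can never observe the failure. A minimal kernel-style sketch of the corrected pattern (copy_in_payload() is an invented name; IS_ERR(), PTR_ERR() and vmemdup_user() are the real interfaces):

    #include <linux/err.h>
    #include <linux/string.h>

    static int copy_in_payload(const void __user *uptr, size_t size, void **out)
    {
            void *buf = vmemdup_user(uptr, size);

            /* failure arrives as ERR_PTR(-ENOMEM), never as NULL */
            if (IS_ERR(buf))
                    return PTR_ERR(buf);
            *out = buf;
            return 0;
    }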
Reported-by: kernel test robot Reported-by: Dan Carpenter Signed-off-by: Alison Schofield Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20220407010915.1211258-1-alison.schofield@intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index d54a6d175fff5..731cb43b570e2 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -265,7 +265,7 @@ static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox, if (in_size) { mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload), in_size); - if (!mbox->payload_in) + if (IS_ERR(mbox->payload_in)) return PTR_ERR(mbox->payload_in); if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) { From 35e01667c84b1e16060020c3a13099447e61c822 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 28 Apr 2022 12:38:25 +0300 Subject: [PATCH 23/47] cxl/mbox: fix logical vs bitwise typo This should be bitwise & instead of &&. Fixes: 6179045ccc0c ("cxl/mbox: Block immediate mode in SET_PARTITION_INFO command") Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/YmpgkbbQ1Yxu36uO@kili Signed-off-by: Dan Williams --- drivers/cxl/core/mbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c index 731cb43b570e2..54f434733b562 100644 --- a/drivers/cxl/core/mbox.c +++ b/drivers/cxl/core/mbox.c @@ -243,7 +243,7 @@ static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in) case CXL_MBOX_OP_SET_PARTITION_INFO: { struct cxl_mbox_set_partition_info *pi = payload_in; - if (pi->flags && CXL_SET_PARTITION_IMMEDIATE_FLAG) + if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) return false; break; }
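The one-character difference deserves spelling out. In this standalone illustration (SOME_OTHER_FLAG and the bit values are invented for the demo; CXL_SET_PARTITION_IMMEDIATE_FLAG is the real flag name), logical && reduces both operands to true/false, so any non-zero flags word looks like an immediate-mode request even when the immediate bit is clear:

    #include <stdio.h>

    #define CXL_SET_PARTITION_IMMEDIATE_FLAG (1 << 0)
    #define SOME_OTHER_FLAG                  (1 << 1) /* invented */

    int main(void)
    {
        unsigned int flags = SOME_OTHER_FLAG; /* immediate bit NOT set */

        printf("logical &&: %d (wrongly treated as immediate)\n",
               flags && CXL_SET_PARTITION_IMMEDIATE_FLAG);
        printf("bitwise &:  %d (immediate bit correctly seen clear)\n",
               (flags & CXL_SET_PARTITION_IMMEDIATE_FLAG) != 0);
        return 0;
    }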
From 280302f0e8f6919f0c591753ea21906d77797746 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:16 -0600 Subject: [PATCH 24/47] PCI/ACPI: add a helper for retrieving _OSC Control DWORDs During _OSC negotiation, when the 'Control' DWORD is needed from the result buffer after running _OSC, a couple of places performed manual pointer arithmetic to offset into the right spot in the raw buffer. Add an acpi_osc_ctx_get_pci_control() helper to use the #define'd DWORD offsets to fetch the DWORDs needed from @acpi_osc_context, and replace the above instances of the open-coded arithmetic. Cc: "Rafael J. Wysocki" Suggested-by: Davidlohr Bueso Acked-by: Rafael J. Wysocki Reviewed-by: Rafael J. Wysocki Reviewed-by: Davidlohr Bueso Reviewed-by: Adam Manzanares Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-2-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/bus.c | 2 +- drivers/acpi/pci_root.c | 2 +- include/linux/acpi.h | 13 +++++++++++++ 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 3e58b613a2c41..7658acbbb2bdd 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -425,7 +425,7 @@ static void acpi_bus_osc_negotiate_usb_control(void) } osc_sb_native_usb4_control = - control & ((u32 *)context.ret.pointer)[OSC_CONTROL_DWORD]; + control & acpi_osc_ctx_get_pci_control(&context); acpi_bus_decode_usb_osc("USB4 _OSC: OS supports", control); acpi_bus_decode_usb_osc("USB4 _OSC: OS controls", diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 6f9e75d148084..57be89cb3966d 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -183,7 +183,7 @@ static acpi_status acpi_pci_run_osc(acpi_handle handle, status = acpi_run_osc(handle, &context); if (ACPI_SUCCESS(status)) { - *retval = *((u32 *)(context.ret.pointer + 8)); + *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); } return status; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d7136d13aa442..04e5a038dd57f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -608,6 +608,13 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1004,6 +1011,12 @@ static inline int acpi_register_wakeup_handler(int wake_irq, static inline void acpi_unregister_wakeup_handler( bool (*wakeup)(void *context), void *context) { } +struct acpi_osc_context; +static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC From 241d26bc26add2e2867c546f7474902406d37c60 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 13 Apr 2022 01:36:17 -0600 Subject: [PATCH 25/47] PCI/ACPI: Prefer CXL _OSC instead of PCIe _OSC for CXL host bridges In preparation for negotiating OS control of CXL _OSC features, do the minimal enabling to use CXL _OSC to handle the base PCIe feature negotiation. Recall that CXL _OSC is a super-set of PCIe _OSC and the CXL 2.0 specification mandates: "If a CXL Host Bridge device exposes CXL _OSC, CXL aware OSPM shall evaluate CXL _OSC and not evaluate PCIe _OSC." Rather than pass a boolean flag alongside @root to all the helper functions that need to consider PCIe specifics, add is_pcie() and is_cxl() helper functions to check the flavor of @root. This also allows for dynamic fallback to PCIe _OSC in cases where an attempt to use CXL _OSC fails. This can happen on CXL 1.1 platforms that publish ACPI0016 devices to indicate CXL host bridges, but do not publish the optional CXL _OSC method. CXL _OSC is mandatory for CXL 2.0 hosts. Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Reviewed-by: Jonathan Cameron Reviewed-by: Rafael J.
Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-3-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 67 ++++++++++++++++++++++++++++++++--------- include/acpi/acpi_bus.h | 6 ++++ include/linux/acpi.h | 4 +++ 3 files changed, 63 insertions(+), 14 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 57be89cb3966d..1e683c115ee05 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -168,20 +168,45 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static inline bool is_pcie(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; +} + +static inline bool is_cxl(struct acpi_pci_root *root) +{ + return root->bridge_type == ACPI_BRIDGE_TYPE_CXL; +} + static u8 pci_osc_uuid_str[] = "33DB4D5B-1FF7-401C-9657-7441C03DD766"; +static u8 cxl_osc_uuid_str[] = "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"; + +static char *to_uuid(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return cxl_osc_uuid_str; + return pci_osc_uuid_str; +} + +static int cap_length(struct acpi_pci_root *root) +{ + if (is_cxl(root)) + return sizeof(u32) * OSC_CXL_CAPABILITY_DWORDS; + return sizeof(u32) * OSC_PCI_CAPABILITY_DWORDS; +} -static acpi_status acpi_pci_run_osc(acpi_handle handle, +static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, const u32 *capbuf, u32 *retval) { struct acpi_osc_context context = { - .uuid_str = pci_osc_uuid_str, + .uuid_str = to_uuid(root), .rev = 1, - .cap.length = 12, + .cap.length = cap_length(root), .cap.pointer = (void *)capbuf, }; acpi_status status; - status = acpi_run_osc(handle, &context); + status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { *retval = acpi_osc_ctx_get_pci_control(&context); kfree(context.ret.pointer); @@ -194,7 +219,7 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 *control) { acpi_status status; - u32 result, capbuf[3]; + u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -202,10 +227,18 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; - status = acpi_pci_run_osc(root->device->handle, capbuf, &result); +retry: + status = acpi_pci_run_osc(root, capbuf, &result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; *control = result; + } else if (is_cxl(root)) { + /* + * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC + * upon any failure using CXL _OSC. 
+ */ + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + goto retry; } return status; } @@ -336,7 +369,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[3]; + u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -373,7 +406,7 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(handle, capbuf, mask); + status = acpi_pci_run_osc(root, capbuf, mask); if (ACPI_FAILURE(status)) return status; @@ -452,8 +485,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) return true; } -static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, - bool is_pcie) +static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; acpi_status status; @@ -504,7 +536,7 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm, *no_aspm = 1; /* _OSC is optional for PCI host bridges */ - if ((status == AE_NOT_FOUND) && !is_pcie) + if (status == AE_NOT_FOUND && !is_pcie(root)) return; if (control) { @@ -527,7 +559,7 @@ static int acpi_pci_root_add(struct acpi_device *device, acpi_handle handle = device->handle; int no_aspm = 0; bool hotadd = system_state == SYSTEM_RUNNING; - bool is_pcie; + const char *acpi_hid; root = kzalloc(sizeof(struct acpi_pci_root), GFP_KERNEL); if (!root) @@ -585,8 +617,15 @@ static int acpi_pci_root_add(struct acpi_device *device, root->mcfg_addr = acpi_pci_root_get_mcfg_addr(handle); - is_pcie = strcmp(acpi_device_hid(device), "PNP0A08") == 0; - negotiate_os_control(root, &no_aspm, is_pcie); + acpi_hid = acpi_device_hid(root->device); + if (strcmp(acpi_hid, "PNP0A08") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_PCIE; + else if (strcmp(acpi_hid, "ACPI0016") == 0) + root->bridge_type = ACPI_BRIDGE_TYPE_CXL; + else + dev_dbg(&device->dev, "Assuming non-PCIe host bridge\n"); + + negotiate_os_control(root, &no_aspm); /* * TBD: Need PCI interface for enumeration/configuration of roots. 
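The fallback added above is worth a compact model. In this standalone sketch (illustration only: struct root and run_osc() are stripped-down stand-ins for struct acpi_pci_root and firmware evaluation; the two UUID strings are the real PCIe and CXL _OSC UUIDs from the patch), a host bridge that starts out as CXL but fails CXL _OSC is downgraded to PCIe negotiation and retried:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define PCI_UUID "33DB4D5B-1FF7-401C-9657-7441C03DD766"
    #define CXL_UUID "68F2D50B-C469-4d8A-BD3D-941A103FD3FC"

    enum bridge_type { BRIDGE_PCIE = 1, BRIDGE_CXL };
    struct root { enum bridge_type bridge_type; };

    static const char *to_uuid(struct root *root)
    {
        return root->bridge_type == BRIDGE_CXL ? CXL_UUID : PCI_UUID;
    }

    /* Pretend firmware: a CXL 1.1 host that only implements PCIe _OSC. */
    static bool run_osc(const char *uuid)
    {
        return strcmp(uuid, PCI_UUID) == 0;
    }

    int main(void)
    {
        struct root root = { .bridge_type = BRIDGE_CXL };

    retry:
        if (!run_osc(to_uuid(&root)) && root.bridge_type == BRIDGE_CXL) {
            /* CXL _OSC is optional on CXL 1.1: fall back and retry */
            root.bridge_type = BRIDGE_PCIE;
            goto retry;
        }
        printf("negotiated via %s _OSC\n",
               root.bridge_type == BRIDGE_CXL ? "CXL" : "PCIe");
        return 0;
    }

Mirroring the kernel code, the downgrade is sticky: once a root falls back to PCIe _OSC it is treated as a plain PCIe host bridge for all subsequent negotiation.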
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index c4b78c21d7930..305ebf2a3fa74 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -582,10 +582,16 @@ int unregister_acpi_bus_type(struct acpi_bus_type *); int acpi_bind_one(struct device *dev, struct acpi_device *adev); int acpi_unbind_one(struct device *dev); +enum acpi_bridge_type { + ACPI_BRIDGE_TYPE_PCIE = 1, + ACPI_BRIDGE_TYPE_CXL, +}; + struct acpi_pci_root { struct acpi_device * device; struct pci_bus *bus; u16 segment; + int bridge_type; struct resource secondary; /* downstream bus range */ u32 osc_support_set; /* _OSC state of support bits */ diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 04e5a038dd57f..82d91d9ccce58 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -550,6 +550,10 @@ struct acpi_osc_context { acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); +/* Number of _OSC capability DWORDS depends on bridge type */ +#define OSC_PCI_CAPABILITY_DWORDS 3 +#define OSC_CXL_CAPABILITY_DWORDS 5 + /* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ From 56368029d93bbb3246ee2e03268fa6dd9754be05 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 13 Apr 2022 01:36:18 -0600 Subject: [PATCH 26/47] PCI/ACPI: negotiate CXL _OSC Add full support for negotiating _OSC as defined in the CXL 2.0 spec, as applicable to CXL-enabled platforms. Advertise support for the CXL features we support - 'CXL 2.0 port/device register access', 'Protocol Error Reporting', and 'CXL Native Hot Plug'. Request control for 'CXL Memory Error Reporting'. The requests are dependent on CONFIG_* based prerequisites, and prior PCI enabling, similar to how the standard PCI _OSC bits are determined. The CXL specification does not define any additional constraints on the hotplug flow beyond PCIe native hotplug, so a kernel that supports native PCIe hotplug supports CXL hotplug. For error handling, protocol and link errors just use PCIe AER. There is nascent support for amending AER events with CXL specific status [1], but there's otherwise no additional OS responsibility for CXL errors beyond PCIe AER. CXL Memory Errors behave the same as typical memory errors so CONFIG_MEMORY_FAILURE is sufficient to indicate support to platform firmware. [1]: https://lore.kernel.org/linux-cxl/164740402242.3912056.8303625392871313860.stgit@dwillia2-desk3.amr.corp.intel.com/ Cc: Bjorn Helgaas Cc: "Rafael J. Wysocki" Cc: Robert Moore Cc: Dan Williams Reviewed-by: Rafael J.
Wysocki Reviewed-by: Davidlohr Bueso Signed-off-by: Vishal Verma Link: https://lore.kernel.org/r/20220413073618.291335-4-vishal.l.verma@intel.com Signed-off-by: Dan Williams --- drivers/acpi/pci_root.c | 179 +++++++++++++++++++++++++++++++++++----- include/acpi/acpi_bus.h | 6 +- include/linux/acpi.h | 25 +++++- 3 files changed, 188 insertions(+), 22 deletions(-) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 1e683c115ee05..c82ad63fffed7 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -140,6 +140,17 @@ static struct pci_osc_bit_struct pci_osc_control_bit[] = { { OSC_PCI_EXPRESS_DPC_CONTROL, "DPC" }, }; +static struct pci_osc_bit_struct cxl_osc_support_bit[] = { + { OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT, "CXL11PortRegAccess" }, + { OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT, "CXL20PortDevRegAccess" }, + { OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT, "CXLProtocolErrorReporting" }, + { OSC_CXL_NATIVE_HP_SUPPORT, "CXLNativeHotPlug" }, +}; + +static struct pci_osc_bit_struct cxl_osc_control_bit[] = { + { OSC_CXL_ERROR_REPORTING_CONTROL, "CXLMemErrorReporting" }, +}; + static void decode_osc_bits(struct acpi_pci_root *root, char *msg, u32 word, struct pci_osc_bit_struct *table, int size) { @@ -168,6 +179,18 @@ static void decode_osc_control(struct acpi_pci_root *root, char *msg, u32 word) ARRAY_SIZE(pci_osc_control_bit)); } +static void decode_cxl_osc_support(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_support_bit, + ARRAY_SIZE(cxl_osc_support_bit)); +} + +static void decode_cxl_osc_control(struct acpi_pci_root *root, char *msg, u32 word) +{ + decode_osc_bits(root, msg, word, cxl_osc_control_bit, + ARRAY_SIZE(cxl_osc_control_bit)); +} + static inline bool is_pcie(struct acpi_pci_root *root) { return root->bridge_type == ACPI_BRIDGE_TYPE_PCIE; @@ -196,7 +219,8 @@ static int cap_length(struct acpi_pci_root *root) } static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, - const u32 *capbuf, u32 *retval) + const u32 *capbuf, u32 *pci_control, + u32 *cxl_control) { struct acpi_osc_context context = { .uuid_str = to_uuid(root), @@ -208,18 +232,20 @@ static acpi_status acpi_pci_run_osc(struct acpi_pci_root *root, status = acpi_run_osc(root->device->handle, &context); if (ACPI_SUCCESS(status)) { - *retval = acpi_osc_ctx_get_pci_control(&context); + *pci_control = acpi_osc_ctx_get_pci_control(&context); + if (is_cxl(root)) + *cxl_control = acpi_osc_ctx_get_cxl_control(&context); kfree(context.ret.pointer); } return status; } -static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, - u32 support, - u32 *control) +static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, u32 support, + u32 *control, u32 cxl_support, + u32 *cxl_control) { acpi_status status; - u32 result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 pci_result, cxl_result, capbuf[OSC_CXL_CAPABILITY_DWORDS]; support |= root->osc_support_set; @@ -227,11 +253,21 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root, capbuf[OSC_SUPPORT_DWORD] = support; capbuf[OSC_CONTROL_DWORD] = *control | root->osc_control_set; + if (is_cxl(root)) { + cxl_support |= root->osc_ext_support_set; + capbuf[OSC_EXT_SUPPORT_DWORD] = cxl_support; + capbuf[OSC_EXT_CONTROL_DWORD] = *cxl_control | root->osc_ext_control_set; + } + retry: - status = acpi_pci_run_osc(root, capbuf, &result); + status = acpi_pci_run_osc(root, capbuf, &pci_result, &cxl_result); if (ACPI_SUCCESS(status)) { root->osc_support_set = support; - *control = result; + *control = 
pci_result; + if (is_cxl(root)) { + root->osc_ext_support_set = cxl_support; + *cxl_control = cxl_result; + } } else if (is_cxl(root)) { /* * CXL _OSC is optional on CXL 1.1 hosts. Fall back to PCIe _OSC @@ -354,6 +390,8 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * @handle: ACPI handle of a PCI root bridge (or PCIe Root Complex). * @mask: Mask of _OSC bits to request control of, place to store control mask. * @support: _OSC supported capability. + * @cxl_mask: Mask of CXL _OSC control bits, place to store control mask. + * @cxl_support: CXL _OSC supported capability. * * Run _OSC query for @mask and if that is successful, compare the returned * mask of control bits with @req. If all of the @req bits are set in the @@ -364,12 +402,14 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev); * _OSC bits the BIOS has granted control of, but its contents are meaningless * on failure. **/ -static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 support) +static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, + u32 support, u32 *cxl_mask, + u32 cxl_support) { u32 req = OSC_PCI_EXPRESS_CAPABILITY_CONTROL; struct acpi_pci_root *root; acpi_status status; - u32 ctrl, capbuf[OSC_CXL_CAPABILITY_DWORDS]; + u32 ctrl, cxl_ctrl = 0, capbuf[OSC_CXL_CAPABILITY_DWORDS]; if (!mask) return AE_BAD_PARAMETER; @@ -381,20 +421,42 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s ctrl = *mask; *mask |= root->osc_control_set; + if (is_cxl(root)) { + cxl_ctrl = *cxl_mask; + *cxl_mask |= root->osc_ext_control_set; + } + /* Need to check the available controls bits before requesting them. */ do { - status = acpi_pci_query_osc(root, support, mask); + u32 pci_missing = 0, cxl_missing = 0; + + status = acpi_pci_query_osc(root, support, mask, cxl_support, + cxl_mask); if (ACPI_FAILURE(status)) return status; - if (ctrl == *mask) - break; - decode_osc_control(root, "platform does not support", - ctrl & ~(*mask)); + if (is_cxl(root)) { + if (ctrl == *mask && cxl_ctrl == *cxl_mask) + break; + pci_missing = ctrl & ~(*mask); + cxl_missing = cxl_ctrl & ~(*cxl_mask); + } else { + if (ctrl == *mask) + break; + pci_missing = ctrl & ~(*mask); + } + if (pci_missing) + decode_osc_control(root, "platform does not support", + pci_missing); + if (cxl_missing) + decode_cxl_osc_control(root, "CXL platform does not support", + cxl_missing); ctrl = *mask; - } while (*mask); + cxl_ctrl = *cxl_mask; + } while (*mask || *cxl_mask); /* No need to request _OSC if the control was already granted. */ - if ((root->osc_control_set & ctrl) == ctrl) + if ((root->osc_control_set & ctrl) == ctrl && + (root->osc_ext_control_set & cxl_ctrl) == cxl_ctrl) return AE_OK; if ((ctrl & req) != req) { @@ -406,11 +468,17 @@ static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 s capbuf[OSC_QUERY_DWORD] = 0; capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set; capbuf[OSC_CONTROL_DWORD] = ctrl; - status = acpi_pci_run_osc(root, capbuf, mask); + if (is_cxl(root)) { + capbuf[OSC_EXT_SUPPORT_DWORD] = root->osc_ext_support_set; + capbuf[OSC_EXT_CONTROL_DWORD] = cxl_ctrl; + } + + status = acpi_pci_run_osc(root, capbuf, mask, cxl_mask); if (ACPI_FAILURE(status)) return status; root->osc_control_set = *mask; + root->osc_ext_control_set = *cxl_mask; return AE_OK; } @@ -436,6 +504,53 @@ static u32 calculate_support(void) return support; } +/* + * Background on hotplug support, and making it depend on only + * CONFIG_HOTPLUG_PCI_PCIE vs. 
also considering CONFIG_MEMORY_HOTPLUG: + * + * CONFIG_ACPI_HOTPLUG_MEMORY does depend on CONFIG_MEMORY_HOTPLUG, but + * there is no existing _OSC for memory hotplug support. The reason is that + * ACPI memory hotplug requires the OS to acknowledge / coordinate with + * memory plug events via a scan handler. On the CXL side the equivalent + * would be if Linux supported the Mechanical Retention Lock [1], or + * otherwise had some coordination for the driver of a PCI device + * undergoing hotplug to be consulted on whether the hotplug should + * proceed or not. + * + * The concern is that if Linux says no to supporting CXL hotplug then + * the BIOS may say no to giving the OS hotplug control of any other PCIe + * device. So the question here is not whether hotplug is enabled, it's + * whether it is handled natively by the OS at all, and if + * CONFIG_HOTPLUG_PCI_PCIE is enabled then the answer is "yes". + * + * Otherwise, the plan for CXL coordinated remove, since the kernel does + * not support blocking hotplug, is to require the memory device to be + * disabled before hotplug is attempted. When CONFIG_MEMORY_HOTPLUG is + * disabled that step will fail and the remove attempt must be cancelled + * by the user. If that is not honored and the card is removed anyway then + * it does not matter if CONFIG_MEMORY_HOTPLUG is enabled or not, it will + * cause a crash and other badness. + * + * Therefore, just say yes to CXL hotplug and require removal to + * be coordinated by userspace unless and until the kernel grows better + * mechanisms for doing "managed" removal of devices in consultation with + * the driver. + * + * [1]: https://lore.kernel.org/all/20201122014203.4706-1-ashok.raj@intel.com/ + */ +static u32 calculate_cxl_support(void) +{ + u32 support; + + support = OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT; + if (pci_aer_available()) + support |= OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT; + if (IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE)) + support |= OSC_CXL_NATIVE_HP_SUPPORT; + + return support; +} + static u32 calculate_control(void) { u32 control; @@ -467,6 +582,16 @@ static u32 calculate_control(void) return control; } +static u32 calculate_cxl_control(void) +{ + u32 control = 0; + + if (IS_ENABLED(CONFIG_MEMORY_FAILURE)) + control |= OSC_CXL_ERROR_REPORTING_CONTROL; + + return control; +} + static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) { struct acpi_device *device = root->device; @@ -488,6 +613,7 @@ static bool os_control_query_checks(struct acpi_pci_root *root, u32 support) static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) { u32 support, control = 0, requested = 0; + u32 cxl_support = 0, cxl_control = 0, cxl_requested = 0; acpi_status status; struct acpi_device *device = root->device; acpi_handle handle = device->handle; @@ -511,10 +637,20 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) if (os_control_query_checks(root, support)) requested = control = calculate_control(); - status = acpi_pci_osc_control_set(handle, &control, support); + if (is_cxl(root)) { + cxl_support = calculate_cxl_support(); + decode_cxl_osc_support(root, "OS supports", cxl_support); + cxl_requested = cxl_control = calculate_cxl_control(); + } + + status = acpi_pci_osc_control_set(handle, &control, support, + &cxl_control, cxl_support); if (ACPI_SUCCESS(status)) { if (control) decode_osc_control(root, "OS now controls", control); + if (cxl_control) + decode_cxl_osc_control(root, "OS now controls", + cxl_control); if (acpi_gbl_FADT.boot_flags &
ACPI_FADT_NO_ASPM) { /* @@ -543,6 +679,11 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm) decode_osc_control(root, "OS requested", requested); decode_osc_control(root, "platform willing to grant", control); } + if (cxl_control) { + decode_cxl_osc_control(root, "OS requested", cxl_requested); + decode_cxl_osc_control(root, "platform willing to grant", + cxl_control); + } dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n", acpi_format_exception(status)); diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 305ebf2a3fa74..4c463ae2777b9 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -594,8 +594,10 @@ struct acpi_pci_root { int bridge_type; struct resource secondary; /* downstream bus range */ - u32 osc_support_set; /* _OSC state of support bits */ - u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_support_set; /* _OSC state of support bits */ + u32 osc_control_set; /* _OSC state of control bits */ + u32 osc_ext_support_set; /* _OSC state of extended support bits */ + u32 osc_ext_control_set; /* _OSC state of extended control bits */ phys_addr_t mcfg_addr; }; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 82d91d9ccce58..378a431666b3d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -554,10 +554,12 @@ acpi_status acpi_run_osc(acpi_handle handle, struct acpi_osc_context *context); #define OSC_PCI_CAPABILITY_DWORDS 3 #define OSC_CXL_CAPABILITY_DWORDS 5 -/* Indexes into _OSC Capabilities Buffer (DWORDs 2 & 3 are device-specific) */ +/* Indexes into _OSC Capabilities Buffer (DWORDs 2 to 5 are device-specific) */ #define OSC_QUERY_DWORD 0 /* DWORD 1 */ #define OSC_SUPPORT_DWORD 1 /* DWORD 2 */ #define OSC_CONTROL_DWORD 2 /* DWORD 3 */ +#define OSC_EXT_SUPPORT_DWORD 3 /* DWORD 4 */ +#define OSC_EXT_CONTROL_DWORD 4 /* DWORD 5 */ /* _OSC Capabilities DWORD 1: Query/Control and Error Returns (generic) */ #define OSC_QUERY_ENABLE 0x00000001 /* input */ @@ -612,6 +614,15 @@ extern u32 osc_sb_native_usb4_control; #define OSC_PCI_EXPRESS_LTR_CONTROL 0x00000020 #define OSC_PCI_EXPRESS_DPC_CONTROL 0x00000080 +/* CXL _OSC: Capabilities DWORD 4: Support Field */ +#define OSC_CXL_1_1_PORT_REG_ACCESS_SUPPORT 0x00000001 +#define OSC_CXL_2_0_PORT_DEV_REG_ACCESS_SUPPORT 0x00000002 +#define OSC_CXL_PROTOCOL_ERR_REPORTING_SUPPORT 0x00000004 +#define OSC_CXL_NATIVE_HP_SUPPORT 0x00000008 + +/* CXL _OSC: Capabilities DWORD 5: Control Field */ +#define OSC_CXL_ERROR_REPORTING_CONTROL 0x00000001 + static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) { u32 *ret = context->ret.pointer; @@ -619,6 +630,13 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return ret[OSC_CONTROL_DWORD]; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + u32 *ret = context->ret.pointer; + + return ret[OSC_EXT_CONTROL_DWORD]; +} + #define ACPI_GSB_ACCESS_ATTRIB_QUICK 0x00000002 #define ACPI_GSB_ACCESS_ATTRIB_SEND_RCV 0x00000004 #define ACPI_GSB_ACCESS_ATTRIB_BYTE 0x00000006 @@ -1021,6 +1039,11 @@ static inline u32 acpi_osc_ctx_get_pci_control(struct acpi_osc_context *context) return 0; } +static inline u32 acpi_osc_ctx_get_cxl_control(struct acpi_osc_context *context) +{ + return 0; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC From 3750d013182b071dbf458144540390de0031be8c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:13 -0700 Subject: [PATCH 27/47] cxl: Replace 
lockdep_mutex with local lock classes In response to an attempt to expand dev->lockdep_mutex for device_lock() validation [1], Peter points out [2] that the lockdep API already has the ability to assign a dedicated lock class per subsystem device-type. Use lockdep_set_class() to override the default device_lock() '__lockdep_no_validate__' class for each CXL subsystem device-type. This enables lockdep to detect deadlocks and recursive locking within the device-driver core and the subsystem. The lockdep_set_class_and_subclass() API is used for port objects that recursively lock the 'cxl_port_key' class by hierarchical topology depth. Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1] Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [2] Suggested-by: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055519317.3745911.7342499516839702840.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/memdev.c | 3 +++ drivers/cxl/core/pmem.c | 6 ++++++ drivers/cxl/core/port.c | 13 +++++++++---- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 1f76b28f9826b..f7cdcd33504aa 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -228,6 +228,8 @@ static void detach_memdev(struct work_struct *work) put_device(&cxlmd->dev); } +static struct lock_class_key cxl_memdev_key; + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -247,6 +249,7 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev = &cxlmd->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_memdev_key); dev->parent = cxlds->dev; dev->bus = &cxl_bus_type; dev->devt = MKDEV(cxl_mem_major, cxlmd->id); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index 8de240c4d96bb..e825e261278d8 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -80,6 +80,8 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); +static struct lock_class_key cxl_nvdimm_bridge_key; + static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; @@ -99,6 +101,7 @@ static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) cxl_nvb->port = port; cxl_nvb->state = CXL_NVB_NEW; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; @@ -214,6 +217,8 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev) } EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL); +static struct lock_class_key cxl_nvdimm_key; + static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) { struct cxl_nvdimm *cxl_nvd; @@ -226,6 +231,7 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd) dev = &cxl_nvd->dev; cxl_nvd->cxlmd = cxlmd; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_nvdimm_key); device_set_pm_not_required(dev); dev->parent = &cxlmd->dev; dev->bus = &cxl_bus_type; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 2ab1ba4499b30..750aac95ed5fd 100644 --- 
a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -391,6 +391,8 @@ static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) return devm_add_action_or_reset(host, cxl_unlink_uport, port); } +static struct lock_class_key cxl_port_key; + static struct cxl_port *cxl_port_alloc(struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port) @@ -415,9 +417,10 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, * description. */ dev = &port->dev; - if (parent_port) + if (parent_port) { dev->parent = &parent_port->dev; - else + port->depth = parent_port->depth + 1; + } else dev->parent = uport; port->uport = uport; @@ -427,6 +430,7 @@ static struct cxl_port *cxl_port_alloc(struct device *uport, INIT_LIST_HEAD(&port->endpoints); device_initialize(dev); + lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth); device_set_pm_not_required(dev); dev->bus = &cxl_bus_type; dev->type = &cxl_port_type; @@ -457,8 +461,6 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, if (IS_ERR(port)) return port; - if (parent_port) - port->depth = parent_port->depth + 1; dev = &port->dev; if (is_cxl_memdev(uport)) rc = dev_set_name(dev, "endpoint%d", port->id); @@ -1173,6 +1175,8 @@ static int decoder_populate_targets(struct cxl_decoder *cxld, return rc; } +static struct lock_class_key cxl_decoder_key; + /** * cxl_decoder_alloc - Allocate a new CXL decoder * @port: owning port of this decoder @@ -1214,6 +1218,7 @@ static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, seqlock_init(&cxld->target_lock); dev = &cxld->dev; device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_decoder_key); device_set_pm_not_required(dev); dev->parent = &port->dev; dev->bus = &cxl_bus_type; From d864b8ea6468cf1dce614a58eec92a23d8e07fec Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 26 Apr 2022 12:22:44 -0700 Subject: [PATCH 28/47] cxl/acpi: Add root device lockdep validation The CXL "root" device, ACPI0017, is an attach point for coordinating platform level CXL resources and is the parent device for a CXL port topology tree. As such it has distinct locking rules relative to other CXL subsystem objects, but because it is an ACPI device the lock class is established well before it is given to the cxl_acpi driver. However, the lockdep API does support changing the lock class "live" for situations like this. Add a device_lock_set_class() helper that a driver can use in ->probe() to set a custom lock class, and device_lock_reset_class() to return to the default "no validate" class before the custom lock class key goes out of scope after ->remove(). Note the helpers are all macros to support dead code elimination in the CONFIG_PROVE_LOCKING=n case, however device_lock_set_class() still needs #ifdef CONFIG_PROVE_LOCKING since lockdep_match_class() explicitly does not have a helper in the CONFIG_PROVE_LOCKING=n case (see comment in lockdep.h). The lockdep API needs 2 small tweaks to prevent "unused" warnings for the @key argument to lock_set_class(), and a new lock_set_novalidate_class() is added to supplement lockdep_set_novalidate_class() in the cases where the lock class is converted while the lock is held. Suggested-by: Peter Zijlstra Cc: "Rafael J.
Wysocki" Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Cc: Alison Schofield Cc: Vishal Verma Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Greg Kroah-Hartman Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165100081305.1528964.11138612430659737238.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/acpi.c | 13 +++++++++++++ include/linux/device.h | 43 +++++++++++++++++++++++++++++++++++++++++ include/linux/lockdep.h | 6 +++++- 3 files changed, 61 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index d15a6aec03318..40286f5df8127 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -275,6 +275,13 @@ static int add_root_nvdimm_bridge(struct device *match, void *data) return 1; } +static struct lock_class_key cxl_root_key; + +static void cxl_acpi_lock_reset_class(void *dev) +{ + device_lock_reset_class(dev); +} + static int cxl_acpi_probe(struct platform_device *pdev) { int rc; @@ -283,6 +290,12 @@ static int cxl_acpi_probe(struct platform_device *pdev) struct acpi_device *adev = ACPI_COMPANION(host); struct cxl_cfmws_context ctx; + device_lock_set_class(&pdev->dev, &cxl_root_key); + rc = devm_add_action_or_reset(&pdev->dev, cxl_acpi_lock_reset_class, + &pdev->dev); + if (rc) + return rc; + root_port = devm_cxl_add_port(host, host, CXL_RESOURCE_NONE, NULL); if (IS_ERR(root_port)) return PTR_ERR(root_port); diff --git a/include/linux/device.h b/include/linux/device.h index 93459724dcdeb..833b0b3b01932 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -850,6 +850,49 @@ static inline bool device_supports_offline(struct device *dev) return dev->bus && dev->bus->offline && dev->bus->online; } +#define __device_lock_set_class(dev, name, key) \ +do { \ + struct device *__d2 __maybe_unused = dev; \ + lock_set_class(&__d2->mutex.dep_map, name, key, 0, _THIS_IP_); \ +} while (0) + +/** + * device_lock_set_class - Specify a temporary lock class while a device + * is attached to a driver + * @dev: device to modify + * @key: lock class key data + * + * This must be called with the device_lock() already held, for example + * from driver ->probe(). Take care to only override the default + * lockdep_no_validate class. + */ +#ifdef CONFIG_LOCKDEP +#define device_lock_set_class(dev, key) \ +do { \ + struct device *__d = dev; \ + dev_WARN_ONCE(__d, !lockdep_match_class(&__d->mutex, \ + &__lockdep_no_validate__), \ + "overriding existing custom lock class\n"); \ + __device_lock_set_class(__d, #key, key); \ +} while (0) +#else +#define device_lock_set_class(dev, key) __device_lock_set_class(dev, #key, key) +#endif + +/** + * device_lock_reset_class - Return a device to the default lockdep novalidate state + * @dev: device to modify + * + * This must be called with the device_lock() already held, for example + * from driver ->remove(). 
+ */ +#define device_lock_reset_class(dev) \ +do { \ + struct device *__d __maybe_unused = dev; \ + lock_set_novalidate_class(&__d->mutex.dep_map, "&dev->mutex", \ + _THIS_IP_); \ +} while (0) + void lock_device_hotplug(void); void unlock_device_hotplug(void); int lock_device_hotplug_sysfs(void); diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 467b94257105e..43b0dc6a0b214 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -290,6 +290,9 @@ extern void lock_set_class(struct lockdep_map *lock, const char *name, struct lock_class_key *key, unsigned int subclass, unsigned long ip); +#define lock_set_novalidate_class(l, n, i) \ + lock_set_class(l, n, &__lockdep_no_validate__, 0, i) + static inline void lock_set_subclass(struct lockdep_map *lock, unsigned int subclass, unsigned long ip) { @@ -357,7 +360,8 @@ static inline void lockdep_set_selftest_task(struct task_struct *task) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, i) do { } while (0) # define lock_downgrade(l, i) do { } while (0) -# define lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_class(l, n, key, s, i) do { (void)(key); } while (0) +# define lock_set_novalidate_class(l, n, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_init_map_type(lock, name, key, sub, inner, outer, type) \ From 38a34e10768c85d3be4bb31fea5d8942bb72bbd7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:23 -0700 Subject: [PATCH 29/47] cxl: Drop cxl_device_lock() Now that all CXL subsystem locking is validated with custom lock classes, there is no need for the custom usage of the lockdep_mutex. Cc: Alison Schofield Cc: Vishal Verma Cc: Ira Weiny Cc: Ben Widawsky Cc: Jonathan Cameron Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055520383.3745911.53447786039115271.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/cxl/core/pmem.c | 4 +-- drivers/cxl/core/port.c | 55 ++++++++++++----------------- drivers/cxl/cxl.h | 78 ----------------------------------------- drivers/cxl/mem.c | 4 +-- drivers/cxl/pmem.c | 12 +++---- lib/Kconfig.debug | 6 ---- 6 files changed, 33 insertions(+), 126 deletions(-) diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index e825e261278d8..bec7cfb54ebfb 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -124,10 +124,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. 
*/ - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index 750aac95ed5fd..ea60abda65006 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -312,10 +312,10 @@ static void cxl_port_release(struct device *dev) struct cxl_port *port = to_cxl_port(dev); struct cxl_ep *ep, *_e; - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry_safe(ep, _e, &port->endpoints, list) cxl_ep_release(ep); - cxl_device_unlock(dev); + device_unlock(dev); ida_free(&cxl_port_ida, port->id); kfree(port); } @@ -556,7 +556,7 @@ static int match_root_child(struct device *dev, const void *match) return 0; port = to_cxl_port(dev); - cxl_device_lock(dev); + device_lock(dev); list_for_each_entry(dport, &port->dports, list) { iter = match; while (iter) { @@ -566,7 +566,7 @@ static int match_root_child(struct device *dev, const void *match) } } out: - cxl_device_unlock(dev); + device_unlock(dev); return !!iter; } @@ -625,13 +625,13 @@ static int add_dport(struct cxl_port *port, struct cxl_dport *new) static void cond_cxl_root_lock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_lock(&port->dev); + device_lock(&port->dev); } static void cond_cxl_root_unlock(struct cxl_port *port) { if (is_cxl_root(port)) - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); } static void cxl_dport_remove(void *data) @@ -738,15 +738,15 @@ static int add_ep(struct cxl_port *port, struct cxl_ep *new) { struct cxl_ep *dup; - cxl_device_lock(&port->dev); + device_lock(&port->dev); if (port->dead) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return -ENXIO; } dup = find_ep(port, new->ep); if (!dup) list_add_tail(&new->list, &port->endpoints); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dup ? -EEXIST : 0; } @@ -856,12 +856,12 @@ static void delete_endpoint(void *data) goto out; parent = &parent_port->dev; - cxl_device_lock(parent); + device_lock(parent); if (parent->driver && endpoint->uport) { devm_release_action(parent, cxl_unlink_uport, endpoint); devm_release_action(parent, unregister_port, endpoint); } - cxl_device_unlock(parent); + device_unlock(parent); put_device(parent); out: put_device(&endpoint->dev); @@ -922,7 +922,7 @@ static void cxl_detach_ep(void *data) } parent_port = to_cxl_port(port->dev.parent); - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { /* * The bottom-up race to delete the port lost to a @@ -930,12 +930,12 @@ static void cxl_detach_ep(void *data) * parent_port ->remove() will have cleaned up all * descendants. */ - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&port->dev); continue; } - cxl_device_lock(&port->dev); + device_lock(&port->dev); ep = find_ep(port, &cxlmd->dev); dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", ep ? 
dev_name(ep->ep) : "", dev_name(&port->dev)); @@ -950,7 +950,7 @@ static void cxl_detach_ep(void *data) port->dead = true; list_splice_init(&port->dports, &reap_dports); } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); if (!list_empty(&reap_dports)) { dev_dbg(&cxlmd->dev, "delete %s\n", @@ -958,7 +958,7 @@ static void cxl_detach_ep(void *data) delete_switch_port(port, &reap_dports); } put_device(&port->dev); - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); } } @@ -1006,7 +1006,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, return -EAGAIN; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_warn(&cxlmd->dev, "port %s:%s disabled, failed to enumerate CXL.mem\n", @@ -1024,7 +1024,7 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd, get_device(&port->dev); } out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); if (IS_ERR(port)) rc = PTR_ERR(port); @@ -1135,14 +1135,14 @@ struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, { struct cxl_dport *dport; - cxl_device_lock(&port->dev); + device_lock(&port->dev); list_for_each_entry(dport, &port->dports, list) if (dport->dport == dev) { - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return dport; } - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return NULL; } EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); @@ -1384,9 +1384,9 @@ int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) port = to_cxl_port(cxld->dev.parent); - cxl_device_lock(&port->dev); + device_lock(&port->dev); rc = cxl_decoder_add_locked(cxld, target_map); - cxl_device_unlock(&port->dev); + device_unlock(&port->dev); return rc; } @@ -1457,14 +1457,7 @@ static int cxl_bus_probe(struct device *dev) { int rc; - /* - * Take the CXL nested lock since the driver core only holds - * @dev->mutex and not @dev->lockdep_mutex. - */ - cxl_nested_lock(dev); rc = to_cxl_drv(dev->driver)->probe(dev); - cxl_nested_unlock(dev); - dev_dbg(dev, "probe: %d\n", rc); return rc; } @@ -1473,10 +1466,8 @@ static void cxl_bus_remove(struct device *dev) { struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - cxl_nested_lock(dev); if (cxl_drv->remove) cxl_drv->remove(dev); - cxl_nested_unlock(dev); } static struct workqueue_struct *cxl_bus_wq; diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 990b6670222ed..140dc3278cde1 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -405,82 +405,4 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #define __mock static #endif -#ifdef CONFIG_PROVE_CXL_LOCKING -enum cxl_lock_class { - CXL_ANON_LOCK, - CXL_NVDIMM_LOCK, - CXL_NVDIMM_BRIDGE_LOCK, - CXL_PORT_LOCK, - /* - * Be careful to add new lock classes here, CXL_PORT_LOCK is - * extended by the port depth, so a maximum CXL port topology - * depth would need to be defined first. - */ -}; - -static inline void cxl_nested_lock(struct device *dev) -{ - if (is_cxl_port(dev)) { - struct cxl_port *port = to_cxl_port(dev); - - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth); - } else if (is_cxl_decoder(dev)) { - struct cxl_port *port = to_cxl_port(dev->parent); - - /* - * A decoder is the immediate child of a port, so set - * its lock class equal to other child device siblings. 
- */ - mutex_lock_nested(&dev->lockdep_mutex, - CXL_PORT_LOCK + port->depth + 1); - } else if (is_cxl_nvdimm_bridge(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); - else if (is_cxl_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); - else - mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); -} - -static inline void cxl_nested_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void cxl_device_lock(struct device *dev) -{ - /* - * For double lock errors the lockup will happen before lockdep - * warns at cxl_nested_lock(), so assert explicitly. - */ - lockdep_assert_not_held(&dev->lockdep_mutex); - - device_lock(dev); - cxl_nested_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - cxl_nested_unlock(dev); - device_unlock(dev); -} -#else -static inline void cxl_nested_lock(struct device *dev) -{ -} - -static inline void cxl_nested_unlock(struct device *dev) -{ -} - -static inline void cxl_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void cxl_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 1bd2e0b67f59f..401b0fbe21db9 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -187,7 +187,7 @@ static int cxl_mem_probe(struct device *dev) return -ENXIO; } - cxl_device_lock(&parent_port->dev); + device_lock(&parent_port->dev); if (!parent_port->dev.driver) { dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); @@ -197,7 +197,7 @@ static int cxl_mem_probe(struct device *dev) rc = create_endpoint(cxlmd, parent_port); out: - cxl_device_unlock(&parent_port->dev); + device_unlock(&parent_port->dev); put_device(&parent_port->dev); /* diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index 404018dd99e6e..bbeef91e637e9 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - cxl_device_lock(&cxl_nvb->dev); + device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - cxl_device_unlock(&cxl_nvb->dev); + device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - cxl_device_lock(dev); + device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - cxl_device_unlock(dev); + device_unlock(dev); return 0; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 075cd25363ac3..4a2c853c948ba 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1559,12 +1559,6 @@ config PROVE_NVDIMM_LOCKING help Enable lockdep to validate nd_device_lock() usage. 
-config PROVE_CXL_LOCKING - bool "CXL" - depends on CXL_BUS - help - Enable lockdep to validate cxl_device_lock() usage. - endchoice endmenu # lock debugging From 4a0079bc7aae5a003ecc548090b75c96d3abf490 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:29 -0700 Subject: [PATCH 30/47] nvdimm: Replace lockdep_mutex with local lock classes In response to an attempt to expand dev->lockdep_mutex for device_lock() validation [1], Peter points out [2] that the lockdep API already has the ability to assign a dedicated lock class per subsystem device-type. Use lockdep_set_class() to override the default device_lock() '__lockdep_no_validate__' class for each NVDIMM subsystem device-type. This enables lockdep to detect deadlocks and recursive locking within the device-driver core and the subsystem. Link: https://lore.kernel.org/r/164982968798.684294.15817853329823976469.stgit@dwillia2-desk3.amr.corp.intel.com [1] Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [2] Suggested-by: Peter Zijlstra Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055520896.3745911.8021255583475547548.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 7 +++++-- drivers/nvdimm/bus.c | 14 +++++++------- drivers/nvdimm/dax_devs.c | 4 ++-- drivers/nvdimm/dimm_devs.c | 4 ++++ drivers/nvdimm/namespace_devs.c | 10 +++++++++- drivers/nvdimm/nd-core.h | 2 +- drivers/nvdimm/pfn_devs.c | 7 +++++-- drivers/nvdimm/region_devs.c | 4 ++++ 8 files changed, 37 insertions(+), 15 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index e5a58520d3982..1208217966682 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -178,6 +178,8 @@ bool is_nd_btt(struct device *dev) } EXPORT_SYMBOL(is_nd_btt); +static struct lock_class_key nvdimm_btt_key; + static struct device *__nd_btt_create(struct nd_region *nd_region, unsigned long lbasize, uuid_t *uuid, struct nd_namespace_common *ndns) @@ -205,6 +207,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->parent = &nd_region->dev; dev->type = &nd_btt_device_type; device_initialize(&nd_btt->dev); + lockdep_set_class(&nd_btt->dev.mutex, &nvdimm_btt_key); if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -225,7 +228,7 @@ struct device *nd_btt_create(struct nd_region *nd_region) { struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -324,7 +327,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (!nd_btt->uuid) return -ENOMEM; - __nd_device_register(&nd_btt->dev); + nd_device_register(&nd_btt->dev); return 0; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 7b0d1443217a3..85ffa04e93c27 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -334,6 +334,8 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_to_bus); +static struct lock_class_key nvdimm_bus_key; + struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nd_desc) { @@ -360,6 +362,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent, nvdimm_bus->dev.bus = &nvdimm_bus_type; nvdimm_bus->dev.of_node = nd_desc->of_node; device_initialize(&nvdimm_bus->dev); + lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key); 
device_set_pm_not_required(&nvdimm_bus->dev); rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id); if (rc) @@ -511,7 +514,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) put_device(dev); } -void __nd_device_register(struct device *dev) +void nd_device_register(struct device *dev) { if (!dev) return; @@ -537,12 +540,6 @@ void __nd_device_register(struct device *dev) async_schedule_dev_domain(nd_async_device_register, dev, &nd_async_domain); } - -void nd_device_register(struct device *dev) -{ - device_initialize(dev); - __nd_device_register(dev); -} EXPORT_SYMBOL(nd_device_register); void nd_device_unregister(struct device *dev, enum nd_async_mode mode) @@ -724,6 +721,8 @@ static void ndctl_release(struct device *dev) kfree(dev); } +static struct lock_class_key nvdimm_ndctl_key; + int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) { dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id); @@ -734,6 +733,7 @@ int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus) if (!dev) return -ENOMEM; device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key); device_set_pm_not_required(dev); dev->class = nd_class; dev->parent = &nvdimm_bus->dev; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c index 99965077bac4f..7f4a9d28b6702 100644 --- a/drivers/nvdimm/dax_devs.c +++ b/drivers/nvdimm/dax_devs.c @@ -80,7 +80,7 @@ struct device *nd_dax_create(struct nd_region *nd_region) nd_dax = nd_dax_alloc(nd_region); if (nd_dax) dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -119,7 +119,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(dax_dev, &nd_pfn->ndns); put_device(dax_dev); } else - __nd_device_register(dax_dev); + nd_device_register(dax_dev); return rc; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index ee507eed42b53..27b8f8cd1b489 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -570,6 +570,8 @@ bool is_nvdimm(struct device *dev) return dev->type == &nvdimm_device_type; } +static struct lock_class_key nvdimm_key; + struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, unsigned long cmd_mask, int num_flush, @@ -613,6 +615,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, /* get security state and extended (master) state */ nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER); nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_key); nd_device_register(dev); return nvdimm; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 62b83b2e26e3f..5197a813849dc 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1830,6 +1830,8 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region) return dev; } +static struct lock_class_key nvdimm_namespace_key; + void nd_region_create_ns_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); @@ -1845,8 +1847,12 @@ void nd_region_create_ns_seed(struct nd_region *nd_region) */ if (!nd_region->ns_seed) dev_err(&nd_region->dev, "failed to create namespace\n"); - else + else { + device_initialize(nd_region->ns_seed); + lockdep_set_class(&nd_region->ns_seed->mutex, + &nvdimm_namespace_key); nd_device_register(nd_region->ns_seed); 
+ } } void nd_region_create_dax_seed(struct nd_region *nd_region) @@ -2200,6 +2206,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err) if (id < 0) break; dev_set_name(dev, "namespace%d.%d", nd_region->id, id); + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_namespace_key); nd_device_register(dev); } if (i) diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 448f9dcb4bb7d..99b04106434be 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -106,7 +106,7 @@ void nd_region_create_dax_seed(struct nd_region *nd_region); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); void nd_synchronize(void); -void __nd_device_register(struct device *dev); +void nd_device_register(struct device *dev); struct nd_label_id; char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid, u32 flags); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index c31e184bfa45e..0cd396d0024c8 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -291,6 +291,8 @@ bool is_nd_pfn(struct device *dev) } EXPORT_SYMBOL(is_nd_pfn); +static struct lock_class_key nvdimm_pfn_key; + struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { @@ -303,6 +305,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, nd_pfn->align = nd_pfn_default_alignment(); dev = &nd_pfn->dev; device_initialize(&nd_pfn->dev); + lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { dev_dbg(&ndns->dev, "failed, already claimed by %s\n", dev_name(ndns->claim)); @@ -346,7 +349,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) nd_pfn = nd_pfn_alloc(nd_region); dev = nd_pfn_devinit(nd_pfn, NULL); - __nd_device_register(dev); + nd_device_register(dev); return dev; } @@ -643,7 +646,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) nd_detach_ndns(pfn_dev, &nd_pfn->ndns); put_device(pfn_dev); } else - __nd_device_register(pfn_dev); + nd_device_register(pfn_dev); return rc; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 0cb274c2b508b..8650e8d39c895 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -949,6 +949,8 @@ static unsigned long default_align(struct nd_region *nd_region) return align; } +static struct lock_class_key nvdimm_region_key; + static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, struct nd_region_desc *ndr_desc, const struct device_type *dev_type, const char *caller) @@ -1035,6 +1037,8 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, else nd_region->flush = NULL; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &nvdimm_region_key); nd_device_register(dev); return nd_region; From 1550a17a7da2936281fda5e87fc454cee3c9c683 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:34 -0700 Subject: [PATCH 31/47] ACPI: NFIT: Drop nfit_device_lock() The nfit_device_lock() helper was added to provide lockdep coverage for the NFIT driver's usage of device_lock() on the nvdimm_bus object. Now that nvdimm_bus objects have their own lock class this wrapper can be dropped. 
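For reference, a condensed sketch of the two patterns (both taken from
hunks in this series; illustrative only, not part of this diff): the old
wrapper took a shadow mutex next to the device_lock() so lockdep had
something to validate, while the new scheme classifies the device's real
mutex once at initialization:

    /* old: peer lock taken alongside the un-validated device_lock() */
    static inline void nfit_device_lock(struct device *dev)
    {
            device_lock(dev);
            mutex_lock(&dev->lockdep_mutex);
    }

    /* new: give the real lock a dedicated class, use plain device_lock() */
    static struct lock_class_key nvdimm_bus_key;

    device_initialize(&nvdimm_bus->dev);
    lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key);

With the class assigned, device_lock()/device_unlock() calls are
validated directly and the wrapper, along with its !CONFIG_PROVE_LOCKING
fallback, has no remaining purpose.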
Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055521409.3745911.8085645201146909612.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 30 +++++++++++++++--------------- drivers/acpi/nfit/nfit.h | 24 ------------------------ 2 files changed, 15 insertions(+), 39 deletions(-) diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index fe61f617a943b..ae5f4acf26753 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -1230,7 +1230,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, if (rc) return rc; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); @@ -1247,7 +1247,7 @@ static ssize_t hw_error_scrub_store(struct device *dev, break; } } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1267,10 +1267,10 @@ static ssize_t scrub_show(struct device *dev, ssize_t rc = -ENXIO; bool busy; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (!nd_desc) { - nfit_device_unlock(dev); + device_unlock(dev); return rc; } acpi_desc = to_acpi_desc(nd_desc); @@ -1287,7 +1287,7 @@ static ssize_t scrub_show(struct device *dev, } mutex_unlock(&acpi_desc->init_mutex); - nfit_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1304,14 +1304,14 @@ static ssize_t scrub_store(struct device *dev, if (val != 1) return -EINVAL; - nfit_device_lock(dev); + device_lock(dev); nd_desc = dev_get_drvdata(dev); if (nd_desc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG); } - nfit_device_unlock(dev); + device_unlock(dev); if (rc) return rc; return size; @@ -1697,9 +1697,9 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data) struct acpi_device *adev = data; struct device *dev = &adev->dev; - nfit_device_lock(dev->parent); + device_lock(dev->parent); __acpi_nvdimm_notify(dev, event); - nfit_device_unlock(dev->parent); + device_unlock(dev->parent); } static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method) @@ -3152,8 +3152,8 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) struct device *dev = acpi_desc->dev; /* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ - nfit_device_lock(dev); - nfit_device_unlock(dev); + device_lock(dev); + device_unlock(dev); /* Bounce the init_mutex to complete initial registration */ mutex_lock(&acpi_desc->init_mutex); @@ -3305,8 +3305,8 @@ void acpi_nfit_shutdown(void *data) * acpi_nfit_ars_rescan() submissions have had a chance to * either submit or see ->cancel set. 
*/ - nfit_device_lock(bus_dev); - nfit_device_unlock(bus_dev); + device_lock(bus_dev); + device_unlock(bus_dev); flush_workqueue(nfit_wq); } @@ -3449,9 +3449,9 @@ EXPORT_SYMBOL_GPL(__acpi_nfit_notify); static void acpi_nfit_notify(struct acpi_device *adev, u32 event) { - nfit_device_lock(&adev->dev); + device_lock(&adev->dev); __acpi_nfit_notify(&adev->dev, adev->handle, event); - nfit_device_unlock(&adev->dev); + device_unlock(&adev->dev); } static const struct acpi_device_id acpi_nfit_ids[] = { diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index 50882bdbeb96e..6023ad61831aa 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -337,30 +337,6 @@ static inline struct acpi_nfit_desc *to_acpi_desc( return container_of(nd_desc, struct acpi_nfit_desc, nd_desc); } -#ifdef CONFIG_PROVE_LOCKING -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); - mutex_lock(&dev->lockdep_mutex); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); - device_unlock(dev); -} -#else -static inline void nfit_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nfit_device_unlock(struct device *dev) -{ - device_unlock(dev); -} -#endif - const guid_t *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz); void acpi_nfit_shutdown(void *data); From 81beea55cb7407a52bac16f7d01a415e008c910f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:39 -0700 Subject: [PATCH 32/47] nvdimm: Drop nd_device_lock() Now that all NVDIMM subsystem locking is validated with custom lock classes, there is no need for the custom usage of the lockdep_mutex. Cc: Vishal Verma Cc: Dave Jiang Cc: Ira Weiny Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165055521979.3745911.10751769706032029999.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/btt_devs.c | 16 ++++---- drivers/nvdimm/bus.c | 24 +++++------- drivers/nvdimm/core.c | 10 ++--- drivers/nvdimm/dimm_devs.c | 8 ++-- drivers/nvdimm/namespace_devs.c | 36 +++++++++--------- drivers/nvdimm/nd-core.h | 66 --------------------------------- drivers/nvdimm/pfn_devs.c | 24 ++++++------ drivers/nvdimm/pmem.c | 2 +- drivers/nvdimm/region.c | 2 +- drivers/nvdimm/region_devs.c | 16 ++++---- lib/Kconfig.debug | 17 --------- 11 files changed, 66 insertions(+), 155 deletions(-) diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 1208217966682..fabbb31f2c35d 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -50,14 +50,14 @@ static ssize_t sector_size_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_btt->lbasize, btt_lbasize_supported); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -79,11 +79,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? 
rc : len; } @@ -108,13 +108,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -126,14 +126,14 @@ static ssize_t size_show(struct device *dev, struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = sprintf(buf, "%llu\n", nd_btt->size); else { /* no size to convey if the btt instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 85ffa04e93c27..a4fc17db707c9 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -88,10 +88,7 @@ static int nvdimm_bus_probe(struct device *dev) dev->driver->name, dev_name(dev)); nvdimm_bus_probe_start(nvdimm_bus); - debug_nvdimm_lock(dev); rc = nd_drv->probe(dev); - debug_nvdimm_unlock(dev); - if ((rc == 0 || rc == -EOPNOTSUPP) && dev->parent && is_nd_region(dev->parent)) nd_region_advance_seeds(to_nd_region(dev->parent), dev); @@ -111,11 +108,8 @@ static void nvdimm_bus_remove(struct device *dev) struct module *provider = to_bus_provider(dev); struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); - if (nd_drv->remove) { - debug_nvdimm_lock(dev); + if (nd_drv->remove) nd_drv->remove(dev); - debug_nvdimm_unlock(dev); - } dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name, dev_name(dev)); @@ -139,7 +133,7 @@ static void nvdimm_bus_shutdown(struct device *dev) void nd_device_notify(struct device *dev, enum nvdimm_event event) { - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_device_driver *nd_drv; @@ -147,7 +141,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event) if (nd_drv->notify) nd_drv->notify(dev, event); } - nd_device_unlock(dev); + device_unlock(dev); } EXPORT_SYMBOL(nd_device_notify); @@ -569,9 +563,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode) * or otherwise let the async path handle it if the * unregistration was already queued. 
*/ - nd_device_lock(dev); + device_lock(dev); killed = kill_device(dev); - nd_device_unlock(dev); + device_unlock(dev); if (!killed) return; @@ -930,10 +924,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) if (nvdimm_bus->probe_active == 0) break; nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); wait_event(nvdimm_bus->wait, nvdimm_bus->probe_active == 0); - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); } while (true); } @@ -1167,7 +1161,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, goto out; } - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf); if (rc) @@ -1189,7 +1183,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, out_unlock: nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); out: kfree(in_env); kfree(out_env); diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 69a03358817f1..144926b7451cc 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -215,7 +215,7 @@ EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev); * * Enforce that uuids can only be changed while the device is disabled * (driver detached) - * LOCKING: expects nd_device_lock() is held on entry + * LOCKING: expects device_lock() is held on entry */ int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf, size_t len) @@ -316,15 +316,15 @@ static DEVICE_ATTR_RO(provider); static int flush_namespaces(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); return 0; } static int flush_regions_dimms(struct device *dev, void *data) { - nd_device_lock(dev); - nd_device_unlock(dev); + device_lock(dev); + device_unlock(dev); device_for_each_child(dev, NULL, flush_namespaces); return 0; } diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 27b8f8cd1b489..c7c9805774911 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -341,9 +341,9 @@ static ssize_t available_slots_show(struct device *dev, { ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = __available_slots_show(dev_get_drvdata(dev), buf); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -386,12 +386,12 @@ static ssize_t security_store(struct device *dev, * done while probing is idle and the DIMM is not in active use * in any region. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = nvdimm_security_store(dev, buf, len); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 5197a813849dc..bf4f5c09d9b1b 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -264,7 +264,7 @@ static ssize_t alt_name_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __alt_name_store(dev, buf, len); @@ -272,7 +272,7 @@ static ssize_t alt_name_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? 
rc : len; } @@ -846,7 +846,7 @@ static ssize_t size_store(struct device *dev, if (rc) return rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __size_store(dev, val); @@ -868,7 +868,7 @@ static ssize_t size_store(struct device *dev, dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1043,7 +1043,7 @@ static ssize_t uuid_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (to_ndns(dev)->claim) @@ -1059,7 +1059,7 @@ static ssize_t uuid_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1118,7 +1118,7 @@ static ssize_t sector_size_store(struct device *dev, } else return -ENXIO; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (to_ndns(dev)->claim) rc = -EBUSY; @@ -1129,7 +1129,7 @@ static ssize_t sector_size_store(struct device *dev, dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote", buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -1239,9 +1239,9 @@ static ssize_t holder_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : ""); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1278,7 +1278,7 @@ static ssize_t holder_class_store(struct device *dev, struct nd_region *nd_region = to_nd_region(dev->parent); int rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); rc = __holder_class_store(dev, buf); @@ -1286,7 +1286,7 @@ static ssize_t holder_class_store(struct device *dev, rc = nd_namespace_label_update(nd_region, dev); dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc < 0 ? rc : len; } @@ -1297,7 +1297,7 @@ static ssize_t holder_class_show(struct device *dev, struct nd_namespace_common *ndns = to_ndns(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (ndns->claim_class == NVDIMM_CCLASS_NONE) rc = sprintf(buf, "\n"); else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) || @@ -1309,7 +1309,7 @@ static ssize_t holder_class_show(struct device *dev, rc = sprintf(buf, "dax\n"); else rc = sprintf(buf, "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1323,7 +1323,7 @@ static ssize_t mode_show(struct device *dev, char *mode; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); claim = ndns->claim; if (claim && is_nd_btt(claim)) mode = "safe"; @@ -1336,7 +1336,7 @@ static ssize_t mode_show(struct device *dev, else mode = "raw"; rc = sprintf(buf, "%s\n", mode); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -1456,8 +1456,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) * Flush any in-progess probes / removals in the driver * for the raw personality of this namespace. 
*/ - nd_device_lock(&ndns->dev); - nd_device_unlock(&ndns->dev); + device_lock(&ndns->dev); + device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", dev_name(dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 99b04106434be..cc86ee09d7c08 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -161,70 +161,4 @@ static inline void devm_nsio_disable(struct device *dev, { } #endif - -#ifdef CONFIG_PROVE_NVDIMM_LOCKING -extern struct class *nd_class; - -enum { - LOCK_BUS, - LOCK_NDCTL, - LOCK_REGION, - LOCK_DIMM = LOCK_REGION, - LOCK_NAMESPACE, - LOCK_CLAIM, -}; - -static inline void debug_nvdimm_lock(struct device *dev) -{ - if (is_nd_region(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION); - else if (is_nvdimm(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM); - else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM); - else if (dev->parent && (is_nd_region(dev->parent))) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE); - else if (is_nvdimm_bus(dev)) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS); - else if (dev->class && dev->class == nd_class) - mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL); - else - dev_WARN(dev, "unknown lock level\n"); -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ - mutex_unlock(&dev->lockdep_mutex); -} - -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); - debug_nvdimm_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - debug_nvdimm_unlock(dev); - device_unlock(dev); -} -#else -static inline void nd_device_lock(struct device *dev) -{ - device_lock(dev); -} - -static inline void nd_device_unlock(struct device *dev) -{ - device_unlock(dev); -} - -static inline void debug_nvdimm_lock(struct device *dev) -{ -} - -static inline void debug_nvdimm_unlock(struct device *dev) -{ -} -#endif #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 0cd396d0024c8..0e92ab4b32833 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -55,7 +55,7 @@ static ssize_t mode_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); if (dev->driver) rc = -EBUSY; @@ -77,7 +77,7 @@ static ssize_t mode_store(struct device *dev, dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -123,14 +123,14 @@ static ssize_t align_store(struct device *dev, unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_size_select_store(dev, buf, &nd_pfn->align, nd_pfn_supported_alignments(aligns)); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc ? rc : len; } @@ -152,11 +152,11 @@ static ssize_t uuid_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); - nd_device_unlock(dev); + device_unlock(dev); return rc ? 
rc : len; } @@ -181,13 +181,13 @@ static ssize_t namespace_store(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, buf[len - 1] == '\n' ? "" : "\n"); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -199,7 +199,7 @@ static ssize_t resource_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -213,7 +213,7 @@ static ssize_t resource_show(struct device *dev, /* no address to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } @@ -225,7 +225,7 @@ static ssize_t size_show(struct device *dev, struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) { struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; u64 offset = __le64_to_cpu(pfn_sb->dataoff); @@ -241,7 +241,7 @@ static ssize_t size_show(struct device *dev, /* no size to convey if the pfn instance is disabled */ rc = -ENXIO; } - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 58d95242a836b..3992521c151fa 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -573,7 +573,7 @@ static void nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)); else { /* - * Note, this assumes nd_device_lock() context to not + * Note, this assumes device_lock() context to not * race nd_pmem_notify() */ sysfs_put(pmem->bb_state); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 188560b1c110d..390123d293ea1 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -95,7 +95,7 @@ static void nd_region_remove(struct device *dev) nvdimm_bus_unlock(dev); /* - * Note, this assumes nd_device_lock() context to not race + * Note, this assumes device_lock() context to not race * nd_region_notify() */ sysfs_put(nd_region->bb_state); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 8650e8d39c895..d976260eca7ad 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -279,7 +279,7 @@ static ssize_t set_cookie_show(struct device *dev, * the v1.1 namespace label cookie definition. To read all this * data we need to wait for probing to settle. */ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); if (nd_region->ndr_mappings) { @@ -296,7 +296,7 @@ static ssize_t set_cookie_show(struct device *dev, } } nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); if (rc) return rc; @@ -353,12 +353,12 @@ static ssize_t available_size_show(struct device *dev, * memory nvdimm_bus_lock() is dropped, but that's userspace's * problem to not race itself. 
*/ - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_available_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -370,12 +370,12 @@ static ssize_t max_available_extent_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); unsigned long long available = 0; - nd_device_lock(dev); + device_lock(dev); nvdimm_bus_lock(dev); wait_nvdimm_bus_probe_idle(dev); available = nd_region_allocatable_dpa(nd_region); nvdimm_bus_unlock(dev); - nd_device_unlock(dev); + device_unlock(dev); return sprintf(buf, "%llu\n", available); } @@ -549,12 +549,12 @@ static ssize_t region_badblocks_show(struct device *dev, struct nd_region *nd_region = to_nd_region(dev); ssize_t rc; - nd_device_lock(dev); + device_lock(dev); if (dev->driver) rc = badblocks_show(&nd_region->bb, buf, 0); else rc = -ENXIO; - nd_device_unlock(dev); + device_unlock(dev); return rc; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4a2c853c948ba..cfe3b092c31da 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1544,23 +1544,6 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. -choice - prompt "Lock debugging: prove subsystem device_lock() correctness" - depends on PROVE_LOCKING - help - For subsystems that have instrumented their usage of the device_lock() - with nested annotations, enable lock dependency checking. The locking - hierarchy 'subclass' identifiers are not compatible across - sub-systems, so only one can be enabled at a time. - -config PROVE_NVDIMM_LOCKING - bool "NVDIMM" - depends on LIBNVDIMM - help - Enable lockdep to validate nd_device_lock() usage. - -endchoice - endmenu # lock debugging config TRACE_IRQFLAGS From fd3abd2cafa46955846d731b9a6ded2c19ab73d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 21 Apr 2022 08:33:45 -0700 Subject: [PATCH 33/47] device-core: Kill the lockdep_mutex Per Peter [1], the lockdep API has native support for all the use cases lockdep_mutex was attempting to enable. Now that all lockdep_mutex users have been converted to those APIs, drop this lock. Link: https://lore.kernel.org/r/Ylf0dewci8myLvoW@hirez.programming.kicks-ass.net [1] Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Suggested-by: Peter Zijlstra Reviewed-by: Ira Weiny Acked-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/165055522548.3745911.14298368286915484086.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/base/core.c | 3 --- include/linux/device.h | 5 ----- 2 files changed, 8 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 3d6430eb0c6a1..2eede2ec3d645 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -2864,9 +2864,6 @@ void device_initialize(struct device *dev) kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); -#ifdef CONFIG_PROVE_LOCKING - mutex_init(&dev->lockdep_mutex); -#endif lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); diff --git a/include/linux/device.h b/include/linux/device.h index 833b0b3b01932..073f1b0126ac9 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -400,8 +400,6 @@ struct dev_msi_info { * This identifies the device type and carries type-specific * information. * @mutex: Mutex to synchronize calls to its driver. 
- * @lockdep_mutex: An optional debug lock that a subsystem can use as a
- *		peer lock to gain localized lockdep coverage of the device_lock.
 * @bus:	Type of bus device is on.
 * @driver:	Which driver has allocated this
 * @platform_data: Platform data specific to the device.
@@ -499,9 +497,6 @@ struct device {
					   core doesn't touch it */
	void		*driver_data;	/* Driver data, set and get with
					   dev_set_drvdata/dev_get_drvdata */
-#ifdef CONFIG_PROVE_LOCKING
-	struct mutex		lockdep_mutex;
-#endif
	struct mutex		mutex;	/* mutex to synchronize calls to
					 * its driver.
					 */

From e6829d1bd3c4b58296ee9e412f7ed4d6cb390192 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Tue, 26 Apr 2022 13:23:05 -0700
Subject: [PATCH 34/47] nvdimm: Fix firmware activation deadlock scenarios

Lockdep reports the following deadlock scenarios for CXL root device
power-management (device_prepare()) operations and device_shutdown()
operations for 'nd_region' devices:

 Chain exists of:
   &nvdimm_region_key --> &nvdimm_bus->reconfig_mutex --> system_transition_mutex

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(system_transition_mutex);
                               lock(&nvdimm_bus->reconfig_mutex);
                               lock(system_transition_mutex);
  lock(&nvdimm_region_key);

 Chain exists of:
   &cxl_nvdimm_bridge_key --> acpi_scan_lock --> &cxl_root_key

 Possible unsafe locking scenario:

       CPU0                    CPU1
       ----                    ----
  lock(&cxl_root_key);
                               lock(acpi_scan_lock);
                               lock(&cxl_root_key);
  lock(&cxl_nvdimm_bridge_key);

These stem from holding nvdimm_bus_lock() over hibernate_quiet_exec(),
which walks the entire system device topology taking device_lock()
along the way.

The nvdimm_bus_lock() is protecting against unregistration, multiple
simultaneous ops callers, and preventing activate_show() from racing
activate_store(). For the first two, the lock is redundant:
unregistration already flushes all ops users, and sysfs already
prevents multiple threads from being active in an ops handler at the
same time. For the last, userspace should already be waiting for its
last activate_store() to complete, and does not need activate_show()
to flush the write side, so this lock usage can be deleted in these
attributes.
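For reference, an illustrative call chain for the first report
(simplified sketch; the attribute and device_prepare() names come from
the message above, the intermediate steps are assumed):

    activate_store()
        nvdimm_bus_lock(dev);             /* &nvdimm_bus->reconfig_mutex  */
        hibernate_quiet_exec()            /* takes system_transition_mutex */
            device_prepare(region_dev)    /* walks the device topology... */
                device_lock(region_dev);  /* ...taking &nvdimm_region_key */

A concurrent suspend that already holds system_transition_mutex and then
needs the bus or region locks completes the cycle. Dropping the
nvdimm_bus_lock()/nvdimm_bus_unlock() pairs from the firmware activation
attributes removes the reconfig_mutex --> system_transition_mutex edge.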
Fixes: 48001ea50d17 ("PM, libnvdimm: Add runtime firmware activation support") Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165074883800.4116052.10737040861825806582.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams --- drivers/nvdimm/core.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 144926b7451cc..d91799b71d23a 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -368,9 +368,7 @@ static ssize_t capability_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); switch (cap) { case NVDIMM_FWA_CAP_QUIESCE: @@ -395,10 +393,8 @@ static ssize_t activate_show(struct device *dev, if (!nd_desc->fw_ops) return -EOPNOTSUPP; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); state = nd_desc->fw_ops->activate_state(nd_desc); - nvdimm_bus_unlock(dev); if (cap < NVDIMM_FWA_CAP_QUIESCE) return -EOPNOTSUPP; @@ -443,7 +439,6 @@ static ssize_t activate_store(struct device *dev, else return -EINVAL; - nvdimm_bus_lock(dev); state = nd_desc->fw_ops->activate_state(nd_desc); switch (state) { @@ -461,7 +456,6 @@ static ssize_t activate_store(struct device *dev, default: rc = -ENXIO; } - nvdimm_bus_unlock(dev); if (rc == 0) rc = len; @@ -484,10 +478,7 @@ static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribut if (!nd_desc->fw_ops) return 0; - nvdimm_bus_lock(dev); cap = nd_desc->fw_ops->capability(nd_desc); - nvdimm_bus_unlock(dev); - if (cap < NVDIMM_FWA_CAP_QUIESCE) return 0; From 2bcf3bbd348fc10260aa6243ff6a22a1882b5b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:15 -0700 Subject: [PATCH 35/47] cxl/mem: Drop mem_enabled check from wait_for_media() Media ready is asserted by the device independent of whether mem_enabled was ever set. Drop this check to allow for dropping wait_for_media() in favor of ->wait_media_ready(). Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291685501.1426646.10372821863672431074.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 401b0fbe21db9..c2d9dadf4a2e0 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -27,12 +27,8 @@ static int wait_for_media(struct cxl_memdev *cxlmd) { struct cxl_dev_state *cxlds = cxlmd->cxlds; - struct cxl_endpoint_dvsec_info *info = &cxlds->info; int rc; - if (!info->mem_enabled) - return -EBUSY; - rc = cxlds->wait_media_ready(cxlds); if (rc) return rc; From 1e14c9fbb55fbc48eb88b55d1736c994b1deb631 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:20 -0700 Subject: [PATCH 36/47] cxl/pci: Consolidate wait_for_media() and wait_for_media_ready() Now that wait_for_media() does nothing supplemental to wait_for_media_ready() just promote wait_for_media_ready() to a common helper and drop wait_for_media(). 
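The resulting call-site change in cxl_mem_probe(), condensed from the
hunks below (sketch only):

    /* before: wrapper that added nothing */
    rc = wait_for_media(cxlmd);

    /* after: invoke the device-provided op directly */
    rc = cxlds->wait_media_ready(cxlds);
    if (rc) {
            dev_err(dev, "Media not active (%d)\n", rc);
            return rc;
    }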
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291686046.1426646.4390664747934592185.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 19 +------------------ drivers/cxl/pci.c | 4 ++-- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index c2d9dadf4a2e0..7622cfefa1b0f 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -24,23 +24,6 @@ * in higher level operations. */ -static int wait_for_media(struct cxl_memdev *cxlmd) -{ - struct cxl_dev_state *cxlds = cxlmd->cxlds; - int rc; - - rc = cxlds->wait_media_ready(cxlds); - if (rc) - return rc; - - /* - * We know the device is active, and enabled, if any ranges are non-zero - * we'll need to check later before adding the port since that owns the - * HDM decoder registers. - */ - return 0; -} - static int create_endpoint(struct cxl_memdev *cxlmd, struct cxl_port *parent_port) { @@ -157,7 +140,7 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = wait_for_media(cxlmd); + rc = cxlds->wait_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index e7ab9a34d7189..435f9f89b7937 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -423,7 +423,7 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) * Wait up to @mbox_ready_timeout for the device to report memory * active. */ -static int wait_for_media_ready(struct cxl_dev_state *cxlds) +static int cxl_await_media_ready(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); int d = cxlds->cxl_dvsec; @@ -593,7 +593,7 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = wait_for_media_ready; + cxlds->wait_media_ready = cxl_await_media_ready; rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) From 194d5edadf0b403f6de2be89c484a01c83ee269f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:26 -0700 Subject: [PATCH 37/47] cxl/pci: Drop wait_for_valid() from cxl_await_media_ready() A check mem_info_valid already happens in __cxl_dvsec_ranges(). Rely on that instead of calling wait_for_valid again. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291686632.1426646.7479581732894574486.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 435f9f89b7937..91b266911e522 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -431,10 +431,6 @@ static int cxl_await_media_ready(struct cxl_dev_state *cxlds) u64 md_status; int rc, i; - rc = wait_for_valid(cxlds); - if (rc) - return rc; - for (i = mbox_ready_timeout; i; i--) { u32 temp; From 76a4121e86649bf381aa32cb69ede913def57202 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:31 -0700 Subject: [PATCH 38/47] cxl/mem: Fix cxl_mem_probe() error exit The addition of cxl_mem_active() broke error exit scenarios for cxl_mem_probe(). Return early rather than proceed with disabling suspend, and update the label name since it is no longer a terminal "out" label that exits the function. 
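A condensed view of the control-flow fix (sketch of the hunks below):
the renamed label now only unlocks and propagates the error, rather than
falling through into the suspend-disable logic that should run only on
success:

    rc = create_endpoint(cxlmd, parent_port);
unlock:
    device_unlock(&parent_port->dev);
    put_device(&parent_port->dev);
    if (rc)
            return rc;      /* do not proceed to disable suspend */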
Fixes: 9ea4dcf49878 ("PM: CXL: Disable suspend") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291687176.1426646.15449254938752532784.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 7622cfefa1b0f..184549e5093fb 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -171,13 +171,15 @@ static int cxl_mem_probe(struct device *dev) dev_err(dev, "CXL port topology %s not enabled\n", dev_name(&parent_port->dev)); rc = -ENXIO; - goto out; + goto unlock; } rc = create_endpoint(cxlmd, parent_port); -out: +unlock: device_unlock(&parent_port->dev); put_device(&parent_port->dev); + if (rc) + return rc; /* * The kernel may be operating out of CXL memory on this device, From 75b7ae29991f945b69c10d75b861d7d5e90bd541 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:37 -0700 Subject: [PATCH 39/47] cxl/mem: Validate port connectivity before dvsec ranges In preparation for validating DVSEC ranges against the platform declared CXL memory ranges (ACPI CFMWS) move port enumeration before the endpoint's decoder validation. Ultimately this logic will move to the port driver, but create a bisect point before that larger move. Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291687749.1426646.18091538443879226995.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 184549e5093fb..80e75a4104993 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -140,22 +140,6 @@ static int cxl_mem_probe(struct device *dev) if (work_pending(&cxlmd->detach_work)) return -EBUSY; - rc = cxlds->wait_media_ready(cxlds); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - - /* - * If DVSEC ranges are being used instead of HDM decoder registers there - * is no use in trying to manage those. - */ - if (!cxl_hdm_decode_init(cxlds)) { - dev_err(dev, - "Legacy range registers configuration prevents HDM operation.\n"); - return -EBUSY; - } - rc = devm_cxl_enumerate_ports(cxlmd); if (rc) return rc; @@ -181,6 +165,22 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; + rc = cxlds->wait_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!cxl_hdm_decode_init(cxlds)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device From 2e4ba0ec978335b4b550bbed95cb198ac3a00745 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:43 -0700 Subject: [PATCH 40/47] cxl/pci: Move cxl_await_media_ready() to the core Allow cxl_await_media_ready() to be mocked for testing purposes rather than carrying the maintenance burden of an indirect function call in the mainline driver. With the move cxl_await_media_ready() can no longer reuse the mailbox timeout override, so add a media_ready_timeout module parameter to the core to backfill. 
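Two mechanics worth highlighting, condensed from the hunks below: the
core gains its own timeout knob since the PCI driver's
mbox_ready_timeout is no longer reachable from this path, and the unit
tests intercept the now-exported helper with a linker --wrap so mock
devices report media ready immediately:

    /* drivers/cxl/core/pci.c */
    static unsigned short media_ready_timeout = 60;
    module_param(media_ready_timeout, ushort, 0644);
    MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");

    /* tools/testing/cxl/Kbuild */
    ldflags-y += --wrap=cxl_await_media_ready

    /* tools/testing/cxl/test/mock.c: returns 0 for mock devices */
    int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds);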
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291688340.1426646.4755627801983775011.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 48 +++++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 3 +-- drivers/cxl/mem.c | 2 +- drivers/cxl/pci.c | 45 +------------------------------- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mem.c | 7 ----- tools/testing/cxl/test/mock.c | 15 +++++++++++ 7 files changed, 67 insertions(+), 54 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index c9a494d6976a4..603945f491741 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -1,8 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include #include +#include #include #include +#include #include #include "core.h" @@ -13,6 +16,10 @@ * a set of helpers for CXL interactions which occur via PCIe. */ +static unsigned short media_ready_timeout = 60; +module_param(media_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready"); + struct cxl_walk_context { struct pci_bus *bus; struct cxl_port *port; @@ -94,3 +101,44 @@ int devm_cxl_port_enumerate_dports(struct cxl_port *port) return ctx.count; } EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); + +/* + * Wait up to @media_ready_timeout for the device to report memory + * active. + */ +int cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + for (i = media_ready_timeout; i; i--) { + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + media_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 7235d2f976e5c..843916c1dab6e 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -192,7 +192,6 @@ struct cxl_endpoint_dvsec_info { * @info: Cached DVSEC information about the device. * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands - * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. 
@@ -227,7 +226,6 @@ struct cxl_dev_state { u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); - int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -348,6 +346,7 @@ struct cxl_mem_command { int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in, size_t in_size, void *out, size_t out_size); int cxl_dev_state_identify(struct cxl_dev_state *cxlds); +int cxl_await_media_ready(struct cxl_dev_state *cxlds); int cxl_enumerate_cmds(struct cxl_dev_state *cxlds); int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 80e75a4104993..8c3a1c85a7aea 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -165,7 +165,7 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - rc = cxlds->wait_media_ready(cxlds); + rc = cxl_await_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); return rc; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 91b266911e522..1bf880fa1fb8f 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -48,8 +48,7 @@ */ static unsigned short mbox_ready_timeout = 60; module_param(mbox_ready_timeout, ushort, 0644); -MODULE_PARM_DESC(mbox_ready_timeout, - "seconds to wait for mailbox ready / memory active status"); +MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready"); static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { @@ -419,46 +418,6 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } -/* - * Wait up to @mbox_ready_timeout for the device to report memory - * active. - */ -static int cxl_await_media_ready(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec; - bool active = false; - u64 md_status; - int rc, i; - - for (i = mbox_ready_timeout; i; i--) { - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); - if (rc) - return rc; - - active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); - if (active) - break; - msleep(1000); - } - - if (!active) { - dev_err(&pdev->dev, - "timeout awaiting memory active after %d seconds\n", - mbox_ready_timeout); - return -ETIMEDOUT; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!CXLMDEV_READY(md_status)) - return -EIO; - - return 0; -} - /* * Return positive number of non-zero ranges on success and a negative * error code on failure. 
The cxl_mem driver depends on ranges == 0 to @@ -589,8 +548,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev_warn(&pdev->dev, "Device DVSEC not present, skip CXL.mem init\n"); - cxlds->wait_media_ready = cxl_await_media_ready; - rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 82e49ab0937d5..6007fe770122a 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -8,6 +8,7 @@ ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders +ldflags-y += --wrap=cxl_await_media_ready DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index b6b726eff3e25..c519ace17b410 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -237,12 +237,6 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } -static int cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) -{ - msleep(100); - return 0; -} - static void label_area_release(void *lsa) { vfree(lsa); @@ -278,7 +272,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; - cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6e8c9d63c92d9..2c01d81ab0149 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -193,6 +193,21 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); +int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + rc = 0; + else + rc = cxl_await_media_ready(cxlds); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); From 14d78874077442d1d0f08129f5a0ea5070984b4b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:48 -0700 Subject: [PATCH 41/47] cxl/mem: Consolidate CXL DVSEC Range enumeration in the core In preparation for fixing the setting of the 'mem_enabled' bit in CXL DVSEC Control register, move all CXL DVSEC range enumeration into the same source file. 
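The new core entry point and its use from the endpoint driver, condensed
from the hunks below (sketch only): enumeration now fills a
caller-provided info structure instead of caching the result in
cxl_dev_state:

    /* drivers/cxl/cxlpci.h */
    int cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
                         struct cxl_endpoint_dvsec_info *info);

    /* drivers/cxl/mem.c: cxl_mem_probe() */
    struct cxl_endpoint_dvsec_info info = { 0 };

    rc = cxl_dvsec_ranges(cxlds, &info);
    if (rc)
            return rc;

    if (!cxl_hdm_decode_init(cxlds, &info))
            return -EBUSY;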
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291688886.1426646.15046138604010482084.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 129 ++++++++++++++++++++++++++++++++ drivers/cxl/cxlmem.h | 1 - drivers/cxl/cxlpci.h | 4 + drivers/cxl/mem.c | 14 ++-- drivers/cxl/pci.c | 135 ---------------------------------- tools/testing/cxl/Kbuild | 1 + tools/testing/cxl/test/mem.c | 10 --- tools/testing/cxl/test/mock.c | 16 ++++ 8 files changed, 158 insertions(+), 152 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 603945f491741..ea67117219019 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -142,3 +142,132 @@ int cxl_await_media_ready(struct cxl_dev_state *cxlds) return 0; } EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL); + +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +/* + * Return positive number of non-zero ranges on success and a negative + * error code on failure. The cxl_mem driver depends on ranges == 0 to + * init HDM operation. + */ +int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int hdm_count, rc, i, ranges = 0; + struct device *dev = &pdev->dev; + int d = cxlds->cxl_dvsec; + u16 cap, ctrl; + + if (!d) { + dev_dbg(dev, "No DVSEC Capability\n"); + return -ENXIO; + } + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { + dev_dbg(dev, "Not MEM Capable\n"); + return -ENXIO; + } + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. 
+ */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) { + dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); + return rc; + } + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + ranges++; + } + + info->ranges = ranges; + + return 0; +} +EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL); diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 843916c1dab6e..60d10ee1e7fcd 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -222,7 +222,6 @@ struct cxl_dev_state { u64 next_persistent_bytes; resource_size_t component_reg_phys; - struct cxl_endpoint_dvsec_info info; u64 serial; int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 329e7ea3f36a6..ad1b628431959 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -72,4 +72,8 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, } int devm_cxl_port_enumerate_dports(struct cxl_port *port); +struct cxl_dev_state; +struct cxl_endpoint_dvsec_info; +int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 8c3a1c85a7aea..0cfbde134fc74 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -58,18 +58,15 @@ static int create_endpoint(struct cxl_memdev *cxlmd, * decoders, or if it can not be determined if DVSEC Ranges are in use. * Otherwise, returns true. */ -__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { - struct cxl_endpoint_dvsec_info *info = &cxlds->info; struct cxl_register_map map; struct cxl_component_reg_map *cmap = &map.component_map; bool global_enable, retval = false; void __iomem *crb; u32 global_ctrl; - if (info->ranges < 0) - return false; - /* map hdm decoder */ crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); if (!crb) { @@ -125,6 +122,7 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -165,6 +163,10 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds, &info); + if (rc) + return rc; + rc = cxl_await_media_ready(cxlds); if (rc) { dev_err(dev, "Media not active (%d)\n", rc); @@ -175,7 +177,7 @@ static int cxl_mem_probe(struct device *dev) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. 
*/ - if (!cxl_hdm_decode_init(cxlds)) { + if (!cxl_hdm_decode_init(cxlds, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 1bf880fa1fb8f..5a0ae46d4989d 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -386,139 +386,6 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } -static int wait_for_valid(struct cxl_dev_state *cxlds) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int d = cxlds->cxl_dvsec, rc; - u32 val; - - /* - * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high - * and Size Low registers are valid. Must be set within 1 second of - * deassertion of reset to CXL device. Likely it is already set by the - * time this runs, but otherwise give a 1.5 second timeout in case of - * clock skew. - */ - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - msleep(1500); - - rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); - if (rc) - return rc; - - if (val & CXL_DVSEC_MEM_INFO_VALID) - return 0; - - return -ETIMEDOUT; -} - -/* - * Return positive number of non-zero ranges on success and a negative - * error code on failure. The cxl_mem driver depends on ranges == 0 to - * init HDM operation. - */ -static int __cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - struct pci_dev *pdev = to_pci_dev(cxlds->dev); - int hdm_count, rc, i, ranges = 0; - struct device *dev = &pdev->dev; - int d = cxlds->cxl_dvsec; - u16 cap, ctrl; - - if (!d) { - dev_dbg(dev, "No DVSEC Capability\n"); - return -ENXIO; - } - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); - if (rc) - return rc; - - rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); - if (rc) - return rc; - - if (!(cap & CXL_DVSEC_MEM_CAPABLE)) { - dev_dbg(dev, "Not MEM Capable\n"); - return -ENXIO; - } - - /* - * It is not allowed by spec for MEM.capable to be set and have 0 legacy - * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this - * driver is for a spec defined class code which must be CXL.mem - * capable, there is no point in continuing to enable CXL.mem. 
- */ - hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); - if (!hdm_count || hdm_count > 2) - return -EINVAL; - - rc = wait_for_valid(cxlds); - if (rc) { - dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc); - return rc; - } - - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - - for (i = 0; i < hdm_count; i++) { - u64 base, size; - u32 temp; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); - if (rc) - return rc; - - size = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); - if (rc) - return rc; - - size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); - if (rc) - return rc; - - base = (u64)temp << 32; - - rc = pci_read_config_dword( - pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); - if (rc) - return rc; - - base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - - info->dvsec_range[i] = (struct range) { - .start = base, - .end = base + size - 1 - }; - - if (size) - ranges++; - } - - return ranges; -} - -static void cxl_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info = &cxlds->info; - - info->ranges = __cxl_dvsec_ranges(cxlds, info); -} - static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -583,8 +450,6 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; - cxl_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 6007fe770122a..2ea6fcb8baa56 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -9,6 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready +ldflags-y += --wrap=cxl_dvsec_ranges DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index c519ace17b410..6b9239b2afd4e 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -242,14 +242,6 @@ static void label_area_release(void *lsa) vfree(lsa); } -static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) -{ - struct cxl_endpoint_dvsec_info *info; - - info = &cxlds->info; - info->mem_enabled = true; -} - static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -286,8 +278,6 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - mock_validate_dvsec_ranges(cxlds); - cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 2c01d81ab0149..d6aa644822db4 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,6 +208,22 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); +int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + int rc = 0, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_dev(cxlds->dev)) + info->mem_enabled = 1; + else + rc = cxl_dvsec_ranges(cxlds, info); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL); + MODULE_LICENSE("GPL v2"); 
MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); From dd2d42ad6f422076d1bd49b132bec74376c26f5c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:34:54 -0700 Subject: [PATCH 42/47] cxl/mem: Skip range enumeration if mem_enable clear When a device does not have mem_enable set then the current range settings are moot. Skip the enumeration and cause cxl_hdm_decode_init() to proceed directly to enable the HDM Decoder Capability. Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info") Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291689442.1426646.18012291761753694336.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 2 ++ drivers/cxl/mem.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index ea67117219019..f3e59f8b6621c 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -224,6 +224,8 @@ int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, } info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info->mem_enabled) + return 0; for (i = 0; i < hdm_count; i++) { u64 base, size; diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 0cfbde134fc74..902d1f6e189e1 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -92,7 +92,7 @@ __mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * are expected even though Linux does not require or maintain that * match. */ - if (!global_enable && info->ranges) + if (!global_enable && info->mem_enabled && info->ranges) goto out; retval = true; From a12562bb70776093b270f79a4b6ef18f4bcead2b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:00 -0700 Subject: [PATCH 43/47] cxl/mem: Merge cxl_dvsec_ranges() and cxl_hdm_decode_init() In preparation for changing how the driver handles 'mem_enable' in the CXL DVSEC control register. Merge the contents of cxl_hdm_decode_init() into cxl_dvsec_ranges() and rename the combined function cxl_hdm_decode_init(). The possible cleanups and fixes that result from this merge are saved for a follow-on change. Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/165291690027.1426646.10249756632415633752.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 82 ++++++++++++++++++++++++++++++++--- drivers/cxl/cxlpci.h | 4 +- drivers/cxl/mem.c | 80 +--------------------------------- tools/testing/cxl/Kbuild | 3 +- tools/testing/cxl/mock_mem.c | 10 ----- tools/testing/cxl/test/mock.c | 8 ++-- 6 files changed, 83 insertions(+), 104 deletions(-) delete mode 100644 tools/testing/cxl/mock_mem.c diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index f3e59f8b6621c..0fbda1a1ca1b1 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -175,13 +175,71 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) return -ETIMEDOUT; } -/* - * Return positive number of non-zero ranges on success and a negative - * error code on failure. The cxl_mem driver depends on ranges == 0 to - * init HDM operation. 
+static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) +{ + struct cxl_register_map map; + struct cxl_component_reg_map *cmap = &map.component_map; + bool global_enable, retval = false; + void __iomem *crb; + u32 global_ctrl; + + /* map hdm decoder */ + crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_dbg(cxlds->dev, "Failed to map component registers\n"); + return false; + } + + cxl_probe_component_regs(cxlds->dev, crb, cmap); + if (!cmap->hdm_decoder.valid) { + dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); + goto out; + } + + global_ctrl = readl(crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + + /* + * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base + * [High,Low] when HDM operation is enabled the range register values + * are ignored by the device, but the spec also recommends matching the + * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges + * are expected even though Linux does not require or maintain that + * match. + */ + if (!global_enable && info->mem_enabled && info->ranges) + goto out; + + retval = true; + + /* + * Permanently (for this boot at least) opt the device into HDM + * operation. Individual HDM decoders still need to be enabled after + * this point. + */ + if (!global_enable) { + dev_dbg(cxlds->dev, "Enabling HDM decode\n"); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + } + +out: + iounmap(crb); + return retval; +} + +/** + * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * @info: DVSEC Range cached enumeration + * + * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); int hdm_count, rc, i, ranges = 0; @@ -270,6 +328,16 @@ int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, info->ranges = ranges; + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. 
+ */ + if (!__cxl_hdm_decode_init(cxlds, info)) { + dev_err(dev, + "Legacy range registers configuration prevents HDM operation.\n"); + return -EBUSY; + } + return 0; } -EXPORT_SYMBOL_NS_GPL(cxl_dvsec_ranges, CXL); +EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL); diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index ad1b628431959..202fdaa8d293f 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -74,6 +74,6 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; struct cxl_endpoint_dvsec_info; -int cxl_dvsec_ranges(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_endpoint_dvsec_info *info); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 902d1f6e189e1..2a5dc92d566fe 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -46,74 +46,6 @@ static int create_endpoint(struct cxl_memdev *cxlmd, return cxl_endpoint_autoremove(cxlmd, endpoint); } -/** - * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint - * @cxlds: Device state - * - * Additionally, enables global HDM decoding. Warning: don't call this outside - * of probe. Once probe is complete, the port driver owns all access to the HDM - * decoder registers. - * - * Returns: false if DVSEC Ranges are being used instead of HDM - * decoders, or if it can not be determined if DVSEC Ranges are in use. - * Otherwise, returns true. - */ -__mock bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) -{ - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, retval = false; - void __iomem *crb; - u32 global_ctrl; - - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; - - /* - * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base - * [High,Low] when HDM operation is enabled the range register values - * are ignored by the device, but the spec also recommends matching the - * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges - * are expected even though Linux does not require or maintain that - * match. - */ - if (!global_enable && info->mem_enabled && info->ranges) - goto out; - - retval = true; - - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. 
- */
-	if (!global_enable) {
-		dev_dbg(cxlds->dev, "Enabling HDM decode\n");
-		writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
-		       crb + cmap->hdm_decoder.offset +
-			       CXL_HDM_DECODER_CTRL_OFFSET);
-	}
-
-out:
-	iounmap(crb);
-	return retval;
-}
-
 static void enable_suspend(void *data)
 {
 	cxl_mem_active_dec();
@@ -163,7 +95,7 @@ static int cxl_mem_probe(struct device *dev)
 	if (rc)
 		return rc;
 
-	rc = cxl_dvsec_ranges(cxlds, &info);
+	rc = cxl_hdm_decode_init(cxlds, &info);
 	if (rc)
 		return rc;
 
@@ -173,16 +105,6 @@ static int cxl_mem_probe(struct device *dev)
 		return rc;
 	}
 
-	/*
-	 * If DVSEC ranges are being used instead of HDM decoder registers there
-	 * is no use in trying to manage those.
-	 */
-	if (!cxl_hdm_decode_init(cxlds, &info)) {
-		dev_err(dev,
-			"Legacy range registers configuration prevents HDM operation.\n");
-		return -EBUSY;
-	}
-
 	/*
 	 * The kernel may be operating out of CXL memory on this device,
 	 * there is no spec defined way to determine whether this device
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 2ea6fcb8baa56..33543231d453d 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -9,7 +9,7 @@ ldflags-y += --wrap=devm_cxl_setup_hdm
 ldflags-y += --wrap=devm_cxl_add_passthrough_decoder
 ldflags-y += --wrap=devm_cxl_enumerate_decoders
 ldflags-y += --wrap=cxl_await_media_ready
-ldflags-y += --wrap=cxl_dvsec_ranges
+ldflags-y += --wrap=cxl_hdm_decode_init
 
 DRIVERS := ../../../drivers
 CXL_SRC := $(DRIVERS)/cxl
@@ -36,7 +36,6 @@ cxl_port-y += config_check.o
 
 obj-m += cxl_mem.o
 cxl_mem-y := $(CXL_SRC)/mem.o
-cxl_mem-y += mock_mem.o
 cxl_mem-y += config_check.o
 
 obj-m += cxl_core.o
diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c
deleted file mode 100644
index 69946f678cfa0..0000000000000
--- a/tools/testing/cxl/mock_mem.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
-
-#include 
-
-struct cxl_dev_state;
-bool cxl_hdm_decode_init(struct cxl_dev_state *cxlds)
-{
-	return true;
-}
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index d6aa644822db4..ddf0e7dd92499 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -208,8 +208,8 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL);
 
-int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
-			    struct cxl_endpoint_dvsec_info *info)
+int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+			       struct cxl_endpoint_dvsec_info *info)
 {
 	int rc = 0, index;
 	struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
@@ -217,12 +217,12 @@ int __wrap_cxl_dvsec_ranges(struct cxl_dev_state *cxlds,
 	if (ops && ops->is_mock_dev(cxlds->dev))
 		info->mem_enabled = 1;
 	else
-		rc = cxl_dvsec_ranges(cxlds, info);
+		rc = cxl_hdm_decode_init(cxlds, info);
 	put_cxl_mock_ops(index);
 
 	return rc;
 }
-EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_ranges, CXL);
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL);
 
 MODULE_LICENSE("GPL v2");
 MODULE_IMPORT_NS(ACPI);
 MODULE_IMPORT_NS(CXL);

From 92804edb11f065aadb3a4f398bed8a846a035cd3 Mon Sep 17 00:00:00 2001
From: Dan Williams
Date: Wed, 18 May 2022 16:35:06 -0700
Subject: [PATCH 44/47] cxl/pci: Drop @info argument to cxl_hdm_decode_init()

Now that nothing external to cxl_hdm_decode_init() considers 'struct
cxl_endpoint_dvsec_info', move it internal to cxl_hdm_decode_init().
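An aside, not part of the patch: moving the struct onto the stack leans on C's guarantee that "= { 0 }" zero-fills every member of the aggregate, including the embedded range array, so ranges == 0 and mem_enabled == 0 are safe defaults before enumeration runs. A compileable sketch with a simplified stand-in for the kernel's struct definition:

#include <assert.h>
#include <stdint.h>

struct range { uint64_t start, end; };

/* simplified stand-in, not the driver's cxl_endpoint_dvsec_info */
struct endpoint_dvsec_info {
	int mem_enabled;
	int ranges;
	struct range dvsec_range[2];
};

int main(void)
{
	struct endpoint_dvsec_info info = { 0 };

	/* all members, nested arrays included, are zero-initialized */
	assert(info.mem_enabled == 0);
	assert(info.ranges == 0);
	assert(info.dvsec_range[1].end == 0);
	return 0;
}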
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291690612.1426646.7866084245521113414.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 15 +++++++-------- drivers/cxl/cxlpci.h | 4 +--- drivers/cxl/mem.c | 3 +-- tools/testing/cxl/test/mock.c | 9 +++------ 4 files changed, 12 insertions(+), 19 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 0fbda1a1ca1b1..7d2238edc3794 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -234,14 +234,13 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, /** * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state - * @info: DVSEC Range cached enumeration * * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); + struct cxl_endpoint_dvsec_info info = { 0 }; int hdm_count, rc, i, ranges = 0; struct device *dev = &pdev->dev; int d = cxlds->cxl_dvsec; @@ -281,8 +280,8 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, return rc; } - info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); - if (!info->mem_enabled) + info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + if (!info.mem_enabled) return 0; for (i = 0; i < hdm_count; i++) { @@ -317,7 +316,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; - info->dvsec_range[i] = (struct range) { + info.dvsec_range[i] = (struct range) { .start = base, .end = base + size - 1 }; @@ -326,13 +325,13 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, ranges++; } - info->ranges = ranges; + info.ranges = ranges; /* * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. 
*/ - if (!__cxl_hdm_decode_init(cxlds, info)) { + if (!__cxl_hdm_decode_init(cxlds, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 202fdaa8d293f..53cd34f8813c5 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -73,7 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -struct cxl_endpoint_dvsec_info; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 2a5dc92d566fe..8ce89d128e36a 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -54,7 +54,6 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_endpoint_dvsec_info info = { 0 }; struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -95,7 +94,7 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds, &info); + rc = cxl_hdm_decode_init(cxlds); if (rc) return rc; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index ddf0e7dd92499..45ffbb8f519af 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,16 +208,13 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); -int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, - struct cxl_endpoint_dvsec_info *info) +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_dev(cxlds->dev)) - info->mem_enabled = 1; - else - rc = cxl_hdm_decode_init(cxlds, info); + if (!ops || !ops->is_mock_dev(cxlds->dev)) + rc = cxl_hdm_decode_init(cxlds); put_cxl_mock_ops(index); return rc; From 5e5f4ad52f33c125af9b91d4c3b7cad59c13772e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:11 -0700 Subject: [PATCH 45/47] cxl/port: Move endpoint HDM Decoder Capability init to port driver The responsibility for establishing HDM Decoder Capability based operation is more closely tied to port enabling than memdev enabling which is concerned with port enumeration. This later enables reusing @cxlhdm for probing / controlling "global enable" for the HDM Decoder Capability. For now, just do the nominal move. 
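For orientation, the resulting cxl_port_probe() shape after this move is roughly the following. These are invented stubs standing in for the real helpers, not the driver's code:

#include <stdbool.h>
#include <stdio.h>

static int cxl_hdm_decode_init_stub(void)   { return 0; }
static int cxl_await_media_ready_stub(void) { return 0; }
static int enumerate_dports_stub(void)      { return 2; } /* pretend: two dports */

static int port_probe(bool is_endpoint)
{
	int rc;

	if (is_endpoint) {
		/* endpoint: settle DVSEC-vs-HDM policy, then wait for media */
		rc = cxl_hdm_decode_init_stub();
		if (rc)
			return rc;
		return cxl_await_media_ready_stub();
	}

	/* switch/root port: only dport enumeration happens at this level */
	rc = enumerate_dports_stub();
	return rc < 0 ? rc : 0;
}

int main(void)
{
	printf("endpoint: %d, switch: %d\n",
	       port_probe(true), port_probe(false));
	return 0;
}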
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291691167.1426646.7936109077255288258.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/mem.c | 11 ----------- drivers/cxl/port.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index 8ce89d128e36a..c310f1fd3db02 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -54,7 +54,6 @@ static void enable_suspend(void *data) static int cxl_mem_probe(struct device *dev) { struct cxl_memdev *cxlmd = to_cxl_memdev(dev); - struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_port *parent_port; int rc; @@ -94,16 +93,6 @@ static int cxl_mem_probe(struct device *dev) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds); - if (rc) - return rc; - - rc = cxl_await_media_ready(cxlds); - if (rc) { - dev_err(dev, "Media not active (%d)\n", rc); - return rc; - } - /* * The kernel may be operating out of CXL memory on this device, * there is no spec defined way to determine whether this device diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index d420da5fc39c9..a7deaeaf02760 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -38,11 +38,22 @@ static int cxl_port_probe(struct device *dev) if (is_cxl_endpoint(port)) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + struct cxl_dev_state *cxlds = cxlmd->cxlds; get_device(&cxlmd->dev); rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); if (rc) return rc; + + rc = cxl_hdm_decode_init(cxlds); + if (rc) + return rc; + + rc = cxl_await_media_ready(cxlds); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } } else { rc = devm_cxl_port_enumerate_dports(port); if (rc < 0) From fcfbc93cc33ec601f00f113eca6fc484b930532d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 18 May 2022 16:35:17 -0700 Subject: [PATCH 46/47] cxl/port: Reuse 'struct cxl_hdm' context for hdm init The port driver maps component registers for port operations. Reuse that mapping for HDM Decoder Capability setup / enable. Move devm_cxl_setup_hdm() before cxl_hdm_decode_init() and plumb @cxlhdm through the hdm init helpers. 
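A standalone sketch of the read-modify-write this change performs against the now-shared mapping. The 0x4 offset and bit 1 are assumptions for the example, standing in for CXL_HDM_DECODER_CTRL_OFFSET and CXL_HDM_DECODER_ENABLE:

#include <stdint.h>
#include <stdio.h>

#define HDM_DECODER_CTRL_OFFSET 0x4       /* assumed register offset */
#define HDM_DECODER_ENABLE      (1u << 1) /* assumed bit position */

static uint32_t mmio[16]; /* stand-in for the ioremapped capability block */

static uint32_t mmio_read(unsigned int off)          { return mmio[off / 4]; }
static void mmio_write(unsigned int off, uint32_t v) { mmio[off / 4] = v; }

int main(void)
{
	uint32_t ctrl = mmio_read(HDM_DECODER_CTRL_OFFSET);

	/* only flip the global enable if it is not already set */
	if (!(ctrl & HDM_DECODER_ENABLE))
		mmio_write(HDM_DECODER_CTRL_OFFSET, ctrl | HDM_DECODER_ENABLE);

	printf("ctrl = %#x\n", mmio_read(HDM_DECODER_CTRL_OFFSET));
	return 0;
}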
Reviewed-by: Ira Weiny Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/165291691712.1426646.14336397551571515480.stgit@dwillia2-xfh Signed-off-by: Dan Williams --- drivers/cxl/core/pci.c | 39 +++++++++-------------------------- drivers/cxl/cxlpci.h | 2 +- drivers/cxl/port.c | 25 ++++++++++++---------- tools/testing/cxl/test/mock.c | 5 +++-- 4 files changed, 28 insertions(+), 43 deletions(-) diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index 7d2238edc3794..3305e9b750af1 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -176,29 +176,14 @@ static int wait_for_valid(struct cxl_dev_state *cxlds) } static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) { - struct cxl_register_map map; - struct cxl_component_reg_map *cmap = &map.component_map; - bool global_enable, retval = false; - void __iomem *crb; + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + bool global_enable; u32 global_ctrl; - /* map hdm decoder */ - crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); - if (!crb) { - dev_dbg(cxlds->dev, "Failed to map component registers\n"); - return false; - } - - cxl_probe_component_regs(cxlds->dev, crb, cmap); - if (!cmap->hdm_decoder.valid) { - dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); - goto out; - } - - global_ctrl = readl(crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; /* @@ -210,9 +195,7 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * match. */ if (!global_enable && info->mem_enabled && info->ranges) - goto out; - - retval = true; + return false; /* * Permanently (for this boot at least) opt the device into HDM @@ -222,22 +205,20 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, if (!global_enable) { dev_dbg(cxlds->dev, "Enabling HDM decode\n"); writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - crb + cmap->hdm_decoder.offset + - CXL_HDM_DECODER_CTRL_OFFSET); + hdm + CXL_HDM_DECODER_CTRL_OFFSET); } -out: - iounmap(crb); - return retval; + return true; } /** * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint * @cxlds: Device state + * @cxlhdm: Mapped HDM decoder Capability * * Try to enable the endpoint's HDM Decoder Capability */ -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) { struct pci_dev *pdev = to_pci_dev(cxlds->dev); struct cxl_endpoint_dvsec_info info = { 0 }; @@ -331,7 +312,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. 
*/ - if (!__cxl_hdm_decode_init(cxlds, &info)) { + if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n"); return -EBUSY; diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h index 53cd34f8813c5..fce1c11729c24 100644 --- a/drivers/cxl/cxlpci.h +++ b/drivers/cxl/cxlpci.h @@ -73,5 +73,5 @@ static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, int devm_cxl_port_enumerate_dports(struct cxl_port *port); struct cxl_dev_state; -int cxl_hdm_decode_init(struct cxl_dev_state *cxlds); +int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm); #endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c index a7deaeaf02760..3cf308f114c47 100644 --- a/drivers/cxl/port.c +++ b/drivers/cxl/port.c @@ -36,6 +36,19 @@ static int cxl_port_probe(struct device *dev) struct cxl_hdm *cxlhdm; int rc; + + if (!is_cxl_endpoint(port)) { + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); + } + + cxlhdm = devm_cxl_setup_hdm(port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); + if (is_cxl_endpoint(port)) { struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); struct cxl_dev_state *cxlds = cxlmd->cxlds; @@ -45,7 +58,7 @@ static int cxl_port_probe(struct device *dev) if (rc) return rc; - rc = cxl_hdm_decode_init(cxlds); + rc = cxl_hdm_decode_init(cxlds, cxlhdm); if (rc) return rc; @@ -54,18 +67,8 @@ static int cxl_port_probe(struct device *dev) dev_err(dev, "Media not active (%d)\n", rc); return rc; } - } else { - rc = devm_cxl_port_enumerate_dports(port); - if (rc < 0) - return rc; - if (rc == 1) - return devm_cxl_add_passthrough_decoder(port); } - cxlhdm = devm_cxl_setup_hdm(port); - if (IS_ERR(cxlhdm)) - return PTR_ERR(cxlhdm); - rc = devm_cxl_enumerate_decoders(cxlhdm); if (rc) { dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 45ffbb8f519af..f1f8c40948c5c 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -208,13 +208,14 @@ int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_await_media_ready, CXL); -bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds) +bool __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, + struct cxl_hdm *cxlhdm) { int rc = 0, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (!ops || !ops->is_mock_dev(cxlds->dev)) - rc = cxl_hdm_decode_init(cxlds); + rc = cxl_hdm_decode_init(cxlds, cxlhdm); put_cxl_mock_ops(index); return rc; From 34e37b4c432cd0f1842b352fde4b8878b4166888 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 20 May 2022 11:30:15 -0700 Subject: [PATCH 47/47] cxl/port: Enable HDM Capability after validating DVSEC Ranges CXL memory expanders that support the CXL 2.0 memory device class code include an "HDM Decoder Capability" mechanism to supplant the "CXL DVSEC Range" mechanism originally defined in CXL 1.1. Both mechanisms depend on a "mem_enable" bit being set in configuration space before either mechanism activates. When the HDM Decoder Capability is enabled the CXL DVSEC Range settings are ignored. Previously, the cxl_mem driver was relying on platform-firmware to set "mem_enable". That is an invalid assumption as there is no requirement that platform-firmware sets the bit before the driver sees a device, especially in hot-plug scenarios. 
Additionally, ACPI platforms that support CXL 2.0 devices support the
ACPI CEDT (CXL Early Discovery Table). That table outlines the platform
permissible address ranges for CXL operation. So, there is a need for
the driver to set "mem_enable", and there is information available to
determine the validity of the CXL DVSEC Ranges.

Arrange for the driver to optionally enable the HDM Decoder Capability
if "mem_enable" was not set by platform firmware, or the CXL DVSEC
Range configuration was invalid. Be careful to only disable memory
decode if the kernel was the one to enable it. In other words, if CXL
is backing all of kernel memory at boot the device needs to maintain
"mem_enable" and "HDM Decoder enable" all the way up to handoff back
to platform firmware (e.g. ACPI S5 state entry may require CXL memory
to stay active).

Fixes: 560f78559006 ("cxl/pci: Retrieve CXL DVSEC memory info")
Cc: Dan Carpenter
[dan: fix early termination of range-allowed loop]
Cc: Ariel Sibley
[ariel: Memory_size must be non-zero]
Cc: Jonathan Cameron
Reviewed-by: Ira Weiny
Link: https://lore.kernel.org/r/165307136375.2499769.861793697156744166.stgit@dwillia2-xfh
Signed-off-by: Dan Williams
---
 drivers/cxl/core/pci.c | 167 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 152 insertions(+), 15 deletions(-)

diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 3305e9b750af1..c4c99ff7b55ef 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -175,16 +175,149 @@ static int wait_for_valid(struct cxl_dev_state *cxlds)
 	return -ETIMEDOUT;
 }
 
+static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	int d = cxlds->cxl_dvsec;
+	u16 ctrl;
+	int rc;
+
+	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+	if (rc < 0)
+		return rc;
+
+	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
+		return 1;
+	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
+	ctrl |= val;
+
+	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
+	if (rc < 0)
+		return rc;
+
+	return 0;
+}
+
+static void clear_mem_enable(void *cxlds)
+{
+	cxl_set_mem_enable(cxlds, 0);
+}
+
+static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
+{
+	int rc;
+
+	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
+	if (rc < 0)
+		return rc;
+	if (rc > 0)
+		return 0;
+	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
+}
+
+static bool range_contains(struct range *r1, struct range *r2)
+{
+	return r1->start <= r2->start && r1->end >= r2->end;
+}
+
+/* require dvsec ranges to be covered by a locked platform window */
+static int dvsec_range_allowed(struct device *dev, void *arg)
+{
+	struct range *dev_range = arg;
+	struct cxl_decoder *cxld;
+	struct range root_range;
+
+	if (!is_root_decoder(dev))
+		return 0;
+
+	cxld = to_cxl_decoder(dev);
+
+	if (!(cxld->flags & CXL_DECODER_F_LOCK))
+		return 0;
+	if (!(cxld->flags & CXL_DECODER_F_RAM))
+		return 0;
+
+	root_range = (struct range) {
+		.start = cxld->platform_res.start,
+		.end = cxld->platform_res.end,
+	};
+
+	return range_contains(&root_range, dev_range);
+}
+
+static void disable_hdm(void *_cxlhdm)
+{
+	u32 global_ctrl;
+	struct cxl_hdm *cxlhdm = _cxlhdm;
+	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+
+	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
+	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+}
+
+static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
+{
+	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
u32 global_ctrl; + + global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + hdm + CXL_HDM_DECODER_CTRL_OFFSET); + + return devm_add_action_or_reset(host, disable_hdm, cxlhdm); +} + static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm, struct cxl_endpoint_dvsec_info *info) { void __iomem *hdm = cxlhdm->regs.hdm_decoder; - bool global_enable; + struct cxl_port *port = cxlhdm->port; + struct device *dev = cxlds->dev; + struct cxl_port *root; + int i, rc, allowed; u32 global_ctrl; global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET); - global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + + /* + * If the HDM Decoder Capability is already enabled then assume + * that some other agent like platform firmware set it up. + */ + if (global_ctrl & CXL_HDM_DECODER_ENABLE) { + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; + return true; + } + + root = to_cxl_port(port->dev.parent); + while (!is_cxl_root(root) && is_cxl_port(root->dev.parent)) + root = to_cxl_port(root->dev.parent); + if (!is_cxl_root(root)) { + dev_err(dev, "Failed to acquire root port for HDM enable\n"); + return false; + } + + for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) { + struct device *cxld_dev; + + cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i], + dvsec_range_allowed); + if (!cxld_dev) { + dev_dbg(dev, "DVSEC Range%d denied by platform\n", i); + continue; + } + dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i); + put_device(cxld_dev); + allowed++; + } + + if (!allowed) { + cxl_set_mem_enable(cxlds, 0); + info->mem_enabled = 0; + } /* * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 DVSEC CXL Range 1 Base @@ -192,21 +325,19 @@ static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds, * are ignored by the device, but the spec also recommends matching the * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges * are expected even though Linux does not require or maintain that - * match. + * match. If at least one DVSEC range is enabled and allowed, skip HDM + * Decoder Capability Enable. */ - if (!global_enable && info->mem_enabled && info->ranges) + if (info->mem_enabled) return false; - /* - * Permanently (for this boot at least) opt the device into HDM - * operation. Individual HDM decoders still need to be enabled after - * this point. - */ - if (!global_enable) { - dev_dbg(cxlds->dev, "Enabling HDM decode\n"); - writel(global_ctrl | CXL_HDM_DECODER_ENABLE, - hdm + CXL_HDM_DECODER_CTRL_OFFSET); - } + rc = devm_cxl_enable_hdm(&port->dev, cxlhdm); + if (rc) + return false; + + rc = devm_cxl_enable_mem(&port->dev, cxlds); + if (rc) + return false; return true; } @@ -261,9 +392,14 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) return rc; } + /* + * The current DVSEC values are moot if the memory capability is + * disabled, and they will remain moot after the HDM Decoder + * capability is enabled. + */ info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); if (!info.mem_enabled) - return 0; + goto hdm_init; for (i = 0; i < hdm_count; i++) { u64 base, size; @@ -312,6 +448,7 @@ int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm) * If DVSEC ranges are being used instead of HDM decoder registers there * is no use in trying to manage those. */ +hdm_init: if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) { dev_err(dev, "Legacy range registers configuration prevents HDM operation.\n");
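To close the series out, the key policy pivot above is dvsec_range_allowed(): a DVSEC range is only honored when a locked, RAM-type root decoder fully contains it. The containment predicate is small enough to exercise standalone; the window and range addresses below are made-up examples, not values from real hardware:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, end; };

/* same shape as the patch's range_contains() helper */
static bool range_contains(const struct range *r1, const struct range *r2)
{
	return r1->start <= r2->start && r1->end >= r2->end;
}

int main(void)
{
	struct range window   = { 0xc0000000ULL, 0xffffffffULL }; /* example platform window */
	struct range inside   = { 0xc0000000ULL, 0xdfffffffULL };
	struct range straddle = { 0x80000000ULL, 0xcfffffffULL };

	printf("inside allowed: %d\n", range_contains(&window, &inside));     /* 1 */
	printf("straddling allowed: %d\n", range_contains(&window, &straddle)); /* 0 */
	return 0;
}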